AI03, Artificial neural networks
List of posts to read before reading this article
Contents
- Resource
- Numpy
- Tensorflow
- Pytorch
Resource
CPU
CPU Resource info
# cat /proc/cpuinfo
Total number of logical processors
$ grep -c processor /proc/cpuinfo
Number of physical CPUs (sockets)
$ grep "physical id" /proc/cpuinfo | sort -u | wc -l
Number of cores per CPU
$ grep "cpu cores" /proc/cpuinfo | tail -1
GPU
GPU Resource info
$ nvidia-smi
GPU monitoring (refresh every second, highlighting changes)
$ watch -n 1 -d nvidia-smi
Processes currently using the GPU
$ fuser -v /dev/nvidia*
GPU memory control: see 'Deallocate memory on GPU' and 'Allocate memory on GPU' below
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
OUTPUT
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7812072362293866351
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 12834618334973673973
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10813738189
locality {
bus_id: 1
links {
}
}
incarnation: 2176570505504160042
physical_device_desc: "device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:3b:00.0, compute capability: 7.5"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 10813738189
locality {
bus_id: 1
links {
}
}
incarnation: 16344150243988831062
physical_device_desc: "device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:5e:00.0, compute capability: 7.5"
, name: "/device:GPU:2"
device_type: "GPU"
memory_limit: 10813738189
locality {
bus_id: 2
numa_node: 1
links {
}
}
incarnation: 15503034830640890796
physical_device_desc: "device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:86:00.0, compute capability: 7.5"
, name: "/device:GPU:3"
device_type: "GPU"
memory_limit: 10812430746
locality {
bus_id: 2
numa_node: 1
links {
}
}
incarnation: 17206545542125030428
physical_device_desc: "device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:af:00.0, compute capability: 7.5"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 3251941024359796176
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_GPU:1"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 14468545947390282029
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_GPU:2"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 759770992281457065
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_GPU:3"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 15023472020250575167
physical_device_desc: "device: XLA_GPU device"
]
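In recent TF 2.x the same check is shorter with tf.config; a minimal sketch:
import tensorflow as tf
# lists physical devices without the verbose locality/incarnation details
print(tf.config.list_physical_devices('GPU'))
print(tf.config.list_physical_devices('CPU'))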
Deallocate memory on GPU
$ nvidia-smi --gpu-reset -i 0
# or forcefully kill the process occupying the GPU (find the PID with fuser above)
$ kill -9 [PID_num]
Allocate memory on GPU
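A minimal TF 2.x sketch of the two usual allocation options, on-demand growth or a hard cap; the 4096 MB value is an arbitrary example:
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Option 1: allocate GPU memory on demand instead of grabbing it all at once
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Option 2 (alternative): hard-cap the first GPU at 4096 MB
        # tf.config.experimental.set_virtual_device_configuration(
        #     gpus[0],
        #     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
    except RuntimeError as e:
        print(e)  # memory options must be set before the GPUs are initialized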
tensorflow : One GPU(default)
import tensorflow as tf
[Code : data preprocessing]
[Code : deep neural net model]
tensorflow : One GPU with CPU
import tensorflow as tf
tf.debugging.set_log_device_placement(True)
try:
with tf.device('/device:CPU:0'):
[Code : data preprocessing]
with tf.device('/device:GPU:2'):
[Code : deep neural net model]
except RuntimeError as e:
print(e)
tensorflow : Multi-GPU with CPU
import tensorflow as tf
tf.debugging.set_log_device_placement(True)
gpus = tf.config.experimental.list_logical_devices('GPU')
if gpus:
with tf.device('/CPU:0'):
[Code : data preprocessing]
for gpu in gpus:
with tf.device(gpu.name):
[Code : deep neural net model]
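For real multi-GPU training, tf.distribute.MirroredStrategy is usually simpler than placing ops by hand; a minimal sketch (the one-layer model is a placeholder):
import tensorflow as tf
strategy = tf.distribute.MirroredStrategy()  # replicates the model on every visible GPU
print('Number of devices:', strategy.num_replicas_in_sync)
with strategy.scope():  # build and compile the model inside the scope
    model = tf.keras.Sequential([tf.keras.layers.Dense(10, activation='softmax', input_shape=(784,))])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')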
pytorch
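A minimal sketch of the equivalent device checks in PyTorch:
import torch
print(torch.cuda.is_available())   # True if a usable GPU is visible
print(torch.cuda.device_count())   # number of visible GPUs
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
x = torch.zeros(3, 3).to(device)   # tensors are placed explicitly with .to(device)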
tensorboard
import tensorflow as tf
from datetime import datetime
import os
%load_ext tensorboard
%matplotlib inline
[Code : data preprocessing]
[Code : deep neural net model]
tensorboard = tf.keras.callbacks.TensorBoard(
log_dir=os.path.join('logs', datetime.now().strftime("%Y%m%d-%H%M%S")),
write_graph=True,
write_images=True,
histogram_freq=1
)
%tensorboard --logdir logs --port [port_num]
Numpy
Regression
Simple Linear regression
import numpy as np
import matplotlib.pyplot as plt
def cost():
    c = 0
    for i in range(len(X)):
        c += (W * X[i] + b - Y[i]) ** 2  # squared error of the hypothesis W*x + b
    return c / len(X)
def W_grad():
return np.sum(np.multiply(np.multiply(W, X) + b - Y, X))
def b_grad():
return np.sum(np.multiply(np.multiply(W, X) + b - Y, 1))
# data
X = np.array([1, 2, 3, 4, 5])
Y = np.array([1, 2, 3, 4, 5])
# parameters
W = 2.5; b = 1;
alpha = 0.01; beta = 0.1;
fig, axes = plt.subplots(1,2,figsize=(15,5))
# gradient descent
epochs = 5;
curr_cost = []; step = [];
for i in range(epochs):
# update
W = W - np.multiply(alpha, W_grad()); print('W = ', W)
b = b - np.multiply(beta, b_grad()); print('b = ', b)
# visualize results
curr_cost.append(cost())
step.append(i+1)
axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost)
axes[1].plot(X,Y, 'o')
axes[0].grid(True)
axes[1].grid(True)
plt.show()
Multi-variable regression
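A minimal NumPy sketch in the same vectorized form as the TensorFlow version below; the toy data and learning rate are chosen for illustration:
import numpy as np
# data: two features as a (2, 5) matrix, targets as a (5,) vector
X = np.array([[1., 0., 3., 0., 5.],
              [0., 2., 0., 4., 0.]])
Y = np.array([1., 2., 3., 4., 5.])
# parameters
W = np.array([[1.0, 1.0]]); b = 1.0
alpha = 0.05
# gradient descent
for i in range(5):
    error = W @ X + b - Y              # (1,2) @ (2,5) = (1,5)
    W_grad = error @ X.T / X.shape[1]  # mean gradient w.r.t. W
    b_grad = error.mean()              # mean gradient w.r.t. b
    W = W - alpha * W_grad
    b = b - alpha * b_grad
    print('cost =', np.mean(error ** 2))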
Logistic regression
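A minimal NumPy sketch of logistic regression on the same toy data as the TensorFlow version below:
import numpy as np
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
# data
X = np.array([[1., 2.], [2., 3.], [3., 1.], [4., 3.], [5., 3.], [6., 2.]])
Y = np.array([[0.], [0.], [0.], [1.], [1.], [1.]])
# parameters
W = np.zeros((2, 1)); b = 0.0
alpha = 0.1
# gradient descent
for i in range(1000):
    hypothesis = sigmoid(X @ W + b)           # (6,2) @ (2,1) = (6,1)
    cost = -np.mean(Y * np.log(hypothesis) + (1 - Y) * np.log(1 - hypothesis))
    W_grad = X.T @ (hypothesis - Y) / len(X)  # gradient of the cross-entropy cost
    b_grad = np.mean(hypothesis - Y)
    W -= alpha * W_grad
    b -= alpha * b_grad
print('cost =', cost, 'prediction =', (hypothesis > 0.5).astype(int).T)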
Soft-max regression
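A minimal NumPy sketch of soft-max regression; the toy 3-class data is chosen for illustration:
import numpy as np
def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))  # subtract max for numerical stability
    return e / e.sum(axis=1, keepdims=True)
# data: 4 samples, 2 features, 3 classes (one-hot labels)
X = np.array([[1., 2.], [2., 1.], [4., 5.], [5., 4.]])
Y = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.], [0., 0., 1.]])
# parameters
W = np.zeros((2, 3)); b = np.zeros(3)
alpha = 0.1
# gradient descent
for i in range(1000):
    hypothesis = softmax(X @ W + b)                    # (4,3)
    cost = -np.mean(np.sum(Y * np.log(hypothesis), axis=1))
    W_grad = X.T @ (hypothesis - Y) / len(X)
    b_grad = (hypothesis - Y).mean(axis=0)
    W -= alpha * W_grad
    b -= alpha * b_grad
print('cost =', cost, 'prediction =', hypothesis.argmax(axis=1))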
FCN
FCN through numerical method
import time
import numpy as np
epsilon = 0.0001
def _t(x):
return np.transpose(x)
def _m(A, B):
return np.matmul(A, B)
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def mean_squared_error(h, y):
return 1 / 2 * np.mean(np.square(h - y))
class Neuron:
def __init__(self, W, b, a):
# Model Parameter
self.W = W
self.b = b
self.a = a
# Gradients
self.dW = np.zeros_like(self.W)
self.db = np.zeros_like(self.b)
def __call__(self, x):
return self.a(_m(_t(self.W), x) + self.b) # activation((W^T)x + b)
class DNN:
def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
def init_var(i, o):
return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))
self.sequence = list()
# First hidden layer
W, b = init_var(num_input, num_neuron)
self.sequence.append(Neuron(W, b, activation))
# Hidden layers
for _ in range(hidden_depth - 1):
W, b = init_var(num_neuron, num_neuron)
self.sequence.append(Neuron(W, b, activation))
# Output layer
W, b = init_var(num_neuron, num_output)
self.sequence.append(Neuron(W, b, activation))
def __call__(self, x):
for layer in self.sequence:
x = layer(x)
return x
def calc_gradient(self, x, y, loss_func):
def get_new_sequence(layer_index, new_neuron):
new_sequence = list()
for i, layer in enumerate(self.sequence):
if i == layer_index:
new_sequence.append(new_neuron)
else:
new_sequence.append(layer)
return new_sequence
def eval_sequence(x, sequence):
for layer in sequence:
x = layer(x)
return x
loss = loss_func(self(x), y)
for layer_id, layer in enumerate(self.sequence): # iterate layer
for w_i, w in enumerate(layer.W): # iterate W (row)
for w_j, ww in enumerate(w): # iterate W (col)
W = np.copy(layer.W)
W[w_i][w_j] = ww + epsilon
new_neuron = Neuron(W, layer.b, layer.a)
new_seq = get_new_sequence(layer_id, new_neuron)
h = eval_sequence(x, new_seq)
num_grad = (loss_func(h, y) - loss) / epsilon # (f(x+eps) - f(x)) / epsilon
layer.dW[w_i][w_j] = num_grad
for b_i, bb in enumerate(layer.b): # iterate b
b = np.copy(layer.b)
b[b_i] = bb + epsilon
new_neuron = Neuron(layer.W, b, layer.a)
new_seq = get_new_sequence(layer_id, new_neuron)
h = eval_sequence(x, new_seq)
num_grad = (loss_func(h, y) - loss) / epsilon # (f(x+eps) - f(x)) / epsilon
layer.db[b_i] = num_grad
return loss
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
loss = network.calc_gradient(x, y, loss_obj)
for layer in network.sequence:
layer.W += -alpha * layer.dW
layer.b += -alpha * layer.db
return loss
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))
dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)
t = time.time()
for epoch in range(100):
loss = gradient_descent(dnn, x, y, mean_squared_error, 0.01)
print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))
FCN through backpropagation
import time
import numpy as np
def _t(x):
return np.transpose(x)
def _m(A, B):
return np.matmul(A, B)
class Sigmoid:
def __init__(self):
self.last_o = 1
def __call__(self, x):
self.last_o = 1 / (1.0 + np.exp(-x))
return self.last_o
def grad(self): # sigmoid(x)(1-sigmoid(x))
return self.last_o * (1 - self.last_o)
class MeanSquaredError:
def __init__(self):
# gradient
self.dh = 1
self.last_diff = 1
def __call__(self, h, y): # 1/2 * mean ((h - y)^2)
self.last_diff = h - y
return 1 / 2 * np.mean(np.square(h - y))
def grad(self): # h - y
return self.last_diff
class Neuron:
def __init__(self, W, b, a_obj):
# Model parameters
self.W = W
self.b = b
self.a = a_obj()
# gradient
self.dW = np.zeros_like(self.W)
self.db = np.zeros_like(self.b)
self.dh = np.zeros_like(_t(self.W))
self.last_x = np.zeros((self.W.shape[0]))
self.last_h = np.zeros((self.W.shape[1]))
def __call__(self, x):
self.last_x = x
self.last_h = _m(_t(self.W), x) + self.b
return self.a(self.last_h)
    def grad(self): # dh_{l+1}/dh_l = W * a'(h)
        return self.W * self.a.grad()
def grad_W(self, dh):
grad = np.ones_like(self.W)
grad_a = self.a.grad()
for j in range(grad.shape[1]): # dy/dw = x
grad[:, j] = dh[j] * grad_a[j] * self.last_x
return grad
    def grad_b(self, dh): # dy/db = 1
return dh * self.a.grad()
class DNN:
def __init__(self, hidden_depth, num_neuron, input, output, activation=Sigmoid):
def init_var(i, o):
return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))
self.sequence = list()
# First hidden layer
W, b = init_var(input, num_neuron)
self.sequence.append(Neuron(W, b, activation))
# Hidden Layers
        for _ in range(hidden_depth - 1): # one fewer than depth, matching the numerical version above
W, b = init_var(num_neuron, num_neuron)
self.sequence.append(Neuron(W, b, activation))
# Output Layer
W, b = init_var(num_neuron, output)
self.sequence.append(Neuron(W, b, activation))
def __call__(self, x):
for layer in self.sequence:
x = layer(x)
return x
def calc_gradient(self, loss_obj):
loss_obj.dh = loss_obj.grad()
self.sequence.append(loss_obj)
# back-prop loop
for i in range(len(self.sequence) - 1, 0, -1):
l1 = self.sequence[i]
l0 = self.sequence[i - 1]
l0.dh = _m(l0.grad(), l1.dh)
l0.dW = l0.grad_W(l1.dh)
l0.db = l0.grad_b(l1.dh)
self.sequence.remove(loss_obj)
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
loss = loss_obj(network(x), y) # Forward inference
network.calc_gradient(loss_obj) # Back-propagation
for layer in network.sequence:
layer.W += -alpha * layer.dW
layer.b += -alpha * layer.db
return loss
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))
t = time.time()
dnn = DNN(hidden_depth=5, num_neuron=32, input=10, output=2, activation=Sigmoid)
loss_obj = MeanSquaredError()
for epoch in range(100):
loss = gradient_descent(dnn, x, y, loss_obj, alpha=0.01)
print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))
CNN
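A minimal NumPy sketch of the convolution a CNN layer computes (valid mode, single channel; strictly a cross-correlation, as in deep learning frameworks):
import numpy as np
def conv2d(x, k):
    H, W = x.shape; kH, kW = k.shape
    out = np.zeros((H - kH + 1, W - kW + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = np.sum(x[i:i+kH, j:j+kW] * k)  # slide the kernel over the input
    return out
x = np.arange(16.).reshape(4, 4)
k = np.array([[1., 0.], [0., -1.]])
print(conv2d(x, k))  # (3, 3) feature map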
RNN
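A minimal NumPy sketch of one vanilla RNN cell unrolled over a toy sequence, h_t = tanh(x_t Wx + h_{t-1} Wh + b); the sizes are arbitrary:
import numpy as np
T, D, H = 5, 3, 4                        # timesteps, input dim, hidden dim
x = np.random.normal(size=(T, D))
Wx = np.random.normal(scale=0.1, size=(D, H))
Wh = np.random.normal(scale=0.1, size=(H, H))
b = np.zeros(H)
h = np.zeros(H)
for t in range(T):
    h = np.tanh(x[t] @ Wx + h @ Wh + b)  # the hidden state carries context forward
    print('h_%d =' % t, h)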
Tensorflow
Regression
Simple Linear regression
import tensorflow as tf
import matplotlib.pyplot as plt
def cost():
return tf.reduce_mean(tf.square(W * X + b - Y))
def W_grad():
return tf.reduce_mean(tf.multiply(tf.multiply(W, X) + b - Y, X))
def b_grad():
    return tf.reduce_mean(tf.multiply(W, X) + b - Y)
# data
X = [1., 2., 3., 4., 5.]
Y = [1., 3., 5., 7., 9.]
# parameters
W = tf.Variable([5.0]); b = tf.Variable([1.0]);
alpha = 0.05; beta = 0.05;
fig, axes = plt.subplots(1,2, figsize=(10,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
    new_W = W - tf.multiply(alpha, W_grad()); W.assign(new_W); print('W = ', W.numpy())
    new_b = b - tf.multiply(beta, b_grad()); b.assign(new_b); print('b = ', b.numpy())
# visualize results
step.append(i+1)
curr_cost.append(cost())
axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()
with GradientTape
import tensorflow as tf
import matplotlib.pyplot as plt
#tf.enable_eager_execution()
# data
X = [1, 2, 3, 4, 5]
Y = [1, 2, 3, 4, 5]
# parameters
W = tf.Variable(2.9); b = tf.Variable(0.5);
alpha = 0.03; beta = 0.03;
fig, axes = plt.subplots(1,2, figsize=(10,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
with tf.GradientTape() as tape:
hypothesis = W * X + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
W_grad, b_grad = tape.gradient(cost, [W, b])
W.assign_sub(alpha * W_grad); print('W = ', W.numpy())
b.assign_sub(beta * b_grad); print('b = ', b.numpy())
# visualize results
curr_cost.append(cost)
step.append(i+1)
axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()
Multi-variable regression
with GradientTape
import tensorflow as tf
import matplotlib.pyplot as plt
# data
X1 = [1, 0, 3, 0, 5]; X2 = [0, 2, 0, 4, 0]
Y = [1, 2, 3, 4, 5]
# parameters
W1 = tf.Variable([1.0]); W2 = tf.Variable([1.0]); b = tf.Variable([1.0]);
alpha1 = tf.Variable(0.03); alpha2 = tf.Variable(0.03); beta = tf.Variable(0.03);
fig, axes = plt.subplots(1,3,figsize=(15,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
with tf.GradientTape() as tape:
hypothesis = W1*X1 + W2*X2 + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
W1_grad, W2_grad, b_grad = tape.gradient(cost, [W1, W2, b])
W1.assign_sub(alpha1 * W1_grad); print('W1 = ', W1.numpy())
W2.assign_sub(alpha2 * W2_grad); print('W2 = ', W2.numpy())
b.assign_sub(beta * b_grad); print('b = ', b.numpy())
# visualize results
curr_cost.append(cost)
step.append(i+1)
axes[1].plot(X1, W1*X1 + W2*X2 + b)
axes[2].plot(X2, W1*X1 + W2*X2 + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X1, Y, 'x')
axes[2].plot(X2, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
with GradientTape, vectorization(matrix)
import tensorflow as tf
import matplotlib.pyplot as plt
# data
X = [[1., 0., 3., 0., 5.],
[0., 2., 0., 4., 0.]]
Y = [1, 2, 3, 4, 5]
# parameters
W = tf.Variable([[1.0, 1.0]]); b = tf.Variable([1.0]);
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
with tf.GradientTape() as tape:
hypothesis = tf.matmul(W, X) + b # (1, 2) * (2, 5) = (1, 5)
cost = tf.reduce_mean(tf.square(hypothesis - Y))
W_grad, b_grad = tape.gradient(cost, [W, b])
W.assign_sub(learning_rate * W_grad); print(W.numpy())
b.assign_sub(learning_rate * b_grad); print(b.numpy())
# visualize results
curr_cost.append(cost)
step.append(i+1)
axes[1].plot(X[0], W[0][0]*X[0] + W[0][1]*X[1] + b)
axes[2].plot(X[1], W[0][0]*X[0] + W[0][1]*X[1] + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[0], Y, 'x')
axes[2].plot(X[1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
with GradientTape, vectorization(matrix), optimizer(update)
import tensorflow as tf
import matplotlib.pyplot as plt
# data
X = [[1., 0., 3., 0., 5.],
[0., 2., 0., 4., 0.]]
Y = [1, 2, 3, 4, 5]
# parameters
W = tf.Variable([[1.0, 1.0]]); b = tf.Variable([1.0]);
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = tf.keras.optimizers.SGD(learning_rate)
for i in range(epochs):
with tf.GradientTape() as tape:
hypothesis = tf.matmul(W, X) + b # (1, 2) * (2, 5) = (1, 5)
cost = tf.reduce_mean(tf.square(hypothesis - Y))
W_grad, b_grad = tape.gradient(cost, [W, b])
optimizer.apply_gradients(grads_and_vars=zip([W_grad, b_grad],[W, b])); print('W = ', W.numpy(),'b = ',b.numpy())
# visualize results
curr_cost.append(cost)
step.append(i+1)
axes[1].plot(X[0], W[0][0]*X[0] + W[0][1]*X[1] + b)
axes[2].plot(X[1], W[0][0]*X[0] + W[0][1]*X[1] + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[0], Y, 'x')
axes[2].plot(X[1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
with GradientTape, vectorization(matrix), implicit bias(b)
import tensorflow as tf
import matplotlib.pyplot as plt
# data
X = [[1., 1., 1., 1., 1.], # bias(b)
[1., 0., 3., 0., 5.], # feature 1
[0., 2., 0., 4., 0.]] # feature 2
Y = [1, 2, 3, 4, 5]
# parameters
W = tf.Variable([[1.0, 1.0, 1.0]])
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
with tf.GradientTape() as tape:
hypothesis = tf.matmul(W, X) # (1, 3) * (3, 5) = (1, 5)
cost = tf.reduce_mean(tf.square(hypothesis - Y))
W_grad = tape.gradient(cost, [W])
W.assign_sub(learning_rate * W_grad[0]);print(W.numpy())
# visualize results
curr_cost.append(cost)
step.append(i+1)
axes[1].plot(X[1], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
axes[2].plot(X[2], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[1], Y, 'x')
axes[2].plot(X[2], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
with GradientTape, vectorization(matrix), implicit bias(b), optimizer(update)
import tensorflow as tf
import matplotlib.pyplot as plt
# data
X = [[1., 1., 1., 1., 1.], # bias(b)
[1., 0., 3., 0., 5.], # feature 1
[0., 2., 0., 4., 0.]] # feature 2
Y = [1, 2, 3, 4, 5]
# parameters
W = tf.Variable([[1.0, 1.0, 1.0]])
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = tf.keras.optimizers.SGD(learning_rate)
for i in range(epochs):
with tf.GradientTape() as tape:
hypothesis = tf.matmul(W, X) # (1, 3) * (3, 5) = (1, 5)
cost = tf.reduce_mean(tf.square(hypothesis - Y))
W_grad = tape.gradient(cost, [W])
optimizer.apply_gradients(grads_and_vars=zip(W_grad,[W])); print(W.numpy())
# visualize results
curr_cost.append(cost)
step.append(i+1)
axes[1].plot(X[1], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
axes[2].plot(X[2], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[1], Y, 'x')
axes[2].plot(X[2], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
Logistic regression
import tensorflow as tf
import matplotlib.pyplot as plt
# data
x_train = tf.constant([[1., 2.],
[2., 3.],
[3., 1.],
[4., 3.],
[5., 3.],
[6., 2.]])
y_train = tf.constant([[0.],
[0.],
[0.],
[1.],
[1.],
[1.]])
x_test = tf.constant([[5.,2.]])
y_test = tf.constant([[1.]])
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))#.repeat()
# parameters
W = tf.Variable(tf.zeros([2,1])); b = tf.Variable(tf.zeros([1]));
learning_rate = 0.001
fig, axes = plt.subplots(1,3,figsize=(15,5))
# gradient descent
epochs = 1000
curr_cost = []; step = [];
optimizer = tf.keras.optimizers.SGD(learning_rate)
for i in range(epochs):
for features, labels in iter(dataset):
with tf.GradientTape() as tape:
            hypothesis = tf.divide(1., 1. + tf.exp(-(tf.matmul(features, W) + b))) # sigmoid(XW + b)
cost = -tf.reduce_mean(labels * tf.math.log(hypothesis) + (1 - labels) * tf.math.log(1 - hypothesis))
grads = tape.gradient(cost, [W,b])
optimizer.apply_gradients(grads_and_vars=zip(grads,[W,b])); print(W.numpy(), b.numpy())
# visualize results
curr_cost.append(cost)
step.append(i+1)
    axes[1].plot(features[:,0], tf.divide(1., 1. + tf.exp(-(tf.matmul(features, W) + b))))
    axes[2].plot(features[:,1], tf.divide(1., 1. + tf.exp(-(tf.matmul(features, W) + b))))
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(x_train[:,0], y_train, 'x')
axes[2].plot(x_train[:,1], y_train, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
hypothesis = tf.divide(1., 1. + tf.exp(-(tf.matmul(x_test, W) + b)))
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, y_test), dtype=tf.float32))
print(predicted.numpy(), accuracy.numpy())
Soft-max regression
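A minimal GradientTape sketch in the same style as the logistic example above; the toy 3-class data is chosen for illustration:
import tensorflow as tf
# data: 2 features, 3 classes (one-hot labels)
x_data = tf.constant([[1., 2.], [2., 1.], [3., 4.], [4., 3.], [5., 6.], [6., 5.]])
y_data = tf.constant([[1., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 1., 0.], [0., 0., 1.], [0., 0., 1.]])
# parameters
W = tf.Variable(tf.zeros([2, 3])); b = tf.Variable(tf.zeros([3]));
optimizer = tf.keras.optimizers.SGD(0.1)
# gradient descent
for i in range(1000):
    with tf.GradientTape() as tape:
        hypothesis = tf.nn.softmax(tf.matmul(x_data, W) + b)
        cost = tf.reduce_mean(-tf.reduce_sum(y_data * tf.math.log(hypothesis), axis=1))
    grads = tape.gradient(cost, [W, b])
    optimizer.apply_gradients(zip(grads, [W, b]))
print('prediction =', tf.argmax(hypothesis, axis=1).numpy())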
Perceptron
OR
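A minimal Keras sketch: OR is linearly separable, so a single sigmoid unit suffices; the learning rate is an arbitrary choice:
import tensorflow as tf
x = tf.constant([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y_or = tf.constant([[0.], [1.], [1.], [1.]])
model = tf.keras.Sequential([tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(2,))])
model.compile(optimizer=tf.keras.optimizers.Adam(0.1), loss='binary_crossentropy')
model.fit(x, y_or, epochs=500, verbose=0)
print(model.predict(x).round().T)  # expected [[0. 1. 1. 1.]]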
XOR
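XOR is not linearly separable, so a hidden layer is required; a minimal sketch with arbitrary layer size and learning rate:
import tensorflow as tf
x = tf.constant([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y_xor = tf.constant([[0.], [1.], [1.], [0.]])
model = tf.keras.Sequential([tf.keras.layers.Dense(8, activation='tanh', input_shape=(2,)),
                             tf.keras.layers.Dense(1, activation='sigmoid')])
model.compile(optimizer=tf.keras.optimizers.Adam(0.1), loss='binary_crossentropy')
model.fit(x, y_xor, epochs=500, verbose=0)
print(model.predict(x).round().T)  # expected [[0. 1. 1. 0.]]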
FCN
Beginner mode
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(2000, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(1000, activation='relu'),
tf.keras.layers.Dense(500, activation='relu'),
tf.keras.layers.Dense(200, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)
Expert mode
CNN
Beginner mode
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.models import Sequential
EPOCHS = 10
def MyModel():
return Sequential([Conv2D(32, (3, 3), padding='same', activation='relu'), # 28x28x32
MaxPool2D(), # 14x14x32
Conv2D(64, (3, 3), padding='same', activation='relu'), # 14x14x64
MaxPool2D(), # 7x7x64
Conv2D(128, (3, 3), padding='same', activation='relu'), # 7x7x128
Flatten(), # 6272
                   Dense(128, activation='relu'), # 128
                   Dense(10, activation='softmax')]) # 10
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)
# NHWC
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32).prefetch(2048)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32).prefetch(2048)
model = MyModel()
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHS)
Expert mode
import tensorflow as tf
import numpy as np
EPOCHS = 10
class ConvNet(tf.keras.Model):
def __init__(self):
super(ConvNet, self).__init__()
conv2d = tf.keras.layers.Conv2D
maxpool = tf.keras.layers.MaxPool2D
self.sequence = list()
self.sequence.append(conv2d(16, (3, 3), padding='same', activation='relu')) # 28x28x16
self.sequence.append(conv2d(16, (3, 3), padding='same', activation='relu')) # 28x28x16
self.sequence.append(maxpool((2,2))) # 14x14x16
self.sequence.append(conv2d(32, (3, 3), padding='same', activation='relu')) # 14x14x32
self.sequence.append(conv2d(32, (3, 3), padding='same', activation='relu')) # 14x14x32
self.sequence.append(maxpool((2,2))) # 7x7x32
self.sequence.append(conv2d(64, (3, 3), padding='same', activation='relu')) # 7x7x64
self.sequence.append(conv2d(64, (3, 3), padding='same', activation='relu')) # 7x7x64
        self.sequence.append(tf.keras.layers.Flatten()) # 3136 (7x7x64)
self.sequence.append(tf.keras.layers.Dense(128, activation='relu'))
self.sequence.append(tf.keras.layers.Dense(10, activation='softmax'))
def call(self, x, training=False, mask=None):
for layer in self.sequence:
x = layer(x)
return x
# Implement training loop
@tf.function
def train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy):
with tf.GradientTape() as tape:
        predictions = model(images, training=True)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
predictions = model(images)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
# x_train : (NUM_SAMPLE, 28, 28) -> (NUM_SAMPLE, 28, 28, 1)
x_train = x_train[..., tf.newaxis].astype(np.float32)
x_test = x_test[..., tf.newaxis].astype(np.float32)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
# Create model
model = ConvNet()
# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
for epoch in range(EPOCHS):
for images, labels in train_ds:
train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy)
for test_images, test_labels in test_ds:
test_step(model, test_images, test_labels, loss_object, test_loss, test_accuracy)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch + 1,
train_loss.result(),
train_accuracy.result() * 100,
test_loss.result(),
test_accuracy.result() * 100))
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
CNN(DNN)
Beginner mode
Expert mode
import tensorflow as tf
import numpy as np
EPOCHS = 10
class DenseUnit(tf.keras.Model):
def __init__(self, filter_out, kernel_size):
super(DenseUnit, self).__init__()
self.bn = tf.keras.layers.BatchNormalization()
self.conv = tf.keras.layers.Conv2D(filter_out, kernel_size, padding='same')
self.concat = tf.keras.layers.Concatenate()
def call(self, x, training=False, mask=None): # x: (Batch, H, W, Ch_in)
h = self.bn(x, training=training)
h = tf.nn.relu(h)
h = self.conv(h) # h: (Batch, H, W, filter_output)
return self.concat([x, h]) # (Batch, H, W, (Ch_in + filter_output))
class DenseLayer(tf.keras.Model):
def __init__(self, num_unit, growth_rate, kernel_size):
super(DenseLayer, self).__init__()
self.sequence = list()
for idx in range(num_unit):
self.sequence.append(DenseUnit(growth_rate, kernel_size))
def call(self, x, training=False, mask=None):
for unit in self.sequence:
x = unit(x, training=training)
return x
class TransitionLayer(tf.keras.Model):
def __init__(self, filters, kernel_size):
super(TransitionLayer, self).__init__()
self.conv = tf.keras.layers.Conv2D(filters, kernel_size, padding='same')
self.pool = tf.keras.layers.MaxPool2D()
def call(self, x, training=False, mask=None):
x = self.conv(x)
return self.pool(x)
class DenseNet(tf.keras.Model):
def __init__(self):
super(DenseNet, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(8, (3, 3), padding='same', activation='relu') # 28x28x8
self.dl1 = DenseLayer(2, 4, (3, 3)) # 28x28x16
self.tr1 = TransitionLayer(16, (3, 3)) # 14x14x16
self.dl2 = DenseLayer(2, 8, (3, 3)) # 14x14x32
self.tr2 = TransitionLayer(32, (3, 3)) # 7x7x32
self.dl3 = DenseLayer(2, 16, (3, 3)) # 7x7x64
self.flatten = tf.keras.layers.Flatten()
self.dense1 = tf.keras.layers.Dense(128, activation='relu')
self.dense2 = tf.keras.layers.Dense(10, activation='softmax')
def call(self, x, training=False, mask=None):
x = self.conv1(x)
x = self.dl1(x, training=training)
x = self.tr1(x)
x = self.dl2(x, training=training)
x = self.tr2(x)
x = self.dl3(x, training=training)
x = self.flatten(x)
x = self.dense1(x)
return self.dense2(x)
# Implement training loop
@tf.function
def train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy):
with tf.GradientTape() as tape:
predictions = model(images, training=True)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
predictions = model(images, training=False)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis].astype(np.float32)
x_test = x_test[..., tf.newaxis].astype(np.float32)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
# Create model
model = DenseNet()
# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
for epoch in range(EPOCHS):
for images, labels in train_ds:
train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy)
for test_images, test_labels in test_ds:
test_step(model, test_images, test_labels, loss_object, test_loss, test_accuracy)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch + 1,
train_loss.result(),
train_accuracy.result() * 100,
test_loss.result(),
test_accuracy.result() * 100))
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
RNN(LSTM)
Beginner mode
import tensorflow as tf
EPOCHS = 10
NUM_WORDS = 10000
class MyModel(tf.keras.Model):
def __init__(self):
super().__init__()
self.emb = tf.keras.layers.Embedding(NUM_WORDS, 16)
        self.rnn = tf.keras.layers.SimpleRNN(32) # the Expert mode below uses LSTM instead
self.dense = tf.keras.layers.Dense(1, activation='sigmoid')
def call(self, x, training=None, mask=None):
x = self.emb(x)
x = self.rnn(x)
return self.dense(x)
imdb = tf.keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=NUM_WORDS)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
value=0,
padding='pre',
maxlen=32)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
value=0,
padding='pre',
maxlen=32)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(1000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
model = MyModel()
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHS)
Expert mode
import tensorflow as tf
EPOCHS = 10
NUM_WORDS = 10000
class MyModel(tf.keras.Model):
def __init__(self):
super(MyModel, self).__init__()
self.emb = tf.keras.layers.Embedding(NUM_WORDS, 16)
self.rnn = tf.keras.layers.LSTM(32)
self.dense = tf.keras.layers.Dense(2, activation='softmax')
def call(self, x, training=None, mask=None):
x = self.emb(x)
x = self.rnn(x)
return self.dense(x)
# Implement training loop
@tf.function
def train_step(model, inputs, labels, loss_object, optimizer, train_loss, train_accuracy):
with tf.GradientTape() as tape:
predictions = model(inputs, training=True)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
predictions = model(images, training=False)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
imdb = tf.keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=NUM_WORDS)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
value=0,
padding='pre',
maxlen=32)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
value=0,
padding='pre',
maxlen=32)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
# Create model
model = MyModel()
# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
for epoch in range(EPOCHS):
for seqs, labels in train_ds:
train_step(model, seqs, labels, loss_object, optimizer, train_loss, train_accuracy)
for test_seqs, test_labels in test_ds:
test_step(model, test_seqs, test_labels, loss_object, test_loss, test_accuracy)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch + 1,
train_loss.result(),
train_accuracy.result() * 100,
test_loss.result(),
test_accuracy.result() * 100))
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
GAN
Beginner mode
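A minimal fully-connected GAN sketch on MNIST; the noise dimension, layer sizes, and learning rates are arbitrary choices:
import tensorflow as tf
import numpy as np
# generator: noise -> 28x28 image; discriminator: image -> real/fake probability
generator = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(64,)),
    tf.keras.layers.Dense(784, activation='sigmoid'),
    tf.keras.layers.Reshape((28, 28))])
discriminator = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')])
bce = tf.keras.losses.BinaryCrossentropy()
g_opt = tf.keras.optimizers.Adam(1e-4)
d_opt = tf.keras.optimizers.Adam(1e-4)
@tf.function
def train_step(images):
    noise = tf.random.normal([tf.shape(images)[0], 64])
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fake = generator(noise, training=True)
        real_out = discriminator(images, training=True)
        fake_out = discriminator(fake, training=True)
        d_loss = bce(tf.ones_like(real_out), real_out) + bce(tf.zeros_like(fake_out), fake_out)
        g_loss = bce(tf.ones_like(fake_out), fake_out)  # generator wants fakes classified as real
    g_opt.apply_gradients(zip(g_tape.gradient(g_loss, generator.trainable_variables), generator.trainable_variables))
    d_opt.apply_gradients(zip(d_tape.gradient(d_loss, discriminator.trainable_variables), discriminator.trainable_variables))
    return g_loss, d_loss
(x_train, _), _ = tf.keras.datasets.mnist.load_data()
train_ds = tf.data.Dataset.from_tensor_slices((x_train / 255.0).astype(np.float32)).shuffle(10000).batch(128)
for epoch in range(10):
    for images in train_ds:
        g_loss, d_loss = train_step(images)
    print('Epoch {}, G loss: {}, D loss: {}'.format(epoch + 1, g_loss.numpy(), d_loss.numpy()))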
Expert mode
ResNET
Beginner mode
Expert mode
import tensorflow as tf
import numpy as np
EPOCHS = 10
class ResidualUnit(tf.keras.Model):
def __init__(self, filter_in, filter_out, kernel_size):
super(ResidualUnit, self).__init__()
self.bn1 = tf.keras.layers.BatchNormalization()
self.conv1 = tf.keras.layers.Conv2D(filter_out, kernel_size, padding='same')
self.bn2 = tf.keras.layers.BatchNormalization()
self.conv2 = tf.keras.layers.Conv2D(filter_out, kernel_size, padding='same')
if filter_in == filter_out:
self.identity = lambda x: x
else:
self.identity = tf.keras.layers.Conv2D(filter_out, (1,1), padding='same')
def call(self, x, training=False, mask=None):
h = self.bn1(x, training=training)
h = tf.nn.relu(h)
h = self.conv1(h)
h = self.bn2(h, training=training)
h = tf.nn.relu(h)
h = self.conv2(h)
return self.identity(x) + h
class ResnetLayer(tf.keras.Model):
def __init__(self, filter_in, filters, kernel_size):
super(ResnetLayer, self).__init__()
self.sequence = list()
for f_in, f_out in zip([filter_in] + list(filters), filters):
self.sequence.append(ResidualUnit(f_in, f_out, kernel_size))
def call(self, x, training=False, mask=None):
for unit in self.sequence:
x = unit(x, training=training)
return x
class ResNet(tf.keras.Model):
def __init__(self):
super(ResNet, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(8, (3, 3), padding='same', activation='relu') # 28x28x8
self.res1 = ResnetLayer(8, (16, 16), (3, 3)) # 28x28x16
self.pool1 = tf.keras.layers.MaxPool2D((2, 2)) # 14x14x16
self.res2 = ResnetLayer(16, (32, 32), (3, 3)) # 14x14x32
self.pool2 = tf.keras.layers.MaxPool2D((2, 2)) # 7x7x32
self.res3 = ResnetLayer(32, (64, 64), (3, 3)) # 7x7x64
self.flatten = tf.keras.layers.Flatten()
self.dense1 = tf.keras.layers.Dense(128, activation='relu')
self.dense2 = tf.keras.layers.Dense(10, activation='softmax')
def call(self, x, training=False, mask=None):
x = self.conv1(x)
x = self.res1(x, training=training)
x = self.pool1(x)
x = self.res2(x, training=training)
x = self.pool2(x)
x = self.res3(x, training=training)
x = self.flatten(x)
x = self.dense1(x)
return self.dense2(x)
# Implement training loop
@tf.function
def train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy):
with tf.GradientTape() as tape:
predictions = model(images, training=True)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
predictions = model(images, training=False)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis].astype(np.float32)
x_test = x_test[..., tf.newaxis].astype(np.float32)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
# Create model
model = ResNet()
# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
for epoch in range(EPOCHS):
for images, labels in train_ds:
train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy)
for test_images, test_labels in test_ds:
test_step(model, test_images, test_labels, loss_object, test_loss, test_accuracy)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch + 1,
train_loss.result(),
train_accuracy.result() * 100,
test_loss.result(),
test_accuracy.result() * 100))
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
Attention Net
Beginner mode
Expert mode
import random
import tensorflow as tf
from konlpy.tag import Okt
EPOCHS = 200
NUM_WORDS = 2000
class Encoder(tf.keras.Model):
def __init__(self):
super(Encoder, self).__init__()
self.emb = tf.keras.layers.Embedding(NUM_WORDS, 64)
self.lstm = tf.keras.layers.LSTM(512, return_sequences=True, return_state=True)
def call(self, x, training=False, mask=None):
x = self.emb(x)
H, h, c = self.lstm(x)
return H, h, c
class Decoder(tf.keras.Model):
def __init__(self):
super(Decoder, self).__init__()
self.emb = tf.keras.layers.Embedding(NUM_WORDS, 64)
self.lstm = tf.keras.layers.LSTM(512, return_sequences=True, return_state=True)
self.att = tf.keras.layers.Attention()
self.dense = tf.keras.layers.Dense(NUM_WORDS, activation='softmax')
def call(self, inputs, training=False, mask=None):
x, s0, c0, H = inputs
x = self.emb(x)
S, h, c = self.lstm(x, initial_state=[s0, c0])
S_ = tf.concat([s0[:, tf.newaxis, :], S[:, :-1, :]], axis=1)
A = self.att([S_, H])
y = tf.concat([S, A], axis=-1)
return self.dense(y), h, c
class Seq2seq(tf.keras.Model):
def __init__(self, sos, eos):
super(Seq2seq, self).__init__()
self.enc = Encoder()
self.dec = Decoder()
self.sos = sos
self.eos = eos
def call(self, inputs, training=False, mask=None):
if training is True:
x, y = inputs
H, h, c = self.enc(x)
y, _, _ = self.dec((y, h, c, H))
return y
else:
x = inputs
H, h, c = self.enc(x)
y = tf.convert_to_tensor(self.sos)
y = tf.reshape(y, (1, 1))
seq = tf.TensorArray(tf.int32, 64)
for idx in tf.range(64):
y, h, c = self.dec([y, h, c, H])
y = tf.cast(tf.argmax(y, axis=-1), dtype=tf.int32)
y = tf.reshape(y, (1, 1))
seq = seq.write(idx, y)
if y == self.eos:
break
return tf.reshape(seq.stack(), (1, 64))
# Implement training loop
@tf.function
def train_step(model, inputs, labels, loss_object, optimizer, train_loss, train_accuracy):
output_labels = labels[:, 1:]
shifted_labels = labels[:, :-1]
with tf.GradientTape() as tape:
predictions = model([inputs, shifted_labels], training=True)
loss = loss_object(output_labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
train_accuracy(output_labels, predictions)
# Implement algorithm test
@tf.function
def test_step(model, inputs):
return model(inputs, training=False)
dataset_file = 'chatbot_data.csv' # acquired from 'http://www.aihub.or.kr' and modified
okt = Okt()
with open(dataset_file, 'r') as file:
lines = file.readlines()
seq = [' '.join(okt.morphs(line)) for line in lines]
questions = seq[::2]
answers = ['\t ' + line for line in seq[1::2]]
num_sample = len(questions)
perm = list(range(num_sample))
random.seed(0)
random.shuffle(perm)
train_q = list()
train_a = list()
test_q = list()
test_a = list()
for idx, qna in enumerate(zip(questions, answers)):
q, a = qna
if perm[idx] > num_sample//5:
train_q.append(q)
train_a.append(a)
else:
test_q.append(q)
test_a.append(a)
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=NUM_WORDS,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~')
tokenizer.fit_on_texts(train_q + train_a)
train_q_seq = tokenizer.texts_to_sequences(train_q)
train_a_seq = tokenizer.texts_to_sequences(train_a)
test_q_seq = tokenizer.texts_to_sequences(test_q)
test_a_seq = tokenizer.texts_to_sequences(test_a)
x_train = tf.keras.preprocessing.sequence.pad_sequences(train_q_seq,
value=0,
padding='pre',
maxlen=64)
y_train = tf.keras.preprocessing.sequence.pad_sequences(train_a_seq,
value=0,
padding='post',
maxlen=65)
x_test = tf.keras.preprocessing.sequence.pad_sequences(test_q_seq,
value=0,
padding='pre',
maxlen=64)
y_test = tf.keras.preprocessing.sequence.pad_sequences(test_a_seq,
value=0,
padding='post',
maxlen=65)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32).prefetch(1024)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(1).prefetch(1024)
# Create model
model = Seq2seq(sos=tokenizer.word_index['\t'],
eos=tokenizer.word_index['\n'])
# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
for epoch in range(EPOCHS):
for seqs, labels in train_ds:
train_step(model, seqs, labels, loss_object, optimizer, train_loss, train_accuracy)
template = 'Epoch {}, Loss: {}, Accuracy: {}'
print(template.format(epoch + 1,
train_loss.result(),
train_accuracy.result() * 100))
train_loss.reset_states()
train_accuracy.reset_states()
for test_seq, test_labels in test_ds:
prediction = test_step(model, test_seq)
test_text = tokenizer.sequences_to_texts(test_seq.numpy())
gt_text = tokenizer.sequences_to_texts(test_labels.numpy())
texts = tokenizer.sequences_to_texts(prediction.numpy())
print('_')
print('q: ', test_text)
print('a: ', gt_text)
print('p: ', texts)
Transfer learning
Beginner mode
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
EPOCHS = 100
def MyModel():
feat = tf.keras.applications.MobileNetV2(input_shape=(224, 224, 3),
include_top=False)
feat.trainable = False
seq = tf.keras.models.Sequential()
seq.add(feat) # h x w x c
seq.add(tf.keras.layers.GlobalAveragePooling2D()) # c
seq.add(tf.keras.layers.Dense(1, activation='sigmoid'))
return seq
split = tfds.Split.TRAIN.subsplit(weighted=(8, 2))
dataset, meta = tfds.load('cats_vs_dogs',
split=list(split),
with_info=True,
as_supervised=True)
train_ds, test_ds = dataset
l2s = meta.features['label'].int2str
for img, label in test_ds.take(2):
plt.figure()
plt.imshow(img)
plt.title(l2s(label))
def preprocess(img, label):
img = tf.cast(img, tf.float32) / 255.0
img = tf.image.resize(img, (224, 224))
return img, label
train_ds = train_ds.map(preprocess).batch(32).prefetch(1024)
test_ds = test_ds.map(preprocess).batch(32).prefetch(1024)
model = MyModel()
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHS)
Expert mode
Pytorch
Regression
Simple Linear regression
import torch
import matplotlib.pyplot as plt
def cost():
return torch.mean((W*X + b - Y) ** 2)
def W_grad():
return torch.sum((W*X + b - Y) * X)
def b_grad():
return torch.sum((W*X + b - Y) * 1)
# data
X = torch.FloatTensor([[1], [2], [3]])
Y = torch.FloatTensor([[1], [2], [3]])
# parameters
W = torch.zeros(1); b = torch.zeros(1);
alpha = 0.1; beta = 0.1;
fig, axes = plt.subplots(1,2, figsize=(10,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
W -= alpha * W_grad(); print('W =', W.item())
b -= beta * b_grad(); print('b =', b.item())
# visualize results
step.append(i+1)
curr_cost.append(cost().item())
axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()
with optimizer
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
# data
X = torch.FloatTensor([[1], [2], [3]])
Y = torch.FloatTensor([[1], [2], [3]])
# parameters
W = torch.zeros(1, requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr = 0.1;
fig, axes = plt.subplots(1,2, figsize=(10,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
hypothesis = W*X + b
cost = torch.mean((hypothesis - Y) ** 2)
optimizer.zero_grad()
cost.backward()
optimizer.step(); print('W =', W.item(), 'b =', b.item())
# visualize results
step.append(i+1)
curr_cost.append(cost.item())
axes[1].plot(X, W.detach()*X + b.detach())
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()
Multi-variable regression
with optimizer
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
# data
X1 = torch.FloatTensor([[73], [93], [89], [96], [73]])
X2 = torch.FloatTensor([[80], [88], [91], [98], [66]])
X3 = torch.FloatTensor([[75], [93], [90], [100], [70]])
Y = torch.FloatTensor([[152], [185], [180], [196], [142]])
# parameters
W1 = torch.zeros(1, requires_grad=True)
W2 = torch.zeros(1, requires_grad=True)
W3 = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
lr=1e-5
fig, axes = plt.subplots(2,2, figsize=(10,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = optim.SGD([W1, W2, W3, b], lr)
for i in range(epochs):
hypothesis = X1*W1 + X2*W2 + X3*W3 + b
cost = torch.mean((hypothesis - Y) ** 2)
optimizer.zero_grad()
cost.backward()
optimizer.step(); print('W1 =',W1.item(),'W2 =',W2.item(),'W3 =',W3.item(),'b =',b.item())
# visualize results
step.append(i+1)
curr_cost.append(cost.item())
axes[0,1].plot(X1, W1.detach()*X1 + W2.detach()*X2 + W3.detach()*X3 + b.detach())
axes[1,0].plot(X2, W1.detach()*X1 + W2.detach()*X2 + W3.detach()*X3 + b.detach())
axes[1,1].plot(X3, W1.detach()*X1 + W2.detach()*X2 + W3.detach()*X3 + b.detach())
axes[0,0].plot(step, curr_cost, marker='o', ls='-')
axes[0,1].plot(X1, Y, 'x')
axes[1,0].plot(X2, Y, 'x')
axes[1,1].plot(X3, Y, 'x')
axes[0,0].grid(True)
axes[0,1].grid(True)
axes[1,0].grid(True)
axes[1,1].grid(True)
plt.show()
with optimizer, vectorization(matrix)
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
# data
X = torch.FloatTensor([[73, 80, 75],
[93, 88, 93],
[89, 91, 90],
[96, 98, 100],
[73, 66, 70]])
Y = torch.FloatTensor([[152], [185], [180], [196], [142]])
# parameters
W = torch.zeros((3, 1), requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr=1e-5
fig, axes = plt.subplots(2,2, figsize=(10,5))
# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
hypothesis = X.matmul(W) + b # or .mm or @
cost = torch.mean((hypothesis - Y) ** 2)
optimizer.zero_grad()
cost.backward()
optimizer.step(); print('W =',W,'b =',b)
# visualize results
step.append(i+1)
curr_cost.append(cost.item())
axes[0,1].plot(X[:,0], X.matmul(W.detach()) + b.detach())
axes[1,0].plot(X[:,1], X.matmul(W.detach()) + b.detach())
axes[1,1].plot(X[:,2], X.matmul(W.detach()) + b.detach())
axes[0,0].plot(step, curr_cost, marker='o', ls='-')
axes[0,1].plot(X[:,0], Y, 'x')
axes[1,0].plot(X[:,1], Y, 'x')
axes[1,1].plot(X[:,2], Y, 'x')
axes[0,0].grid(True)
axes[0,1].grid(True)
axes[1,0].grid(True)
axes[1,1].grid(True)
plt.show()
Logistic regression
with optimizer, vectorization(matrix)
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
# data
X = torch.FloatTensor([[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]])
Y = torch.FloatTensor([[0], [0], [0], [1], [1], [1]])
# parameters
W = torch.zeros((2, 1), requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr=1
fig, axes = plt.subplots(1,3, figsize=(10,5))
# gradient descent
epochs = 10
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
hypothesis = torch.sigmoid(X.matmul(W) + b) # or .mm or @
cost = -(Y * torch.log(hypothesis) +
(1 - Y) * torch.log(1 - hypothesis)).mean()
optimizer.zero_grad()
cost.backward()
optimizer.step(); print('W =',W, 'b =',b)
# visualize results
step.append(i+1)
curr_cost.append(cost.item())
axes[1].plot(X[:,0], torch.sigmoid(X.matmul(W.detach()) + b.detach()))
axes[2].plot(X[:,1], torch.sigmoid(X.matmul(W.detach()) + b.detach()))
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[:,0], Y, 'x')
axes[2].plot(X[:,1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
with optimizer, vectorization(matrix), functional
import torch
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
# data
X = torch.FloatTensor([[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]])
Y = torch.FloatTensor([[0], [0], [0], [1], [1], [1]])
# parameters
W = torch.zeros((2, 1), requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr=1
fig, axes = plt.subplots(1,3, figsize=(10,5))
# gradient descent
epochs = 10
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
hypothesis = torch.sigmoid(X.matmul(W) + b) # or .mm or @
cost = F.binary_cross_entropy(hypothesis, Y)
optimizer.zero_grad()
cost.backward()
optimizer.step(); print('W =',W, 'b =',b)
# visualize results
step.append(i+1)
curr_cost.append(cost.item())
axes[1].plot(X[:,0], torch.sigmoid(X.matmul(W.detach()) + b.detach()))
axes[2].plot(X[:,1], torch.sigmoid(X.matmul(W.detach()) + b.detach()))
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[:,0], Y, 'x')
axes[2].plot(X[:,1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()
Soft-max regression
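A minimal PyTorch sketch with F.cross_entropy (which combines log-softmax and negative log-likelihood, so the model outputs raw logits); the toy data is chosen for illustration:
import torch
import torch.nn.functional as F
import torch.optim as optim
# data: 2 features, 3 classes (integer class labels)
X = torch.FloatTensor([[1, 2], [2, 1], [3, 4], [4, 3], [5, 6], [6, 5]])
Y = torch.LongTensor([0, 0, 1, 1, 2, 2])
# parameters
W = torch.zeros((2, 3), requires_grad=True); b = torch.zeros(3, requires_grad=True);
optimizer = optim.SGD([W, b], lr=0.1)
# gradient descent
for i in range(1000):
    logits = X.matmul(W) + b
    cost = F.cross_entropy(logits, Y)  # log-softmax + NLL in one call
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
print('cost =', cost.item(), 'prediction =', logits.argmax(dim=1))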
Perceptron
XOR
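A minimal PyTorch sketch: a single perceptron cannot represent XOR, so a two-layer network is used; layer size and learning rate are arbitrary:
import torch
import torch.nn as nn
import torch.optim as optim
# data: the XOR truth table
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])
model = nn.Sequential(nn.Linear(2, 8), nn.Tanh(),   # hidden layer makes XOR separable
                      nn.Linear(8, 1), nn.Sigmoid())
optimizer = optim.Adam(model.parameters(), lr=0.1)
criterion = nn.BCELoss()
for i in range(500):
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
print((model(X) > 0.5).int().T)  # expected [[0, 1, 1, 0]]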
FCN
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
seed = 1
lr = 0.001
momentum = 0.5
batch_size = 64
test_batch_size = 64
epochs = 5
no_cuda = False
log_interval = 100
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
self.fc1 = nn.Linear(4*4*50, 500)
self.fc2 = nn.Linear(500, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 4*4*50)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
torch.manual_seed(seed)
use_cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=test_batch_size, shuffle=True, **kwargs)
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
for epoch in range(1, epochs + 1):
# Train Mode
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
        optimizer.zero_grad() # zero the gradients before computing backpropagation
output = model(data)
loss = F.nll_loss(output, target) # https://pytorch.org/docs/stable/nn.html#nll-loss
        loss.backward() # compute gradients by backpropagation
optimizer.step()
if batch_idx % log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
# Test mode
    model.eval() # switch batch norm, dropout, etc. to evaluation mode
test_loss = 0
correct = 0
    with torch.no_grad(): # turn off the autograd engine (no backprop/gradient computation) to cut memory usage and speed up evaluation
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item() # count predictions that match the target
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
CNN
RNN
GAN
List of posts that follow this article
Reference