# heavily based on:
# https://stackoverflow.com/questions/39921607/tensorflow-how-to-make-a-custom-activation-function-with-only-python
# https://gist.github.com/harpone/3453185b41d8d985356cbe5e57d67342


# turning a numpy function into a tensorflow function takes three ingredients
# (a quick check right after the imports below shows why 2) and 3) are needed):
# we will use 1) tf.py_func(func, inp, Tout, stateful=stateful, name=name),
#                which wraps any numpy function as a tensorflow op (but registers no gradient for it)
#                https://www.tensorflow.org/api_docs/python/tf/py_func
# we will use 2) tf.RegisterGradient
#                https://www.tensorflow.org/versions/r0.11/api_docs/python/framework/defining_new_operations#RegisterGradient
# we will use 3) tf.Graph.gradient_override_map
#                https://www.tensorflow.org/versions/r0.11/api_docs/python/framework/core_graph_data_structures#Graph.gradient_override_map


import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
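
# quick check (a minimal sketch, TF 1.x; the names _demo_x / _demo_y are
# illustrative, not from the original): an op produced by tf.py_func alone has
# no registered gradient, so differentiating through it fails -- which is why
# steps 2) and 3) above are needed at all
_demo_x = tf.constant([1.0, 2.0])
_demo_y = tf.py_func(lambda a: a * a, [_demo_x], tf.float32)
try:
    print(tf.gradients(_demo_y, [_demo_x]))
except LookupError as e:
    print('tf.py_func alone registers no gradient: %s' % e)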


# define a plain-Python custom relu: zero below the threshold, identity at or above it
def my_relu_def(x, threshold=0.05):
    if x < threshold:
        return 0.0
    else:
        return x

# ... and its derivative: zero below the threshold, one at or above it
def my_relu_grad_def(x, threshold=0.05):
    if x < threshold:
        return 0.0
    else:
        return 1.0

# np.vectorize applies the scalar function to every element of a numpy array,
# turning the plain function into a numpy function
my_relu_np = np.vectorize(my_relu_def)
my_relu_grad_np = np.vectorize(my_relu_grad_def)
# cast the result: numpy defaults to float64 but tensorflow uses float32
my_relu_np_32 = lambda x: my_relu_np(x).astype(np.float32)
my_relu_grad_np_32 = lambda x: my_relu_grad_np(x).astype(np.float32)
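
# sanity check of the numpy versions before involving tensorflow
# (illustrative inputs; with threshold=0.05, only 0.08 is passed through):
print(my_relu_np_32(np.array([-0.3, 0.005, 0.08])))       # -> [ 0.    0.    0.08]
print(my_relu_grad_np_32(np.array([-0.3, 0.005, 0.08])))  # -> [ 0.  0.  1.]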


# wrap the numpy gradient as a tensorflow op in its own right
# (plain tf.py_func is enough here: we never differentiate through the gradient itself)
def my_relu_grad_tf(x, name=None):
    with ops.name_scope(name, "my_relu_grad_tf", [x]) as name:
        y = tf.py_func(my_relu_grad_np_32,
                       [x],
                       [tf.float32],
                       name=name,
                       stateful=False)
        return y[0]

def my_py_func(func, inp, Tout, stateful=False, name=None, my_grad_func=None):
    # need to generate a unique name to avoid duplicate gradient registrations:
    random_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
    tf.RegisterGradient(random_name)(my_grad_func)  # see _my_relu_grad below for the expected form
    g = tf.get_default_graph()
    # ops created inside this block look up their gradient under random_name
    # instead of the (unregistered) default for PyFunc / PyFuncStateless
    with g.gradient_override_map({"PyFunc": random_name, "PyFuncStateless": random_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

# The grad function we pass to my_py_func above takes a special form:
# it receives (the operation, the gradient arriving from the ops after it)
# and must return the gradient propagated backward through the operation.
def _my_relu_grad(op, pre_grad):
    x = op.inputs[0]
    cur_grad = my_relu_grad_tf(x)    # elementwise relu'(x)
    next_grad = pre_grad * cur_grad  # chain rule
    return next_grad
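
# side check (a sketch, not from the original): the same plumbing works for any
# numpy function whose derivative we can write down, e.g. square with derivative 2x
_sq_x = tf.constant([3.0])
_sq_y = my_py_func(lambda a: np.square(a).astype(np.float32),
                   [_sq_x], [tf.float32],
                   my_grad_func=lambda op, g: g * 2.0 * op.inputs[0])[0]
with tf.Session() as _s:
    print(_s.run(tf.gradients(_sq_y, [_sq_x])[0]))  # -> [ 6.]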

def my_relu_tf(x, name=None):
    with ops.name_scope(name, "my_relu_tf", [x]) as name:
        y = my_py_func(my_relu_np_32,
                       [x],
                       [tf.float32],
                       stateful=False,
                       name=name,
                       my_grad_func=_my_relu_grad)  # <-- here's the call to the gradient
        return y[0]

with tf.Session() as sess:
    x = tf.constant([-0.3, 0.005, 0.08, 0.12])
    y = my_relu_tf(x)
    tf.global_variables_initializer().run()  # no variables here, but harmless
    print(x.eval())
    print(y.eval())
    print(tf.gradients(y, [x])[0].eval())

# [-0.30000001  0.005       0.08        0.12      ]
# [ 0.    0.    0.08  0.12]
# [ 0.  0.  1.  1.]
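
# verification sketch (the names analytic / numeric / eps / x_np are
# illustrative, not from the original; assumes no input sits exactly at the
# threshold): the analytic gradient should match a finite-difference estimate
with tf.Session() as sess:
    x = tf.constant([-0.3, 0.005, 0.08, 0.12])
    y = my_relu_tf(x)
    analytic = tf.gradients(tf.reduce_sum(y), [x])[0].eval()
eps = 1e-3
x_np = np.array([-0.3, 0.005, 0.08, 0.12], dtype=np.float32)
numeric = (my_relu_np_32(x_np + eps) - my_relu_np_32(x_np - eps)) / (2 * eps)
print(analytic)  # -> [ 0.  0.  1.  1.]
print(numeric)   # ~  [ 0.  0.  1.  1.]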