SPRAAK
nn_main.c File Reference

main routines and types for handling (deep) neural networks More...

Data Structures

struct  SprNNOpInfo
 
union  _Union1_NN_MAIN_
 
struct  SprNNOp
 one operation on the connections More...
 
struct  SprNNLayer
 a NN layer More...
 
struct  SprNNConnect
 connection between layers (contains operations) More...
 
struct  SprNN
 
struct  SprNNOptions
 
union  SprNNIDo
 
struct  _Struct3_NN_MAIN_
 
struct  SprNNIWS
 the NN interface work space More...
 

Macros

#define SPR_NN_VEC_SZ
 
#define SPR_NN_VEC_AS
 
#define SprNNFltV_
 
#define SprNNFltA_
 
#define SprNNIntV_
 
#define SprNNIntA_
 
#define SprNNUIntV_
 
#define SprNNUIntA_
 
#define SPR_NN_N_PARALLEL
 compute SPR_NN_N_PARALLEL inputs together in 'parallel' mode (faster) More...
 
#define spr_nn_dt_flt
 
#define SPR_NN_OP_MAX
 
#define SPR_NN_OP_REQ_I(CB_flags)
 
#define SPR_NN_OP_REQ_O(CB_flags)
 

Typedefs

typedef float SprNNFltS
 a single floating point parameter More...
 
typedef int32_t SprNNIntS
 this type encodes the corresponding int type More...
 
typedef uint32_t SprNNUIntS
 this type encodes the corresponding unsigned int type More...
 
typedef SprNNFltV_ SprNNFltV
 a vector of floating point parameters More...
 
typedef SprNNIntV_ SprNNIntV
 this type encodes the corresponding int type More...
 
typedef SprNNUIntV_ SprNNUIntV
 this type encodes the corresponding unsigned int type More...
 
typedef SprNNFltA_ SprNNFltA
 
typedef SprNNIntA_ SprNNIntA
 this type encodes the corresponding int type More...
 
typedef SprNNUIntA_ SprNNUIntA
 this type encodes the corresponding unsigned int type More...
 
typedef struct SprNNStreamO_t SprNNStreamO
 
typedef struct SprNNStreamC_t SprNNStreamC
 close an open stream More...
 
typedef struct SprNNIOEl_t SprNNIOEl
 
typedef SprNNFltS(* SprNNfwdS )(SprNNFltS x, char *restrict p)
 
typedef SprNNFltV(* SprNNfwdV )(SprNNFltV x, char *restrict p)
 
typedef SprNNFltS(* SprNNbwdPS )(SprNNFltS dE, void *restrict p)
 
typedef SprNNFltV(* SprNNbwdPV )(SprNNFltV dE, void *restrict p)
 
typedef SprNNFltS(* SprNNbwdBS )(SprNNFltS x, void *restrict p)
 bwd evaluation, compute dp, process one scalar input element More...
 
typedef SprNNFltV(* SprNNbwdBV )(SprNNFltV x, void *restrict p)
 bwd evaluation, compute dp, process one vector input element More...
 
typedef SprNNOp *(* SprNNmodif )(SprNNOp *restrict op)
 change behavior based on the arguments and based on the next operation More...
 
typedef struct SprNNI_t SprNNI
 NN interface. More...
 
typedef int(* SprNNfast )(const SprNNConnect *restrict connect, unsigned int flags, SprNNIWS *ws)
 change behavior based on the arguments and based on the next operation More...
 
typedef int(* SprNNIDoFunc )(const SprNNIDo *restrict args, int pos)
 
typedef void *(* SprNNDataIn )(void *restrict layer_val, void *restrict src, int Nel)
 function pointer to handle the input data More...
 
typedef void *(* SprNNDataOut )(void *restrict dst, const void *restrict layer_val, int Nel)
 function pointer to handle the output data More...
 

Enumerations

enum  {
  SPR_NN_EVAL, SPR_NN_TRAIN, SPR_NN_NOVECTOR, SPR_NN_NOSCALAR,
  SPR_NN_PARALLEL, SPR_NN_ASYNC, SPR_NN_TRAIN2, SPR_NN_NOWARN
}
 
enum  { SPR_NN_SSP_READ }
 
enum  {
  SPR_NN_CT_DIRECT, SPR_NN_CT_FULL, SPR_NN_CT_SPARSE, SPR_NN_CT_SELECT,
  SPR_NN_CT_TREE, SPR_NN_CT_WEIGTH, SPR_NN_CT_MERGE
}
 
enum  {
  SPR_NN_OP_C, SPR_NN_OP_B, SPR_NN_OP_T, SPR_NN_OP_P,
  SPR_NN_OP_HAVE_PARAM
}
 
enum  { SPR_NN_OP_I, SPR_NN_OP_O }
 
enum  { SPR_NN_OP_NOP }
 
enum  {
  SPR_NN_OP_REPLACE_PARAM, SPR_NN_OP_INT, SPR_NN_OP_DEF_VAL, SPR_NN_OP_REQUIRED_PARAM,
  SPR_NN_OP_DEF_VAL_P0, SPR_NN_OP_DEF_VAL_P1, SPR_NN_OP_DEF_VAL_P2, SPR_NN_OP_DEF_VAL_M1,
  SPR_NN_OP_DEF_VAL_PH, SPR_NN_OP_DEF_VAL_MSK, SPR_NN_OP_ARG_END
}
 
enum  { SPR_NN_LAYER_BIAS, SPR_NN_LAYER_GRAD, SPR_NN_LAYER_INPUT, SPR_NN_LAYER_OUTPUT }
 
enum  { SPR_NNI_DO_EVAL, SPR_NNI_DO_TRAIN_FWD, SPR_NNI_DO_TRAIN_BWD, SPR_NNI_DO_ARGS }
 

Functions

int spr_nn_dump (SprStream *fd, const SprNN *restrict nn)
 
unsigned int spr_nn_decode_options (const char *desc, int *MT)
 
SprNN * spr_nn_free (SprNN *nn)
 
SprNN * spr_nn_init (SprStream *fd, const char *fname, int flags, SprVarlist *vars)
 
void * spr_nn_data_in_memcpy (void *restrict layer_val, void *restrict src, int Nel)
 a standard implementation to handle the input data More...
 
void * spr_nn_data_out_memcpy (void *restrict dst, const void *restrict layer_val, int Nel)
 a standard implementation to handle the output data More...
 
void * spr_nn_data_out_null (void *restrict dst, const void *restrict layer_val, int Nel)
 ignore the output data (flush the system) More...
 
int spr_nni_feed_input (SprNNI *restrict nni, void *restrict data, SprNNDataIn func_get)
 
int spr_nni_read_output (SprNNI *restrict nni, int block, void *restrict data, SprNNDataOut func_put)
 
SprNNI * spr_nni_free (SprNNI *restrict nni)
 
void * spr_nni_get_const_space (SprNNIWS *restrict ws, int vec, int sz)
 
void spr_nni_unget_code_space (SprNNIWS *restrict ws, int sz)
 Give the last sz code elements back to the buffer. More...
 
SprNNIDo * spr_nni_get_code_space (SprNNIWS *restrict ws, int sz)
 
SprNNI * spr_nn_interface (SprNN *restrict nn, int flags, int MT)
 

Variables

const SprCmdOptDesc spr_nn_option_desc []
 

Detailed Description

main routines and types for handling (deep) neural networks

The main routines and types for (deep) neural networks.

A neural network (NN) consists of different layers with interconnections between them. Every connection (arc) between a node in the source layer and a node in the destination layer consists of a sequence of linear and non-linear functions. Output layers can be sub-divided into continuous regions – this allows information from multiple sub-models (input layers) to be merged. The sequence of functions on the arcs (and the definition of which parameters are trainable) is the same for all arcs in a group, the group being defined by the input layers and (the region within) the output layer. Arcs typically have one or more free (trainable) parameters. These parameters can be tied across groups, and in the special case of a simple 1-to-1 connection scheme also between all arcs in the group. Functions may have extra arguments which are specified between braces '()', separated by commas. The allowed number of arguments, their type (float, integer or string) and the effect they have depend on the function. In some cases, some of the immediate function arguments replace one or more of the free (arc specific) parameters. Functions can also have options, e.g. to not update the parameters during training, or to specify parameter tying.
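
As a rough illustration of how the routines declared in this file fit together, the sketch below shows a minimal evaluation cycle. The header name, the NULL arguments, the flag and MT values, the file name and the buffer sizes are assumptions for illustration only and should be checked against the individual function documentation.

#include <spraak.h>                                            /* assumed umbrella header for the SPRAAK API    */

/* Hedged sketch of one evaluation pass: load a network, build the run-time
 * interface, push one input frame and read back the corresponding output.
 * Error checking is omitted; in real use the network would be loaded once.  */
int eval_one_frame(float *frame, float *result)
{
  SprNN  *nn  = spr_nn_init(NULL, "mlp.desc", 0, NULL);        /* "mlp.desc" is a hypothetical description file */
  SprNNI *nni = spr_nn_interface(nn, SPR_NN_EVAL, 1);          /* evaluation mode; MT value is an assumption    */
  spr_nni_feed_input(nni, frame, spr_nn_data_in_memcpy);       /* copy the input frame into the input layer     */
  spr_nni_read_output(nni, 1, result, spr_nn_data_out_memcpy); /* blocking read of the output layer             */
  nni = spr_nni_free(nni);                                     /* release the interface work space              */
  nn  = spr_nn_free(nn);                                       /* release the network itself                    */
  return 0;
}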

The available linear and non-linear functions and their arguments are:

nop
No operation (copy unchanged).
scale
Multiply the input with a factor.
scaleR(r=1)
Scaling, training with a regularisation cost of (0.5*w^2)*r.
scale(a)
Multiply the input with a constant a.
bias
Add an offset to the input.
biasR(r=1)
Offset, training with a regularisation cost of (0.5*w^2)*r.
bias(c)
Add a constant c to the input.
const
Replace the input with a trainable value.
const(c)
Replace the input with a constant c.
poly1
Evaluate a*x+b
poly1R(r=1)
Scaling+offset, training with a regularisation cost of (0.5*w^2)*r
poly1(a,c)
Evaluate a*x+c, a and c being constants
poly
Evaluate a polynomial in x; the first parameter is the order of the polynomial, followed by the polynomial coefficients ordered from high to low order (x^N ... x^0). The order of the polynomial is non-trainable but can be set differently for each arc.
poly(p)
Evaluate a polynomial of order p in x; the polynomial coefficients must be stored in high to low order (x^p ... x^0).
sigm(a=1,p=1)
Evaluate a sigmoid (1/(1+exp(-a*x))), with an optional scaling of the input with a. A scale factor a equal to 0.0 indicates that the scaling factor is a trainable parameter. The parameter p defines the precision (and hence speed) of the computation (0 being the most exact, and higher numbers sacrificing accuracy for speed).
tanh(a=1,p=1)
Evaluate a hyperbolic tangent (tanh(a*x)), with an optional scaling of the input with a. A scale factor a equal to 0.0 indicates that the scaling factor is a trainable parameter. The parameter p defines the precision (and hence speed) of the computation (0 being the most exact, and higher numbers sacrificing accuracy for speed).
rop1(a=1,b=1,c=0)
Evaluate f(x*a)*b+c with f() a ratio of first order polynomials designed to mimic the behaviour of tanh() – f(x)=x/(|x|+1).
rop2(a=1,b=1,c=0)
Evaluate f(x*a)*b+c with f() a ratio of second order polynomials designed to mimic the behaviour of tanh() – f(x)=(x*|x|+x)/(|x|^2+|x|+1).
rop3(a=1,b=1,c=0)
Evaluate f(x*a)*b+c with f() a ratio of third order polynomials designed to mimic the behaviour of tanh() – f(x)=(x^3+x*|x|+1)/(|x|^3+|x|^2+|x|+1).
sign(l=0,p=1,n=-1)
Function that outputs p if x >= l and outputs n otherwise; this function is not differentiable and can thus not be trained!
abs(a=1,c=0)
Evaluate a*|x|+c, a and c being constants.
clip(l=-1,h=1)
Clip the input to the interval [l,h].
rlu(p=1,n=0)
Evaluate ((x>=0.0)?p:n)*x.
rlu1
Evaluate ((x>=0.0)?1:b)*x, with b a trainable parameter.
rlu2
Evaluate ((x>=0.0)?a:b)*x, with (a,b) trainable parameters.
lsigm(c=0)
Evaluate log(1+exp(x/(1+|x*c|))). This behaves as a smooth version of the rlu non-linearity if c=0, and morphs into (a smooth version of) sign(x)/(2c)+log(2) if c is a large positive number.
dropout(p=0.5,a=-1,b=-1,c=-1,d=-1)
Arbitrarily turn the propagation of information on or off. The parameter p controls the fraction of the time the output is turned on. The parameters a, b and c are the seeds used to initialize the RNG. To have a working RNG, a, b and c should not be in the interval [0,1], [0...7] and [0...15] respectively!
dist2
Evaluate (a*x+b)^2, (a,b) being the trainable parameters
dist1
Evaluate |a*x+b|, (a,b) being the trainable parameters
dist2C
Evaluate (x+b)^2, b being the trainable parameter
dist1C
Evaluate |x+b|, b being the trainable parameter
dist2p
Evaluate (a*x+b)^2-log(|a|+eps), (a,b) being the trainable parameters; this forms the basis for a multi-variate Gaussian distribution with a diagonal covariance
dist1p
Evaluate |a*x+b|-log(|a|+eps)/2, (a,b) being the trainable parameters; this forms the basis for a multi-variate Laplace distribution with a diagonal covariance
exp(a=1,b=1,c=0)
Evaluate exp(x*a)*b+c. If a == b == c == 0, then a and b become trainable parameters. If a == c == 0 and b == 1, then a becomes a trainable parameter.
pae1(a=1,b=1,c=0)
Evaluate f(x*a)*b+c with f() an approximation of exp() using first order polynomials – xp=max(x,0), xn=max(-x,0), f(x)=xp+1/(xn+1).
pae2(a=1,b=1,c=0)
Evaluate f(x*a)*b+c with f() an approximation of exp() using second order polynomials – xp=max(x,0), xn=max(-x,0), f(x)=xp^2/2+xp+1/(xn^2/2+xn+1).
pae3(a=1,b=1,c=0)
Evaluate f(x*a)*b+c with f() an approximation of exp() using third order polynomials – xp=max(x,0), xn=max(-x,0), f(x)=xp^3/6+xp^2/2+xp+1/(xn^3/6+xn^2/2+xn+1).
log(a=1,b=1,c=0)
Evaluate log(|x+c|)*a+b.
pow
Evaluate sign(x)*|x|^|p| with p the trainable parameter, i.e. raise x to a certain power with the sign of x being preserved.
pow(a=1,p,c=0)
Evaluate a*sign(x)*((|x|+|c|)^p-|c|^p), with a, p and c constants
pow2(a=1,c=0)

Evaluate a*x^2+c, a and c being constants.

norm
Assumes a sequence "LayerX -> full(trans):scale -> LayerY -> direct:norm". The 'norm' function normalizes the preceding scaling operation so that one obtains an inner product of the input vector (layerX) with a unit vector (the 'scale' parameters). Note: a bias added in the full:scale connection is removed before the scaling and is not included when computing the unit vector.
merge_sum(layer)
Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the weighted sum y=x+z*p, with p a trainable parameter.
merge_sum(layer,p)
Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the weighted sum y=x+z*p, p being a constant.
merge_mul(layer)
Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the product of both inputs after raising them to a certain power y=x*sign(z)*|z|^|p|, with p a trainable parameter.
merge_mul(layer,p,c=0)
Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the product of both inputs after raising them to a certain power y=x*sign(z)*((|z|+|c|)^p-|c|^p), p and c being constants.
weight_mul(layer)
Multiply the input x with the weight w read from the layer called layer: y=x*w.
weight_pow(layer)
Raise the input x to the power w, w being a weight read from the layer called layer: y=sign(x)*|x|^|w|.
set_weight(c=0,a=1,b=1)
Set the weights for the two children in a tree evaluation. Input values smaller than or equal to -a are mapped to a weight of 1.0 and 0.0 for the left and right sub-tree respectively. Input values larger than or equal to +b are mapped to a weight of 0.0 (left sub-tree) and 1.0 (right sub-tree). An input value of 0.0 is mapped to an equal weight of 0.5 for both the left and right sub-tree. All other values in the range [-a,b] are mapped to intermediate values using a smooth and continuous curve. The parameter c must be set to a value in the range ]-1.0,1.0] and controls the smoothness of the curve around the corner points -a and b. A value of 1.0 assures a smooth transition (sigmoid-like curve). A value close to -1.0 gives rise to a very fast step-like transition from 0.5 to 1.0 around the two corner points. The left/right sub-tree is only evaluated if the corresponding weight is non-zero.
expR1(r=1,c=0,a=1,b=1)
Exponent with a regularisation cost on the output values y of (0.5*(y-c)^2)*r.
expR2(r=1,c=0,a=1,b=1)
Exponent with a regularisation cost on the input values x of (0.5*x^2)*r.
sigmR1(r=1,a=1)
Sigmoid with an optional scaling of the input with a, training with a regularisation cost of (.25-dsigm(x)/dx)*r.
tanhR1(r=1,a=1)
Tanh with an optional scaling of the input with a, training with a regularisation cost of (1.0-dtanh(x)/dx)*r.
sigmR2(r=1,a=1)
Sigmoid with an optional scaling of the input with a, training with a regularisation cost of r/(1+exp(256/x^2)).
tanhR2(r=1,a=1)
Tanh with an optional scaling of the input with a, training with a regularisation cost of r/(1+exp(64/x^2)).
sigmR3(r=1,a=1)
Sigmoid with an optional scaling of the input with a, training with a regularisation cost of (0.5*x^2)*r.
tanhR3(r=1,a=1)
Tanh with an optional scaling of the input with a, training with a regularisation cost of (0.5*x^2)*r.

The available connection types are:

direct
A 1-to-1 connection. This connection type may also use tied parameters for all arcs.
softmax
Compute the exponential of all inputs and normalize this output vector so that the sum equals 1.0 (a probability distribution).
norm
Divide all inputs by the 1, 2, or inf(-1) norm of the inputs.
full
A full connection: every output node is connected with all input nodes. The results of all incoming connections are added.
sparse
A sparse connection between input and output layer. The individual connections are enumerated. See below for a description of the format.
tree
Identical to a full connection, only the evaluation order differs. A tree connection has a hierarchical order (binary tree) in which only one of the two descendants is evaluated, except when a point falls in a transition region. In this case, both descendants are evaluated. See below for a description of the format.

The MLP description file has the following structure:

[layers]
  Input         <nr_input_nodes>
  <layer_name>  <nr_nodes>
  Output        <nr_output_nodes>
[connections]
  <from>[+] <to>[<range>] <type>[(<alt_opt>)] [ndx_file] <param_file> <functions>
  ...
[options]
  <options>

A layer thus has a unique name and a size (number of nodes). A connection is described with a source and destination layer, an optional range of nodes in the output layer (continuous, non-overlapping), a connection type and a sequence of functions.
The optional '+' that may follow the name of the input layer indicates that one extra bias node (with a value of 1.0) must be added as extra input at the end of the input layer. The connection type has the following format:

<type>(<alt_opt>)

The <alt_opt> is optional and modifies the default behaviour of the connection type. The following connection types are available:

direct(shared)
one-to-one connection, optionally the parameters are shared.
full(trans)
full connection (each node to each node), optionally the parameters are stored in a transposed order (faster evaluation, conformant to the parameter layout for tree evaluation).
sparse(excl)
sparse connections (each input node connects to a selected set of output nodes), the 'excl' specifier should be present when each output node has only one incoming arc.
tree(<nsd>,<buf>)
tree-structured layer; optionally parts of the non-selected sub-tree are also evaluated: the <nsd> option indicates that the non-selected sub-tree should be evaluated to a depth <nsd>, the <buf> option specifies a layer with weights for which any sub-tree with a non-zero weight will be evaluated.

The functions are described as follows:

<name>[<train_arg>](<extra_args>)

The train arguments <train_arg> are optional. They are specified between square brackets '[]' and consist of the letters:

T or C
Parameters that need to be (T)rained or parameters that are fixed (C)onstants.
P or B
To either (P)ropagate the error to the previous layer, or to (B)lock the error back propagation.

The extra arguments are specified between braces '()', separated by commas. The allowed arguments per function, their type (float, integer or string) and the effect they have depend on the function; see the list above.
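
As a purely hypothetical illustration (layer names and sizes, parameter file names and the chosen functions are made up, and the exact separators should be checked against the description above), a small description file could look like:

[layers]
  Input    39
  Hidden   256
  Output   10
[connections]
  Input+   Hidden   full(trans)  hidden.param  scale:bias:sigm
  Hidden+  Output   full(trans)  output.param  scale:bias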

The index file that specifies the sparse connectivity consists of the concatenation of (-1)-terminated arrays (of the type I32) listing the set of outputs for each input. For example, the following indices

[0 2 -1 ...
1 2 -1
3 -1]

describe the connections of a layer that transforms 3 inputs into 4 outputs with the following connectivity:

[1 0 0 1
0 1 0 1
0 0 1 0]
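
For illustration only (this is not SPRAAK code; the function and variable names are made up), the following sketch shows how such (-1)-terminated lists can be expanded into a dense input-by-output connectivity matrix:

#include <stdint.h>

/* Expand the (-1)-terminated output lists (one list per input node) into a
 * dense Nin x Nout 0/1 connectivity matrix, stored row-major in conn[].     */
static void expand_sparse_ndx(const int32_t *ndx, int Nin, int Nout, int *conn)
{
  for (int i = 0; i < Nin; i++) {
    for (int o = 0; o < Nout; o++)
      conn[i*Nout+o] = 0;                /* start with no connections        */
    for (; *ndx != -1; ndx++)
      conn[i*Nout+*ndx] = 1;             /* mark each listed output node     */
    ndx++;                               /* skip the -1 terminator           */
  }
}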

The binary tree (tree connection type) has the following properties:

The tree structure is stored as a tuple of two values (of the type I32) per node. The first value contains the (right) child information:

<has_left_child>*1 + <has_right_child>*2 + <offset_to_right_child>*4

with offset_to_right_child equal to 0 if the node does not have a right child. The second value contains the parent information:

<is_left_child>*1 + <is_right_child>*2 + <ndx_of_parent_node_base0>*4

with ndx_of_parent_node_base0, is_left_child and is_right_child equal to -1, 0 and 0 respectively for the root node.
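
A small illustrative decoder for this per-node encoding (the field and type names are made up and do not correspond to SPRAAK identifiers):

#include <stdint.h>

/* Unpack the two I32 values stored per tree node as described above. */
typedef struct {
  int has_left, has_right;  /* presence of the left/right child                 */
  int right_offset;         /* offset to the right child, 0 if there is none    */
  int is_left, is_right;    /* is this node itself a left/right child?          */
  int parent;               /* parent node index (base 0), -1 for the root      */
} TreeNodeInfo;

static TreeNodeInfo decode_tree_node(int32_t child_info, int32_t parent_info)
{
  TreeNodeInfo n;
  n.has_left     = child_info & 1;
  n.has_right    = (child_info >> 1) & 1;
  n.right_offset = child_info >> 2;
  if (parent_info < 0) {                 /* root node: no parent, not a child   */
    n.is_left = n.is_right = 0;
    n.parent  = -1;
  } else {
    n.is_left  = parent_info & 1;
    n.is_right = (parent_info >> 1) & 1;
    n.parent   = parent_info >> 2;
  }
  return n;
}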

Note
The dropout and set_weight functions return exact 0.0 values for non-active outputs. The normal 0.0 value is replaced with a very small positive value.
Date
Jan 1999
Author
Kris Demuynck
Revision History:
XX/01/1999 - KD
Creation
13/04/2010 - KD
added to SPRAAK
01/10/2012 - KD
clean-up, documentation, added new functions
04/12/2015 - KD
new version with new functionality, speed-up, ... (derived from the old mlp software)