typedef float | SprNNFltS
    a single floating point parameter

typedef int32_t | SprNNIntS
    this type encodes the corresponding int type

typedef uint32_t | SprNNUIntS
    this type encodes the corresponding unsigned int type

typedef SprNNFltV_ | SprNNFltV
    a vector of floating point parameters

typedef SprNNIntV_ | SprNNIntV
    this type encodes the corresponding int type

typedef SprNNUIntV_ | SprNNUIntV
    this type encodes the corresponding unsigned int type

typedef SprNNFltA_ | SprNNFltA

typedef SprNNIntA_ | SprNNIntA
    this type encodes the corresponding int type

typedef SprNNUIntA_ | SprNNUIntA
    this type encodes the corresponding unsigned int type

typedef struct SprNNStreamO_t | SprNNStreamO

typedef struct SprNNStreamC_t | SprNNStreamC
    close an open stream

typedef struct SprNNIOEl_t | SprNNIOEl

typedef SprNNFltS(* | SprNNfwdS )(SprNNFltS x, char *restrict p)

typedef SprNNFltV(* | SprNNfwdV )(SprNNFltV x, char *restrict p)

typedef SprNNFltS(* | SprNNbwdPS )(SprNNFltS dE, void *restrict p)

typedef SprNNFltV(* | SprNNbwdPV )(SprNNFltV dE, void *restrict p)

typedef SprNNFltS(* | SprNNbwdBS )(SprNNFltS x, void *restrict p)
    bwd evaluation, compute dp, process one scalar input element

typedef SprNNFltV(* | SprNNbwdBV )(SprNNFltV x, void *restrict p)
    bwd evaluation, compute dp, process one vector input element

typedef SprNNOp *(* | SprNNmodif )(SprNNOp *restrict op)
    change behaviour based on the arguments and based on the next operation

typedef struct SprNNI_t | SprNNI
    NN interface

typedef int(* | SprNNfast )(const SprNNConnect *restrict connect, unsigned int flags, SprNNIWS *ws)
    change behaviour based on the arguments and based on the next operation

typedef int(* | SprNNIDoFunc )(const SprNNIDo *restrict args, int pos)

typedef void *(* | SprNNDataIn )(void *restrict layer_val, void *restrict src, int Nel)
    function pointer to handle the input data

typedef void *(* | SprNNDataOut )(void *restrict dst, const void *restrict layer_val, int Nel)
    function pointer to handle the output data
int | spr_nn_dump (SprStream *fd, const SprNN *restrict nn)

unsigned int | spr_nn_decode_options (const char *desc, int *MT)

SprNN * | spr_nn_free (SprNN *nn)

SprNN * | spr_nn_init (SprStream *fd, const char *fname, int flags, SprVarlist *vars)

void * | spr_nn_data_in_memcpy (void *restrict layer_val, void *restrict src, int Nel)
    a standard implementation to handle the input data

void * | spr_nn_data_out_memcpy (void *restrict dst, const void *restrict layer_val, int Nel)
    a standard implementation to handle the output data

void * | spr_nn_data_out_null (void *restrict dst, const void *restrict layer_val, int Nel)
    ignore the output data (flush the system)

int | spr_nni_feed_input (SprNNI *restrict nni, void *restrict data, SprNNDataIn func_get)

int | spr_nni_read_output (SprNNI *restrict nni, int block, void *restrict data, SprNNDataOut func_put)

SprNNI * | spr_nni_free (SprNNI *restrict nni)

void * | spr_nni_get_const_space (SprNNIWS *restrict ws, int vec, int sz)

void | spr_nni_unget_code_space (SprNNIWS *restrict ws, int sz)
    give the last sz code elements back to the buffer

SprNNIDo * | spr_nni_get_code_space (SprNNIWS *restrict ws, int sz)

SprNNI * | spr_nn_interface (SprNN *restrict nn, int flags, int MT)
|
main routines and types for handling (deep) neural networks
The main routines and types for (deep) neural networks.
A neural network (NN) consists of different layers with some interconnection between them. Every connection (arc) between a node in the source layer and a node in the destination layer consists of a sequence of linear and non-linear functions. Output layers can be sub-divided into continuous regions – this allows information from multiple sub-models (input layers) to be merged. The sequence of functions on the arcs (and the definition of which parameters are trainable) is the same for all arcs in a group, the group being defined by the input layers and (the region within) the output layer. Arcs typically have one or more free (trainable) parameters. These parameters can be tied across groups, and in the special case of a simple 1-to-1 connection scheme also between all arcs in the group. Functions may have extra arguments, which are specified between braces '()' and separated by commas. The allowed number of arguments, their type (float, integer or string) and the effect they have depend on the function. In some cases, some of the immediate function arguments replace one or more of the free (arc specific) parameters. Functions can also have options, e.g. to not update the parameters during training, or to specify parameter tying.
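The routines listed above combine into a simple life cycle: load a network description, build a run-time interface, stream data through it, and free the resources. The sketch below illustrates this flow; it is an assumption-laden sketch, not verified code: the header name spraak.h, the flag value 0, the MT value 1 and the buffer size are illustrative only.

#include "spraak.h"   /* assumed umbrella header for the SPRAAK NN API */

int run_one_frame(float *frame)
{
    /* read the network description from a file (no open stream, no extra variables) */
    SprNN *nn = spr_nn_init(NULL, "mlp.desc", 0, NULL);
    if (nn == NULL)
        return -1;

    /* build the run-time interface; flags=0 and MT=1 (single threaded) are assumed defaults */
    SprNNI *nni = spr_nn_interface(nn, 0, 1);

    float out[10];  /* must match the size of the Output layer */

    /* push one input frame and read back the result (block=1: wait for the output; assumed semantics) */
    spr_nni_feed_input(nni, frame, spr_nn_data_in_memcpy);
    spr_nni_read_output(nni, 1, out, spr_nn_data_out_memcpy);

    spr_nni_free(nni);
    spr_nn_free(nn);
    return 0;
}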
The available linear and non-linear functions and their arguments are:
- nop
- No operation (copy unchanged).
- scale
- Multiply the input with a factor.
- scaleR(r=1)
- Scaling, training with a regularisation cost of (0.5*w^2)*r.
- scale(a)
- Multiply the input with a constant a.
- bias
- Add an offset to the input.
- biasR(r=1)
- Offset, training with a regularisation cost of (0.5*w^2)*r.
- bias(c)
- Add a constant c to the input.
- const
- Replace the input with a trainable value.
- const(c)
- Replace the input with a constant c.
- poly1
- Evaluate a*x+b
- poly1R(r=1)
- Scaling+offset, training with a regularisation cost of (0.5*w^2)*r
- poly1(a,c)
- Evaluate a*x+c, a and c being constants
- poly
- Evaluate a polynomial in x; the first parameter is the order of the polynomial, followed by the polynomial coefficients ordered from high to low order (x^N ... x^0). The order of the polynomial is non-trainable but can be set differently for each arc.
- poly(p)
- Evaluate a polynomial of order p in x; the polynomial coefficients must be stored in high to low order (x^p ... x^0).
- sigm(a=1,p=1)
- Evaluate a sigmoid (1/(1+exp(-a*x))), with an optional scaling of the input with a. A scale factor a equal to 0.0 indicates that the scaling factor is a trainable parameter. The parameter p defines the precision (and hence speed) of the computation (0 being the most exact, and higher numbers sacrificing accuracy for speed).
- tanh(a=1,p=1)
- Evaluate a hyperbolic tangent (tanh(a*x)), with an optional scaling of the input with a. A scale factor a equal to 0.0 indicates that the scaling factor is a trainable parameter. The parameter p defines the precision (and hence speed) of the computation (0 being the most exact, and higher numbers sacrificing accuracy for speed).
- rop1(a=1,b=1,c=0)
- Evaluate f(x*a)*b+c with f() a ratio of first order polynomials designed to mimic the behaviour of tanh() – f(x)=x/(|x|+1); see the sketch after this list.
- rop2(a=1,b=1,c=0)
- Evaluate f(x*a)*b+c with f() a ratio of second order polynomials designed to mimic the behaviour of tanh() – f(x)=(x*|x|+x)/(|x|^2+|x|+1).
- rop3(a=1,b=1,c=0)
- Evaluate f(x*a)*b+c with f() a ratio of third order polynomials designed to mimic the behaviour of tanh() – f(x)=(x^3+x*|x|+x)/(|x|^3+|x|^2+|x|+1).
- sign(l=0,p=1,n=-1)
- Function that outputs p if x >= l and outputs n otherwise; this function is not differentiable and can thus not be trained!
- abs(a=1,c=0)
- Evaluate a*|x|+c, a and c being constants.
- clip(l=-1,h=1)
- Clip the input to the interval [l,h].
- rlu(p=1,n=0)
- Evaluate ((x>=0.0)?p:n)*x.
- rlu1
- Evaluate ((x>=0.0)?1:b)*x, with b a trainable parameter.
- rlu2
- Evaluate ((x>=0.0)?a:b)*x, with (a,b) trainable parameters.
- lsigm(c=0)
- Evaluate log(1+exp(x/(1+|x*c|))). This behaves as a smooth version of the rlu non-linearity if c=0, and morphs into (a smooth version of) sign(x)/(2c)+log(2) if c is a large positive number.
- dropout(p=0.5,a=-1,b=-1,c=-1,d=-1)
- Randomly turn the propagation of information on or off. The parameter p controls the fraction of the time the output is turned on. The parameters a, b and c are the seeds used to initialize the RNG. To have a working RNG, a, b and c should not lie in the intervals [0,1], [0...7] and [0...15] respectively!
- dist2
- Evaluate (a*x+b)^2, (a,b) being the trainable parameters
- dist1
- Evaluate |a*x+b|, (a,b) being the trainable parameters
- dist2C
- Evaluate (x+b)^2, b being the trainable parameter
- dist1C
- Evaluate |x+b|, b being the trainable parameter
- dist2p
- Evaluate (a*x+b)^2-log(|a|+eps), (a,b) being the trainable parameters; this forms the basis for a multi-variate Gaussian distribution with a diagonal covariance
- dist1p
- Evaluate |a*x+b|-log(|a|+eps)/2, (a,b) being the trainable parameters; this forms the basis for a multi-variate Laplace distribution with a diagonal covariance
- exp(a=1,b=1,c=0)
- Evaluate exp(x*a)*b+c. If a == b == c == 0, then a and b become trainable parameters. If a == c == 0 and b == 1, then a becomes a trainable parameter.
- pae1(a=1,b=1,c=0)
- Evaluate f(x*a)*b+c with f() an approximation of exp() using first order polynomials – xp=max(x,0), xn=max(-x,0), f(x)=xp+1/(xn+1).
- pae2(a=1,b=1,c=0)
- Evaluate f(x*a)*b+c with f() an approximation of exp() using second order polynomials – xp=max(x,0), xn=max(-x,0), f(x)=xp^2/2+xp+1/(xn^2/2+xn+1).
- pae3(a=1,b=1,c=0)
- Evaluate f(x*a)*b+c with f() an approximation of exp() using third order polynomials – xp=max(x,0), xn=max(-x,0), f(x)=xp^3/6+xp^2/2+xp+1/(xn^3/6+xn^2/2+xn+1).
- log(a=1,b=1,c=0)
- Evaluate log(|x+c|)*a+b.
- pow
- Evaluate sign(x)*|x|^|p| with p the trainable parameter, i.e. raise x to a certain power with the sign of x being preserved.
- pow(a=1,p,c=0)
- Evaluate a*sign(x)*((|x|+|c|)^p-|c|^p), with a, p and c constants
- pow2(a=1,c=0)
- Evaluate a*x^2+c, a and c being constants.
- norm
- Assumes a sequence "LayerX -> full(trans):scale -> LayerY -> direct:norm". The 'norm' function normalizes the preceding scaling operation so that one obtains an inner product of the input vector (layerX) with a unit vector (the 'scale' parameters). Note: a bias added in the full:scale connection is removed before the scaling and is not included when computing the unit vector.
- merge_sum(layer)
- Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the weighted sum y=x+z*p, with p a trainable parameter.
- merge_sum(layer,p)
- Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the weighted sum y=x+z*p, p being a constant.
- merge_mul(layer)
- Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the product of both inputs after raising them to a certain power y=x*sign(z)*|z|^|p|, with p a trainable parameter.
- merge_mul(layer,p,c=0)
- Combine the values x and z read from the input layer and the layer called layer respectively into one output value y. The output y equals the product of both inputs after raising them to a certain power y=x*sign(z)*((|z|+|c|)^p-|c|^p), p and c being constants.
- weight_mul(layer)
- Multiply the input x with the weight w read from the layer called layer: y=x*w.
- weight_pow(layer)
- Raise the input x to the power w, w being a weight read from the layer called layer: y=sign(x)*|x|^|w|.
- set_weight(c=0,a=1,b=1)
- Set the weights for the two children in a tree evaluation. Input values smaller than or equal to -a are mapped to a weight of 1.0 and 0.0 for the left and right sub-tree respectively. Input values larger than or equal to +b are mapped to a weight of 0.0 (left sub-tree) and 1.0 (right sub-tree). An input value of 0.0 is mapped to an equal weight of 0.5 for both the left and right sub-tree. All other values in the range [-a,b] are mapped to intermediate values using a smooth and continuous curve. The parameter c must be set to a value in the range ]-1.0,1.0] and controls the smoothness of the curve around the corner points -a and b. A value of 1.0 assures a smooth transition (sigmoid-like curve). A value close to -1.0 gives rise to a very fast step-like transition from 0.5 to 1.0 around the two corner points. The left/right sub-tree is only evaluated if the corresponding weight is non-zero.
- expR1(r=1,c=0,a=1,b=1)
- Exponent with a regularisation cost on the output values y of (0.5*(y-c)^2)*r.
- expR2(r=1,c=0,a=1,b=1)
- Exponent with a regularisation cost on the input values x of (0.5*x^2)*r.
- sigmR1(r=1,a=1)
- Sigmoid with an optional scaling of the input with a, training with a regularisation cost of (0.25-dsigm(x)/dx)*r.
- tanhR1(r=1,a=1)
- Tanh with an optional scaling of the input with a, training with a regularisation cost of (1.0-dtanh(x)/dx)*r.
- sigmR2(r=1,a=1)
- Sigmoid with an optional scaling of the input with a, training with a regularisation cost of r/(1+exp(256/x^2)).
- tanhR2(r=1,a=1)
- Tanh with an optional scaling of the input with a, training with a regularisation cost of r/(1+exp(64/x^2)).
- sigmR3(r=1,a=1)
- Sigmoid with an optional scaling of the input with a, training with a regularisation cost of (0.5*x^2)*r.
- tanhR3(r=1,a=1)
- Tanh with an optional scaling of the input with a, training with a regularisation cost of (0.5*x^2)*r.
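To make the arc-function specifications above concrete, the following standalone sketch (illustrative only, not the library code) implements rop1 exactly as listed: y=f(x*a)*b+c with f(x)=x/(|x|+1), and compares it against tanh():

#include <stdio.h>
#include <math.h>

/* rop1: y = f(x*a)*b + c with f(x) = x/(|x|+1), a cheap tanh-like squashing function */
static float rop1(float x, float a, float b, float c)
{
    float u = x * a;
    return u / (fabsf(u) + 1.0f) * b + c;
}

int main(void)
{
    for (int i = -4; i <= 4; i += 2)
        printf("x=%2d rop1=%8.4f tanh=%8.4f\n", i,
               rop1((float)i, 1.0f, 1.0f, 0.0f), tanhf((float)i));
    return 0;
}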
The available connection types are:
- direct
- A 1-to-1 connection. This connection type may also use tied parameters for all arcs.
- softmax
- Compute the exponent of all inputs and normalize this output vector so that the sum equals 1.0 (a probability distribution); see the sketch after this list.
- norm
- Divide all inputs by the 1-, 2-, or inf-norm (specified as -1) of the inputs.
- full
- A full connection: every output node is connected with all input nodes. The results of all incoming connections are added.
- sparse
- A sparse connection between input and output layer. The individual connections are enumerated. See below for a description of the format.
- tree
- Identical to a full connection, only the evaluation order differs. A tree connection has a hierarchical order (binary tree) in which only one of the two descendants is evaluated, except when a point falls in a transition region, in which case both descendants are evaluated. See below for a description of the format.
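The softmax connection type can be illustrated with a generic sketch (not the SPRAAK code path; subtracting the maximum before exponentiating is a standard numerical stability trick and is an assumption about, not a description of, the library's implementation):

#include <stdio.h>
#include <math.h>

/* softmax: exponentiate and normalise so the outputs sum to 1.0 */
static void softmax(const float *x, float *y, int n)
{
    float mx = x[0], sum = 0.0f;
    for (int i = 1; i < n; i++)   /* find the max for numerical stability */
        if (x[i] > mx) mx = x[i];
    for (int i = 0; i < n; i++) { y[i] = expf(x[i] - mx); sum += y[i]; }
    for (int i = 0; i < n; i++) y[i] /= sum;
}

int main(void)
{
    float x[4] = {1.0f, 2.0f, 3.0f, 4.0f}, y[4];
    softmax(x, y, 4);
    for (int i = 0; i < 4; i++) printf("%.4f ", y[i]);  /* sums to 1.0 */
    printf("\n");
    return 0;
}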
The MLP description file has the following structure:
[layers]
Input <nr_input_nodes>
<layer_name> <nr_nodes>
Output <nr_output_nodes>
[connections]
<from>[+] <to>[<range>] <type>[(<alt_opt>)] [ndx_file] <param_file> <functions>
...
[options]
<options>
A layer thus has a unique name and a size (number of nodes). A connection is described with a source and destination layer, an optional range of nodes in the output layer (continuous, non-overlapping), a connection type and a sequence of functions.
The optional '+' that may follow the name of the input layer indicates that one extra bias node (with a value of 1.0) must be added as extra input at the end of the input layer. The connection type has the following format:
<type>(<alt_opt>)
The <alt_opt> is optional and modifies the default behaviour of the connection type. The following connection types are available:
- direct(shared)
- one-to-one connection, optionally the parameters are shared.
- full(trans)
- full connection (each node to each node), optionally the parameters are stored in a transposed order (faster evaluation, conformant to the parameter layout for tree evaluation).
- sparse(excl)
- sparse connections (each input node connects to a selected set of output nodes); the 'excl' specifier should be present when each output node has only one incoming arc.
- tree(<nsd>,<buf>)
- tree structured layer; optionally parts of the non-selected sub-tree are evaluated as well: the <nsd> option indicates that the non-selected sub-tree should be evaluated to a depth <nsd>, the <buf> option specifies a layer with weights for which any sub-tree with a non-zero weight will be evaluated.
The functions are described as follows:
<name>[<train_arg>](<extra_args>)
The train arguments <train_arg> are optional. They are specified between square brackets '[]' and consist of the letters:
- T or C
- Parameters that need to be (T)rained or parameters that are fixed (C)onstants.
- P or B
- To either (P)ropagate the error to the previous layer, or to (B)lock the error back propagation.
The extra arguments are specified between braces '()', separated by commas. The allowed arguments per function, their type (float, integer or string) and the effect they have depend on the function; see the function list above. An example description file is sketched below.
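A small example description file (a sketch only: the layer names, sizes and parameter file names are hypothetical, and the ':' separator between consecutive functions is inferred from the "full(trans):scale" notation used in the norm description above, not confirmed by the source):

[layers]
Input 64
hidden 256
Output 10

[connections]
Input+ hidden full(trans) hid.params scale[T]:tanh
hidden+ Output full out.params scale[T]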
The index file that specifies the sparse connectivity consists of the concatenation of (-1) terminated arrays (of the type I32) listing the set of outputs for each input. For example, the following indices
0 3 -1   1 3 -1   2 -1
describe the connections of a layer that transforms 3 inputs into 4 outputs with the following connectivity (rows are inputs, columns are outputs):
[1 0 0 1
0 1 0 1
0 0 1 0]
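A standalone sketch (illustrative only) that decodes such an index stream, using the indices of the example above:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* index stream from the example above: per input, a (-1)-terminated list of outputs */
    const int32_t ndx[] = {0, 3, -1, 1, 3, -1, 2, -1};
    const int n = (int)(sizeof(ndx) / sizeof(ndx[0]));
    int input = 0;
    printf("input 0 ->");
    for (int i = 0; i < n; i++) {
        if (ndx[i] == -1) {              /* terminator: advance to the next input */
            printf("\n");
            if (i + 1 < n)
                printf("input %d ->", ++input);
        } else {
            printf(" %d", (int)ndx[i]);  /* one output index for the current input */
        }
    }
    return 0;
}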
The binary tree (tree connection type) has the following properties:
- node 0 is the root node
- left child nodes immediately follow the parent node
- right child nodes must follow (i.e. cannot precede) the parent node
The tree structure is stored as a two-valued tuple (of the type I32) per node. The first value contains the (right) child information:
<has_left_child>*1 + <has_right_child>*2 + <offset_to_right_child>*4
with offset_to_right_child equal to 0 if the node does not have a right child. The second value contains the parent information:
<is_left_child>*1 + <is_right_child>*2 + <ndx_of_parent_node_base0>*4
with ndx_of_parent_node_base0, is_left_child and is_right_child equal to -1, 0 and 0 respectively for the root node.
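To make the encoding concrete, the sketch below (illustrative only; it assumes <offset_to_right_child> is counted from the parent node, which is not stated explicitly above) builds the two I32 values per node for a minimal three-node tree: root 0 with left child 1 and right child 2:

#include <stdio.h>
#include <stdint.h>

/* <has_left_child>*1 + <has_right_child>*2 + <offset_to_right_child>*4 */
static int32_t child_info(int has_l, int has_r, int off_r)
{ return (int32_t)(has_l + has_r*2 + off_r*4); }

/* <is_left_child>*1 + <is_right_child>*2 + <ndx_of_parent_node_base0>*4 */
static int32_t parent_info(int is_l, int is_r, int parent)
{ return (int32_t)(is_l + is_r*2 + parent*4); }

int main(void)
{
    /* minimal tree: node 0 = root, node 1 = its left child, node 2 = its right child */
    int32_t tree[3][2] = {
        {child_info(1, 1, 2), parent_info(0, 0, -1)},  /* root: right child 2 nodes further; parent ndx -1 */
        {child_info(0, 0, 0), parent_info(1, 0, 0)},   /* leaf, left child of node 0 */
        {child_info(0, 0, 0), parent_info(0, 1, 0)},   /* leaf, right child of node 0 */
    };
    for (int i = 0; i < 3; i++)   /* prints 11/-4, 0/1, 0/2 */
        printf("node %d: child info %d, parent info %d\n", i, (int)tree[i][0], (int)tree[i][1]);
    return 0;
}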
- Note
- The dropout and set_weight functions return exact 0.0 values for non-active outputs; a normal (computed) 0.0 value is replaced with a very small positive value, so that an exact 0.0 unambiguously marks a non-active output.
- Date
- Jan 1999
- Author
- Kris Demuynck
- Revision History:
- XX/01/1999 - KD
- Creation
- 13/04/2010 - KD
- added to SPRAAK
- 01/10/2012 - KD
- clean-up, documentation, added new functions
- 04/12/2015 - KD
- new version with new functionality, speed-up, ... (derived from the old mlp software)