|
void | spr_cwr_node_seq_print (SprStream *dest, const char *lm, const SprCwrNodeSeq *node_seq, const char *rm, const SprCwrSearchSpace *search_space) |
| Print the given node sequence in textual format. More...
|
|
void | spr_cwr_sent_print (SprStream *dest, const SprCwrWordHist *word_hist, const SprCwrSearchSpace *search_space) |
|
int | spr_cwr_tokens_check (const SprCwrSearchSpace *search_space, SprCwrHashP2X *ctoken_htbl, SprCwrHashP2X *whist_htbl) |
| Check the token lists for errors and inconsistencies. More...
|
|
int | spr_cwr_search_space_check (const SprCwrSearchSpace *search_space) |
| Check the search space for errors and inconsistencies. More...
|
|
int | spr_cwr_search_info_reset (SprCwrSearchSpace *search_space, int spkr_id) |
|
void | spr_cwr_gauss_limit (SprCwrSearchSpace *search_space) |
|
void | spr_cwr_search_info_compact (SprCwrSearchSpace *search_space) |
|
void | spr_cwr_search_space_parts_print (SprStream *dest, const char *lm, int parts_mask, const char *rm) |
|
int | spr_cwr_search_space_modify (SprCwrSearchSpace *search_space, unsigned int action,...) |
|
SprCwrSearchSpace * | spr_cwr_search_space_free (SprCwrSearchSpace *search_space, SprMsgId *routine) |
|
SprCwrSearchSpace * | spr_cwr_search_space_new (void) |
|
SprCwrToken * | spr_iwr_best_word (int top_n, SprCwrSearchSpace *search_space) |
|
SprCwrToken * | spr_cwr_best_sent (SprCwrSearchSpace *search_space) |
|
SprCwrToken * | spr_cwr_find_best_token (SprCwrCToken **Best_ctoken, SprCwrSearchSpace *search_space) |
|
int | spr_cwr_data_process (SprStream *dfptr, int nfr, SprSspDesc *ssp_des, SprCwrSearchSpace *search_space) |
|
Search routines for Large Vocabulary Continuous Speech Recognition.
- Nomenclature
- state
- An HMM-state.
- phone
- The basic acoustic units used to model the words, e.g. phonemes, context-dependent phonemes or phoneme classes.
- token
- A running hypothesis, named in accordance with the token-passing paradigm (see Odell's PhD thesis). Note that the algorithm used here combines tokens into a collective token, and that tokens are conditioned; a purely illustrative sketch follows this list.
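To make the token / collective-token distinction concrete, the following is a hypothetical sketch only; the type and field names are invented for the example and do not correspond to the actual SprCwrToken / SprCwrCToken definitions.

    /* Hypothetical illustration only; NOT the SPRAAK definitions. */
    typedef struct ExampleToken {
        double score;              /* accumulated log score of this hypothesis        */
        const void *word_hist;     /* word history (LM context) the token is
                                      conditioned on                                  */
        struct ExampleToken *next; /* next conditioned token in the same collective   */
    } ExampleToken;

    typedef struct ExampleCToken {
        int state;                 /* HMM state the collective token occupies         */
        ExampleToken *tokens;      /* conditioned tokens, one per distinct word
                                      history; tokens with equal histories recombine  */
    } ExampleCToken;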
- Search strategy
- The search is performed with tokens as the basic structure to expand and recombine. First, the states that need evaluation are marked and evaluated. Next, the token scores are adjusted to account for the new observation, but no transitions are taken yet. In a second pass, the word-internal transitions are followed (expanded), pruned and recombined where necessary. Finally, the token list is pruned again (with an updated maximum score) to keep the number of hypotheses low: a margin w.r.t. the best token plus a maximum on the number of running tokens. At the start of the next frame, all inter-word transitions are performed, using the pruning threshold of the previous frame. A control-flow sketch of one frame is given below.
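The sketch below is a non-authoritative rendering of the frame loop described above; it reproduces only the order of operations, and every function name is invented for the example rather than taken from the SPRAAK API.

    /* Control-flow sketch of one search frame; all identifiers are illustrative. */
    static void expand_inter_word_transitions(double prev_threshold) { /* stub */ }
    static void mark_and_evaluate_states(const float *obs, int dim)  { /* stub */ }
    static void add_observation_scores(void)                         { /* stub */ }
    static void expand_word_internal_transitions(void)               { /* stub */ }
    static double prune_token_list(double margin, int max_tokens)    { return 0.0; }

    static double process_frame(const float *obs, int dim, double prev_threshold,
                                double beam_margin, int max_tokens)
    {
        /* inter-word transitions, using the pruning threshold of the previous frame */
        expand_inter_word_transitions(prev_threshold);
        /* mark the states that must be evaluated and evaluate them */
        mark_and_evaluate_states(obs, dim);
        /* fold the new observation into the token scores; no transitions yet */
        add_observation_scores();
        /* second pass: word-internal transitions, pruned and recombined as needed */
        expand_word_internal_transitions();
        /* final pruning with the updated maximum score; returns the new threshold */
        return prune_token_list(beam_margin, max_tokens);
    }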
- Word scores
- The observation score:
- The duration score:
- The language model score: a log probability that is scaled by a fixed factor and offset by a constant (the word start penalty).
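Assuming the description above, the language model contribution could be combined as in the following sketch; the names lm_contribution, lm_scale and word_start_penalty are assumptions made for the example, not SPRAAK identifiers.

    /* Sketch only: the LM log probability, scaled and offset as described above. */
    static double lm_contribution(double lm_log_prob, double lm_scale,
                                  double word_start_penalty)
    {
        return lm_scale * lm_log_prob + word_start_penalty;
    }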
- Date
- Sep 1995
- Author
- Kris Demuynck
- Bug:
- If the FULL_DEBUG flag is set during compilation (-DFULL_DEBUG), some extra debug messages and tests are added. These tests add serious overhead.
- Revision History:
- 26/11/96 - KD
- Converted the nomenclature (word hyp -> token) + introduced the use of collective tokens.
- 02/12/97 - KD
- Adapted for state-based lexica + removed the non-Markov duration possibilities.
- 05/09/98 - KD
- Added the continuous recognition mode
- 15/09/98 - KD
- Added the word lattices
- 25/08/02 - KD
- Added more LM-forwarding methods + changed modify_search_space() for faster loading
- 10/08/04 - KD
- Improved word lattice algorithm
- 20/08/04 - KD
- Defined sub-structures in the monolithic 'SearchSpace' structure for easier reuse of structures/code in derived search engines