66 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/bin/bash
#
# Builds the language model and decoding graph under <output_dir>/data in
# three stages:
#   stage 0 - build the LM (formatting the LM corpus first when needed)
#   stage 1 - prepare L.fst
#   stage 2 - build the TLG decoding graph
#
# Usage:
#   <script> <output_dir> <dict_path> <train_corpus> <sil_prob> \
#       <formatted_lm_corpus> <prune_threshold> <order> \
#       [position_dependent_phone]
#
# Arguments:
#   output_dir               root directory; all outputs go under
#                            <output_dir>/data
#   dict_path                passed as --dict to local/format_lm_data.py and
#                            as the last argument of local/build_lm.sh
#   train_corpus             input text for local/format_lm_data.py (used only
#                            when formatted_lm_corpus is not an existing file)
#   sil_prob                 passed as --sil-prob to
#                            tools/fst/ctc_compile_dict_token.sh
#   formatted_lm_corpus      corpus file for the LM; regenerated from
#                            train_corpus when this file does not exist
#   prune_threshold, order   forwarded to local/build_lm.sh
#   position_dependent_phone optional; forwarded to local/prepare_dict_ctc.sh

# path.sh is sourced before strict mode is enabled, exactly as before, so it
# does not have to be "set -u" clean itself.
. ./path.sh || exit 1

# The positional parameters are read before "set -u" takes effect, so without
# this check a missing argument would silently become an empty string (e.g. an
# empty output_dir makes later stages write under /data).
if [ "$#" -lt 7 ]; then
    printf 'Usage: %s <output_dir> <dict_path> <train_corpus> <sil_prob> <formatted_lm_corpus> <prune_threshold> <order> [position_dependent_phone]\n' "$0" >&2
    exit 1
fi

stage=0
use_all_phonemes=1
dict_type=phn # char or phn
output_dir=$1
dict_path=$2
train_corpus=$3
sil_prob=$4
formatted_lm_corpus=$5
prune_threshold=$6
order=$7
# Optional: stays empty when the caller supplies only seven arguments.
position_dependent_phone=${8:-}

# Trace commands, stop on the first failure, reject unset variables and make
# pipelines fail when any of their stages fails.
set -xe
set -u
set -o pipefail

if [ "${stage}" -le 0 ]; then
    # Build LM
    #
    # When no file exists at $formatted_lm_corpus, generate the corpus from
    # $train_corpus into $output_dir/data/local/lm_data/corpus.txt and use
    # that instead. The variable must be quoted here: unquoted and empty, the
    # test degenerates to `[ ! -f ]`, which is false, so the formatting step
    # would be skipped and build_lm.sh would receive shifted arguments.
    if [ ! -f "$formatted_lm_corpus" ]; then
        mkdir -p -- "$output_dir/data/local/lm_data"
        formatted_lm_corpus="$output_dir/data/local/lm_data/corpus.txt"
        python local/format_lm_data.py \
            --input_text "$train_corpus" \
            --output_text "$formatted_lm_corpus" \
            --dict "$dict_path" \
            --unk
    fi
    # Every argument is quoted so that each one stays in its own position
    # even when a value contains whitespace or glob characters.
    local/build_lm.sh \
        "$formatted_lm_corpus" \
        "$output_dir/data/local/lm" \
        "$dict_type" \
        "$order" \
        "$prune_threshold" \
        "$dict_path"
fi

if [ "${stage}" -le 1 ]; then
    # Prepare L.fst
    #
    # First derive the dictionary directory (dict_phn) from the LM directory.
    # position_dependent_phone is a trailing optional argument: it is passed
    # (as a single word) only when non-empty, so callers that never supplied
    # it keep getting the three-argument invocation.
    local/prepare_dict_ctc.sh \
        "$output_dir/data/local/lm" \
        "$output_dir/data/local/dict_phn" \
        "$use_all_phonemes" \
        ${position_dependent_phone:+"$position_dependent_phone"}
    # Then compile the dictionary into $output_dir/data/lang_phn, using
    # lang_phn_tmp as the intermediate directory argument. Option values are
    # quoted so an empty value cannot make an option swallow the next path.
    tools/fst/ctc_compile_dict_token.sh --dict-type "$dict_type" --sil-prob "$sil_prob" \
        "$output_dir/data/local/dict_phn" \
        "$output_dir/data/local/lang_phn_tmp" \
        "$output_dir/data/lang_phn"
fi

if [ "${stage}" -le 2 ]; then
    # Build TLG decoding graph
    #
    # Inputs are the LM directory (stage 0) and the lang_phn directory
    # (stage 1); the result is written to $output_dir/data/lang_test.
    # Paths are quoted so directories containing spaces are passed intact.
    tools/fst/make_tlg.sh \
        "$output_dir/data/local/lm" \
        "$output_dir/data/lang_phn" \
        "$output_dir/data/lang_test"
fi

# Commands for testing WER:
#GLOG_v=1 GLOG_logtostderr=1 ./brain_speech_decoder_main \
#    -data_path /home/cfan/logits.npy \
#    -fst_path ../../../../examples/brainspeech/s0/data/lang_test/TLG.fst \
#    -dict_path ../../../../examples/brainspeech/s0/data/lang_test/words.txt \
#    -beam 17 \
#    -lattice_beam 8 \
#    -blank_skip_thresh 0.98 \
#    -acoustic_scale 1.2 \
#    -rescore_lm_fst_path ../../../../examples/brainspeech/s0/data/lang_test/G_no_prune.fst \
#    -lm_fst_path ../../../../examples/brainspeech/s0/data/lang_test/G.fst \
#    -result test_rescore_as12.hyp
