aboutsummaryrefslogtreecommitdiff
path: root/dnn
diff options
context:
space:
mode:
Diffstat (limited to 'dnn')
-rw-r--r--dnn/LPCNet.yml24
-rw-r--r--dnn/README1
-rw-r--r--dnn/README.md126
-rw-r--r--dnn/adaconvtest.c449
-rw-r--r--dnn/arm/arm_dnn_map.c88
-rw-r--r--dnn/arm/dnn_arm.h104
-rw-r--r--dnn/arm/nnet_dotprod.c38
-rw-r--r--dnn/arm/nnet_neon.c38
-rw-r--r--dnn/burg.c246
-rw-r--r--dnn/burg.h41
-rw-r--r--dnn/common.h56
-rw-r--r--dnn/datasets.txt173
-rw-r--r--dnn/download_model.bat9
-rwxr-xr-xdnn/download_model.sh10
-rw-r--r--dnn/dred_coding.c44
-rw-r--r--dnn/dred_coding.h36
-rw-r--r--dnn/dred_config.h54
-rw-r--r--dnn/dred_decoder.c129
-rw-r--r--dnn/dred_decoder.h49
-rw-r--r--dnn/dred_encoder.c363
-rw-r--r--dnn/dred_encoder.h71
-rw-r--r--dnn/dred_rdovae.h42
-rw-r--r--dnn/dred_rdovae_dec.c139
-rw-r--r--dnn/dred_rdovae_dec.h53
-rw-r--r--dnn/dred_rdovae_enc.c110
-rw-r--r--dnn/dred_rdovae_enc.h52
-rw-r--r--dnn/dump_data.c280
-rw-r--r--dnn/dump_lpcnet_tables.c104
-rw-r--r--dnn/fargan.c225
-rw-r--r--dnn/fargan.h68
-rw-r--r--dnn/freq.c328
-rw-r--r--dnn/freq.h61
-rw-r--r--dnn/fwgan.c322
-rw-r--r--dnn/fwgan.h83
-rw-r--r--dnn/kiss99.c81
-rw-r--r--dnn/kiss99.h46
-rw-r--r--dnn/lossgen.c196
-rw-r--r--dnn/lossgen.h55
-rw-r--r--dnn/lossgen_demo.c22
-rw-r--r--dnn/lpcnet.c283
-rw-r--r--dnn/lpcnet.h183
-rw-r--r--dnn/lpcnet_demo.c217
-rw-r--r--dnn/lpcnet_enc.c230
-rw-r--r--dnn/lpcnet_plc.c211
-rw-r--r--dnn/lpcnet_private.h90
-rw-r--r--dnn/lpcnet_tables.c307
-rw-r--r--dnn/meson.build64
-rw-r--r--dnn/nndsp.c416
-rw-r--r--dnn/nndsp.h143
-rw-r--r--dnn/nnet.c149
-rw-r--r--dnn/nnet.h163
-rw-r--r--dnn/nnet_arch.h247
-rw-r--r--dnn/nnet_default.c35
-rw-r--r--dnn/osce.c1419
-rw-r--r--dnn/osce.h84
-rw-r--r--dnn/osce_config.h60
-rw-r--r--dnn/osce_features.c454
-rw-r--r--dnn/osce_features.h50
-rw-r--r--dnn/osce_structs.h125
-rw-r--r--dnn/parse_lpcnet_weights.c238
-rw-r--r--dnn/pitchdnn.c79
-rw-r--r--dnn/pitchdnn.h34
-rw-r--r--dnn/tansig_table.h50
-rw-r--r--dnn/test_vec.c128
-rw-r--r--dnn/torch/dnntools/dnntools/__init__.py2
-rw-r--r--dnn/torch/dnntools/dnntools/quantization/__init__.py1
-rw-r--r--dnn/torch/dnntools/dnntools/quantization/softquant.py113
-rw-r--r--dnn/torch/dnntools/dnntools/relegance/__init__.py2
-rw-r--r--dnn/torch/dnntools/dnntools/relegance/meta_critic.py85
-rw-r--r--dnn/torch/dnntools/dnntools/relegance/relegance.py449
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/__init__.py6
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/base_sparsifier.py58
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/common.py123
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/conv1d_sparsifier.py133
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/conv_transpose1d_sparsifier.py134
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/gru_sparsifier.py178
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/linear_sparsifier.py128
-rw-r--r--dnn/torch/dnntools/dnntools/sparsification/utils.py64
-rw-r--r--dnn/torch/dnntools/requirements.txt1
-rw-r--r--dnn/torch/dnntools/setup.py48
-rw-r--r--dnn/torch/fargan/adv_train_fargan.py277
-rw-r--r--dnn/torch/fargan/dataset.py61
-rw-r--r--dnn/torch/fargan/dump_fargan_weights.py112
-rw-r--r--dnn/torch/fargan/fargan.py322
-rw-r--r--dnn/torch/fargan/filters.py46
-rw-r--r--dnn/torch/fargan/rc.py29
-rw-r--r--dnn/torch/fargan/stft_loss.py186
-rw-r--r--dnn/torch/fargan/test_fargan.py128
-rw-r--r--dnn/torch/fargan/train_fargan.py168
-rw-r--r--dnn/torch/fwgan/dump_model_weights.py88
-rw-r--r--dnn/torch/fwgan/inference.py141
-rw-r--r--dnn/torch/fwgan/models/__init__.py7
-rw-r--r--dnn/torch/fwgan/models/fwgan400.py308
-rw-r--r--dnn/torch/fwgan/models/fwgan500.py260
-rw-r--r--dnn/torch/lossgen/README.md27
-rw-r--r--dnn/torch/lossgen/export_lossgen.py101
-rw-r--r--dnn/torch/lossgen/lossgen.py29
-rwxr-xr-xdnn/torch/lossgen/process_data.sh17
-rw-r--r--dnn/torch/lossgen/test_lossgen.py42
-rw-r--r--dnn/torch/lossgen/train_lossgen.py99
-rw-r--r--dnn/torch/lpcnet/README.md27
-rw-r--r--dnn/torch/lpcnet/add_dataset_config.py77
-rw-r--r--dnn/torch/lpcnet/data/__init__.py1
-rw-r--r--dnn/torch/lpcnet/data/lpcnet_dataset.py227
-rw-r--r--dnn/torch/lpcnet/engine/lpcnet_engine.py141
-rw-r--r--dnn/torch/lpcnet/make_default_setup.py56
-rw-r--r--dnn/torch/lpcnet/make_test_config.py78
-rw-r--r--dnn/torch/lpcnet/models/__init__.py8
-rw-r--r--dnn/torch/lpcnet/models/lpcnet.py303
-rw-r--r--dnn/torch/lpcnet/models/multi_rate_lpcnet.py437
-rw-r--r--dnn/torch/lpcnet/print_lpcnet_complexity.py64
-rw-r--r--dnn/torch/lpcnet/scripts/collect_multi_run_results.py190
-rw-r--r--dnn/torch/lpcnet/scripts/loop_run.sh52
-rw-r--r--dnn/torch/lpcnet/scripts/make_animation.py67
-rw-r--r--dnn/torch/lpcnet/scripts/modify_dataset_target.py17
-rw-r--r--dnn/torch/lpcnet/scripts/multi_run.sh17
-rw-r--r--dnn/torch/lpcnet/scripts/run_inference_test.sh22
-rw-r--r--dnn/torch/lpcnet/scripts/update_checkpoints.py54
-rw-r--r--dnn/torch/lpcnet/scripts/update_output_folder.sh22
-rw-r--r--dnn/torch/lpcnet/scripts/update_setups.py57
-rw-r--r--dnn/torch/lpcnet/test_lpcnet.py89
-rw-r--r--dnn/torch/lpcnet/train_lpcnet.py272
-rw-r--r--dnn/torch/lpcnet/utils/__init__.py4
-rw-r--r--dnn/torch/lpcnet/utils/data.py141
-rw-r--r--dnn/torch/lpcnet/utils/endoscopy.py234
-rw-r--r--dnn/torch/lpcnet/utils/layers/__init__.py3
-rw-r--r--dnn/torch/lpcnet/utils/layers/dual_fc.py44
-rw-r--r--dnn/torch/lpcnet/utils/layers/pcm_embeddings.py71
-rw-r--r--dnn/torch/lpcnet/utils/layers/subconditioner.py497
-rw-r--r--dnn/torch/lpcnet/utils/misc.py65
-rw-r--r--dnn/torch/lpcnet/utils/pcm.py35
-rw-r--r--dnn/torch/lpcnet/utils/sample.py44
-rw-r--r--dnn/torch/lpcnet/utils/sparsification/__init__.py2
-rw-r--r--dnn/torch/lpcnet/utils/sparsification/common.py121
-rw-r--r--dnn/torch/lpcnet/utils/sparsification/gru_sparsifier.py187
-rw-r--r--dnn/torch/lpcnet/utils/templates.py157
-rw-r--r--dnn/torch/lpcnet/utils/ulaw.py58
-rw-r--r--dnn/torch/lpcnet/utils/wav.py43
-rw-r--r--dnn/torch/neural-pitch/README.md18
-rw-r--r--dnn/torch/neural-pitch/data_augmentation.py149
-rw-r--r--dnn/torch/neural-pitch/download_demand.sh43
-rw-r--r--dnn/torch/neural-pitch/evaluation.py349
-rw-r--r--dnn/torch/neural-pitch/experiments.py38
-rw-r--r--dnn/torch/neural-pitch/export_neuralpitch_weights.py109
-rw-r--r--dnn/torch/neural-pitch/models.py178
-rw-r--r--dnn/torch/neural-pitch/neural_pitch_update.py179
-rw-r--r--dnn/torch/neural-pitch/ptdb_process.sh34
-rw-r--r--dnn/torch/neural-pitch/run_crepe.py72
-rw-r--r--dnn/torch/neural-pitch/training.py162
-rw-r--r--dnn/torch/neural-pitch/utils.py59
-rw-r--r--dnn/torch/osce/README.md65
-rw-r--r--dnn/torch/osce/adv_train_model.py462
-rw-r--r--dnn/torch/osce/adv_train_vocoder.py451
-rw-r--r--dnn/torch/osce/create_testvectors.py165
-rw-r--r--dnn/torch/osce/data/__init__.py2
-rw-r--r--dnn/torch/osce/data/lpcnet_vocoding_dataset.py225
-rw-r--r--dnn/torch/osce/data/silk_conversion_set.py132
-rw-r--r--dnn/torch/osce/data/silk_enhancement_set.py140
-rw-r--r--dnn/torch/osce/engine/engine.py103
-rw-r--r--dnn/torch/osce/engine/vocoder_engine.py101
-rw-r--r--dnn/torch/osce/export_model_weights.py174
-rw-r--r--dnn/torch/osce/losses/stft_loss.py277
-rw-r--r--dnn/torch/osce/make_default_setup.py93
-rw-r--r--dnn/torch/osce/models/__init__.py42
-rw-r--r--dnn/torch/osce/models/fd_discriminator.py974
-rw-r--r--dnn/torch/osce/models/lace.py190
-rw-r--r--dnn/torch/osce/models/lavoce.py274
-rw-r--r--dnn/torch/osce/models/lavoce_400.py254
-rw-r--r--dnn/torch/osce/models/lpcnet_feature_net.py91
-rw-r--r--dnn/torch/osce/models/nns_base.py69
-rw-r--r--dnn/torch/osce/models/no_lace.py218
-rw-r--r--dnn/torch/osce/models/scale_embedding.py68
-rw-r--r--dnn/torch/osce/models/shape_up_48.py179
-rw-r--r--dnn/torch/osce/models/silk_feature_net.py86
-rw-r--r--dnn/torch/osce/models/silk_feature_net_pl.py127
-rw-r--r--dnn/torch/osce/requirements.txt9
-rw-r--r--dnn/torch/osce/stndrd/evaluation/create_input_data.sh25
-rw-r--r--dnn/torch/osce/stndrd/evaluation/env.rc7
-rw-r--r--dnn/torch/osce/stndrd/evaluation/evaluate.py113
-rw-r--r--dnn/torch/osce/stndrd/evaluation/lace_loss_metric.py330
-rw-r--r--dnn/torch/osce/stndrd/evaluation/make_boxplots.py116
-rw-r--r--dnn/torch/osce/stndrd/evaluation/make_boxplots_moctest.py109
-rw-r--r--dnn/torch/osce/stndrd/evaluation/make_tables.py124
-rw-r--r--dnn/torch/osce/stndrd/evaluation/make_tables_moctest.py121
-rw-r--r--dnn/torch/osce/stndrd/evaluation/moc.py182
-rw-r--r--dnn/torch/osce/stndrd/evaluation/moc2.py190
-rwxr-xr-xdnn/torch/osce/stndrd/evaluation/process_dataset.sh98
-rw-r--r--dnn/torch/osce/stndrd/evaluation/run_nomad.py138
-rw-r--r--dnn/torch/osce/stndrd/presentation/endoscopy.py205
-rw-r--r--dnn/torch/osce/stndrd/presentation/lace_demo.ipynb313
-rw-r--r--dnn/torch/osce/stndrd/presentation/linear_prediction.ipynb320
-rw-r--r--dnn/torch/osce/stndrd/presentation/playback.py25
-rw-r--r--dnn/torch/osce/stndrd/presentation/postfilter.ipynb275
-rw-r--r--dnn/torch/osce/stndrd/presentation/spectrogram.ipynb173
-rw-r--r--dnn/torch/osce/test_model.py96
-rw-r--r--dnn/torch/osce/test_vocoder.py103
-rw-r--r--dnn/torch/osce/train_model.py307
-rw-r--r--dnn/torch/osce/train_vocoder.py287
-rw-r--r--dnn/torch/osce/utils/ada_conv.py71
-rw-r--r--dnn/torch/osce/utils/complexity.py8
-rw-r--r--dnn/torch/osce/utils/endoscopy.py205
-rw-r--r--dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py230
-rw-r--r--dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py200
-rw-r--r--dnn/torch/osce/utils/layers/noise_shaper.py100
-rw-r--r--dnn/torch/osce/utils/layers/pitch_auto_correlator.py84
-rw-r--r--dnn/torch/osce/utils/layers/silk_upsampler.py167
-rw-r--r--dnn/torch/osce/utils/layers/td_shaper.py145
-rw-r--r--dnn/torch/osce/utils/lpcnet_features.py112
-rw-r--r--dnn/torch/osce/utils/misc.py95
-rw-r--r--dnn/torch/osce/utils/moc.py153
-rw-r--r--dnn/torch/osce/utils/pitch.py122
-rw-r--r--dnn/torch/osce/utils/silk_features.py144
-rw-r--r--dnn/torch/osce/utils/softquant.py110
-rw-r--r--dnn/torch/osce/utils/spec.py210
-rw-r--r--dnn/torch/osce/utils/templates.py347
-rw-r--r--dnn/torch/plc/export_plc.py100
-rw-r--r--dnn/torch/plc/plc.py144
-rw-r--r--dnn/torch/plc/plc_dataset.py60
-rw-r--r--dnn/torch/plc/train_plc.py145
-rw-r--r--dnn/torch/rdovae/README.md24
-rw-r--r--dnn/torch/rdovae/export_rdovae_weights.py365
-rw-r--r--dnn/torch/rdovae/fec_encoder.py212
-rw-r--r--dnn/torch/rdovae/import_rdovae_weights.py143
-rw-r--r--dnn/torch/rdovae/packets/__init__.py1
-rw-r--r--dnn/torch/rdovae/packets/fec_packets.c142
-rw-r--r--dnn/torch/rdovae/packets/fec_packets.h34
-rw-r--r--dnn/torch/rdovae/packets/fec_packets.py108
-rw-r--r--dnn/torch/rdovae/rdovae/__init__.py2
-rw-r--r--dnn/torch/rdovae/rdovae/dataset.py67
-rw-r--r--dnn/torch/rdovae/rdovae/rdovae.py719
-rw-r--r--dnn/torch/rdovae/requirements.txt4
-rw-r--r--dnn/torch/rdovae/train_rdovae.py278
-rw-r--r--dnn/torch/testsuite/README.md46
-rw-r--r--dnn/torch/testsuite/examples/lpcnet_c_example.yml6
-rw-r--r--dnn/torch/testsuite/examples/lpcnet_c_plc_example.yml5
-rw-r--r--dnn/torch/testsuite/examples/lpcnet_torch_example.yml5
-rw-r--r--dnn/torch/testsuite/requirements.txt12
-rw-r--r--dnn/torch/testsuite/run_test.py375
-rw-r--r--dnn/torch/testsuite/utils/__init__.py0
-rw-r--r--dnn/torch/testsuite/utils/files.py54
-rw-r--r--dnn/torch/testsuite/utils/pesq.py43
-rw-r--r--dnn/torch/testsuite/utils/pitch.py61
-rw-r--r--dnn/torch/weight-exchange/README.md21
-rw-r--r--dnn/torch/weight-exchange/requirements.txt1
-rw-r--r--dnn/torch/weight-exchange/setup.py48
-rw-r--r--dnn/torch/weight-exchange/wexchange/__init__.py30
-rw-r--r--dnn/torch/weight-exchange/wexchange/c_export/__init__.py31
-rw-r--r--dnn/torch/weight-exchange/wexchange/c_export/c_writer.py182
-rw-r--r--dnn/torch/weight-exchange/wexchange/c_export/common.py387
-rw-r--r--dnn/torch/weight-exchange/wexchange/tf/__init__.py5
-rw-r--r--dnn/torch/weight-exchange/wexchange/tf/tf.py188
-rw-r--r--dnn/torch/weight-exchange/wexchange/torch/__init__.py37
-rw-r--r--dnn/torch/weight-exchange/wexchange/torch/torch.py433
-rw-r--r--dnn/training_tf2/dataloader.py49
-rw-r--r--dnn/training_tf2/decode_rdovae.py111
-rw-r--r--dnn/training_tf2/diffembed.py49
-rwxr-xr-xdnn/training_tf2/dump_lpcnet.py388
-rwxr-xr-xdnn/training_tf2/dump_plc.py296
-rw-r--r--dnn/training_tf2/dump_rdovae.py306
-rw-r--r--dnn/training_tf2/encode_rdovae.py125
-rw-r--r--dnn/training_tf2/fec_encoder.py256
-rw-r--r--dnn/training_tf2/fec_packets.c142
-rw-r--r--dnn/training_tf2/fec_packets.h34
-rw-r--r--dnn/training_tf2/fec_packets.py108
-rw-r--r--dnn/training_tf2/keraslayerdump.py189
-rw-r--r--dnn/training_tf2/lossfuncs.py99
-rw-r--r--dnn/training_tf2/lpcnet.py339
-rw-r--r--dnn/training_tf2/lpcnet_plc.py101
-rw-r--r--dnn/training_tf2/mdense.py95
-rw-r--r--dnn/training_tf2/pade.py70
-rw-r--r--dnn/training_tf2/parameters.py29
-rw-r--r--dnn/training_tf2/plc_loader.py73
-rw-r--r--dnn/training_tf2/rdovae.py372
-rw-r--r--dnn/training_tf2/rdovae_exchange.py138
-rw-r--r--dnn/training_tf2/rdovae_import.py123
-rwxr-xr-xdnn/training_tf2/test_lpcnet.py120
-rw-r--r--dnn/training_tf2/test_plc.py92
-rw-r--r--dnn/training_tf2/tf_funcs.py70
-rwxr-xr-xdnn/training_tf2/train_lpcnet.py214
-rw-r--r--dnn/training_tf2/train_plc.py197
-rw-r--r--dnn/training_tf2/train_rdovae.py151
-rw-r--r--dnn/training_tf2/ulaw.py19
-rw-r--r--dnn/training_tf2/uniform_noise.py78
-rw-r--r--dnn/vec.h389
-rw-r--r--dnn/vec_avx.h884
-rw-r--r--dnn/vec_neon.h473
-rw-r--r--dnn/write_lpcnet_weights.c97
-rw-r--r--dnn/x86/dnn_x86.h121
-rw-r--r--dnn/x86/nnet_avx2.c40
-rw-r--r--dnn/x86/nnet_sse2.c40
-rw-r--r--dnn/x86/nnet_sse4_1.c40
-rw-r--r--dnn/x86/x86_dnn_map.c83
292 files changed, 41471 insertions, 0 deletions
diff --git a/dnn/LPCNet.yml b/dnn/LPCNet.yml
new file mode 100644
index 00000000..3a726657
--- /dev/null
+++ b/dnn/LPCNet.yml
@@ -0,0 +1,24 @@
+#
+# install
+# conda env create -f=LPCNet.yml
+#
+# update
+# conda env update -f=LPCNet.yml
+#
+# activate
+# conda activate LPCNet
+#
+# remove
+# conda remove --name LPCNet --all
+#
+name: LPCNet
+channels:
+ - anaconda
+ - conda-forge
+dependencies:
+ - keras==2.2.4
+ - python>=3.6
+ - tensorflow-gpu==1.12.0
+ - cudatoolkit
+ - h5py
+ - numpy
diff --git a/dnn/README b/dnn/README
new file mode 100644
index 00000000..96dc92fb
--- /dev/null
+++ b/dnn/README
@@ -0,0 +1 @@
+See README.md
diff --git a/dnn/README.md b/dnn/README.md
new file mode 100644
index 00000000..ad4a6724
--- /dev/null
+++ b/dnn/README.md
@@ -0,0 +1,126 @@
+# LPCNet
+
+Low complexity implementation of the WaveRNN-based LPCNet algorithm, as described in:
+
+- J.-M. Valin, J. Skoglund, [LPCNet: Improving Neural Speech Synthesis Through Linear Prediction](https://jmvalin.ca/papers/lpcnet_icassp2019.pdf), *Proc. International Conference on Acoustics, Speech and Signal Processing (ICASSP)*, arXiv:1810.11846, 2019.
+- J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [Neural Speech Synthesis on a Shoestring: Improving the Efficiency of LPCNet](https://jmvalin.ca/papers/improved_lpcnet.pdf), *Proc. ICASSP*, arxiv:2106.04129, 2022.
+- K. Subramani, J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [End-to-end LPCNet: A Neural Vocoder With Fully-Differentiable LPC Estimation](https://jmvalin.ca/papers/lpcnet_end2end.pdf), *Proc. INTERSPEECH*, arxiv:2106.04129, 2022.
+
+For coding/PLC applications of LPCNet, see:
+
+- J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arxiv:1903.12087, 2019.
+- J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arxiv:1905.04628, 2020.
+- J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arxiv:2205.05785, 2022.
+- J.-M. Valin, J. Büthe, A. Mustafa, [Low-Bitrate Redundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder](https://jmvalin.ca/papers/valin_dred.pdf), *Proc. ICASSP*, arXiv:2212.04453, 2023. ([blog post](https://www.amazon.science/blog/neural-encoding-enables-more-efficient-recovery-of-lost-audio-packets))
+
+# Introduction
+
+Work-in-progress software for researching low-CPU-complexity algorithms for speech synthesis and compression by applying Linear Prediction techniques to WaveRNN. High-quality speech can be synthesised on regular CPUs (around 3 GFLOP) with SIMD support (SSE2, SSSE3, AVX, AVX2/FMA and NEON are currently supported). The code also supports very low bitrate compression at 1.6 kb/s.
+
+The BSD licensed software is written in C and Python/Keras. For training, a GTX 1080 Ti or better is recommended.
+
+This software is an open source starting point for LPCNet/WaveRNN-based speech synthesis and coding.
+
+# Using the existing software
+
+You can build the code using:
+
+```
+./autogen.sh
+./configure
+make
+```
+Note that the autogen.sh script is used when building from Git and will automatically download the latest model
+(models are too large to put in Git). By default, LPCNet will attempt to use 8-bit dot product instructions on AVX\*/Neon to
+speed up inference. To disable that (e.g. to avoid quantization effects when retraining), add --disable-dot-product to the
+configure script. LPCNet does not yet have a complete implementation for some of the integer operations on the ARMv7
+architecture so for now you will also need --disable-dot-product to successfully compile on 32-bit ARM.
+
+It is highly recommended to set the CFLAGS environment variable to enable AVX or NEON *prior* to running configure, otherwise
+no vectorization will take place and the code will be very slow. On a recent x86 CPU, something like
+```
+export CFLAGS='-Ofast -g -march=native'
+```
+should work. On ARM, you can enable Neon with:
+```
+export CFLAGS='-Ofast -g -mfpu=neon'
+```
+While not strictly required, the -Ofast flag will help with auto-vectorization, especially for dot products that
+cannot be optimized without -ffast-math (which -Ofast enables). Additionally, -falign-loops=32 has been shown to
+help on x86.
+
+You can test the capabilities of LPCNet using the lpcnet\_demo application. To encode a file:
+```
+./lpcnet_demo -encode input.pcm compressed.bin
+```
+where input.pcm is a 16-bit (machine endian) PCM file sampled at 16 kHz. The raw compressed data (no header)
+is written to compressed.bin and consists of 8 bytes per 40-ms packet.
+
+To decode:
+```
+./lpcnet_demo -decode compressed.bin output.pcm
+```
+where output.pcm is also 16-bit, 16 kHz PCM.
+
+Alternatively, you can run the uncompressed analysis/synthesis using -features
+instead of -encode and -synthesis instead of -decode.
+The same functionality is available in the form of a library. See include/lpcnet.h for the API.
+
+To try packet loss concealment (PLC), you first need a PLC model, which you can get with:
+```
+./download_model.sh plc-3b1eab4
+```
+or (for the PLC challenge submission):
+```
+./download_model.sh plc_challenge
+```
+PLC can be tested with:
+```
+./lpcnet_demo -plc_file noncausal_dc error_pattern.txt input.pcm output.pcm
+```
+where error_pattern.txt is a text file with one entry per 20-ms packet, with 1 meaning "packet lost" and 0 meaning "packet not lost".
+noncausal_dc selects the non-causal mode (5-ms look-ahead) with special handling for DC offsets. It is also possible to use "noncausal", "causal",
+or "causal_dc" instead.
+
+# Training a new model
+
+This codebase is also meant for research and it is possible to train new models. These are the steps to do that:
+
+1. Set up a Keras system with GPU.
+
+1. Generate training data:
+ ```
+ ./dump_data -train input.s16 features.f32 data.s16
+ ```
+ where the first file contains 16 kHz 16-bit raw PCM audio (no header) and the other files are output files. This program makes several passes over the data with different filters to generate a large amount of training data.
+
+1. Now that you have your files, train with:
+ ```
+ python3 training_tf2/train_lpcnet.py features.f32 data.s16 model_name
+ ```
+ and it will generate an h5 file for each iteration, with model\_name as the prefix. If it stops with a
+ "Failed to allocate RNN reserve space" message, try specifying a smaller --batch-size for train\_lpcnet.py.
+
+1. You can synthesise speech with Python and your GPU card (very slow):
+ ```
+ ./dump_data -test test_input.s16 test_features.f32
+ ./training_tf2/test_lpcnet.py lpcnet_model_name.h5 test_features.f32 test.s16
+ ```
+
+1. Or with C on a CPU (C inference is much faster):
+ First extract the model files nnet\_data.h and nnet\_data.c
+ ```
+ ./training_tf2/dump_lpcnet.py lpcnet_model_name.h5
+ ```
+ and move the generated nnet\_data.\* files to the src/ directory.
+ Then you just need to rebuild the software and use lpcnet\_demo as explained above.
+
+# Speech Material for Training
+
+Suitable training material can be obtained from [Open Speech and Language Resources](https://www.openslr.org/). See the datasets.txt file for details on suitable training data.
+
+# Reading Further
+
+1. [LPCNet: DSP-Boosted Neural Speech Synthesis](https://people.xiph.org/~jm/demo/lpcnet/)
+1. [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://people.xiph.org/~jm/demo/lpcnet_codec/)
+1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/
diff --git a/dnn/adaconvtest.c b/dnn/adaconvtest.c
new file mode 100644
index 00000000..722e4aff
--- /dev/null
+++ b/dnn/adaconvtest.c
@@ -0,0 +1,449 @@
+#include "lace_data.h"
+#include "nolace_data.h"
+#include "osce.h"
+#include "nndsp.h"
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+
+extern const WeightArray lacelayers_arrays[];
+extern const WeightArray nolacelayers_arrays[];
+
+/* Checks adaconv_process_frame() against stored test vectors.
+ *
+ * prefix      common path prefix; <prefix>_features.f32, <prefix>_x_in.f32 and
+ *             <prefix>_x_out.f32 are read as raw floats
+ * num_frames  number of frames to read and compare
+ * hAdaConv    state object, (re)initialised here before the first frame
+ * All remaining parameters are forwarded unchanged to adaconv_process_frame().
+ *
+ * For every frame the RMS error between the computed and the reference output
+ * is printed to stdout. Any failure (negative or oversized dimensions, prefix
+ * too long, missing file, short read) terminates the program with exit(1).
+ */
+void adaconv_compare(
+    const char * prefix,
+    int num_frames,
+    AdaConvState* hAdaConv,
+    LinearLayer *kernel_layer,
+    LinearLayer *gain_layer,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int in_channels,
+    int out_channels,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float shape_gain
+)
+{
+    char feature_file[256];
+    char x_in_file[256];
+    char x_out_file[256];
+    char message[512];
+    int i_frame, i_sample;
+    float mse;
+    float features[512];
+    float x_in[512];
+    float x_out_ref[512];
+    float x_out[512];
+    float window[40];
+    FILE *f_features, *f_x_in, *f_x_out;
+    size_t n_features, n_in, n_out;
+
+    init_adaconv_state(hAdaConv);
+    /* NOTE(review): the window is always built for 40 taps while overlap_size
+       is passed on separately -- confirm that overlap_size never exceeds 40. */
+    compute_overlap_window(window, 40);
+
+    /* The frame buffers are fixed-size: refuse dimensions that would overrun them. */
+    if (feature_dim < 0 || frame_size < 0 || in_channels < 0 || out_channels < 0)
+    {
+        fprintf(stderr, "negative dimension given for %s\n", prefix);
+        exit(1);
+    }
+    n_features = (size_t)feature_dim;
+    n_in = (size_t)frame_size * (size_t)in_channels;
+    n_out = (size_t)frame_size * (size_t)out_channels;
+    if (n_features > sizeof(features) / sizeof(features[0])
+        || n_in > sizeof(x_in) / sizeof(x_in[0])
+        || n_out > sizeof(x_out) / sizeof(x_out[0]))
+    {
+        fprintf(stderr, "frame dimensions for %s exceed the test buffers\n", prefix);
+        exit(1);
+    }
+
+    /* snprintf never writes past the buffer; a result >= the buffer size
+       means the file name would have been cut off. */
+    if (snprintf(feature_file, sizeof(feature_file), "%s_features.f32", prefix) >= (int)sizeof(feature_file)
+        || snprintf(x_in_file, sizeof(x_in_file), "%s_x_in.f32", prefix) >= (int)sizeof(x_in_file)
+        || snprintf(x_out_file, sizeof(x_out_file), "%s_x_out.f32", prefix) >= (int)sizeof(x_out_file))
+    {
+        fprintf(stderr, "file prefix too long: %s\n", prefix);
+        exit(1);
+    }
+
+    /* The vectors are raw binary floats, hence "rb" rather than text mode. */
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    f_x_out = fopen(x_out_file, "rb");
+    if (f_x_out == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", x_out_file);
+        perror(message);
+        exit(1);
+    }
+
+    for (i_frame = 0; i_frame < num_frames; i_frame ++)
+    {
+        if (fread(features, sizeof(float), n_features, f_features) != n_features)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+            exit(1);
+        }
+
+        if (fread(x_in, sizeof(float), n_in, f_x_in) != n_in)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+            exit(1);
+        }
+
+        if (fread(x_out_ref, sizeof(float), n_out, f_x_out) != n_out)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+            exit(1);
+        }
+
+        adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,
+            frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,
+            filter_gain_a, filter_gain_b, shape_gain, window, 0);
+
+        /* root-mean-square deviation from the reference output of this frame */
+        mse = 0;
+        for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++)
+        {
+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+        }
+        mse = sqrt(mse / (frame_size * out_channels));
+        printf("rmse[%d] %f\n", i_frame, mse);
+
+    }
+
+    fclose(f_x_out);
+    fclose(f_x_in);
+    fclose(f_features);
+}
+
+
+/* Checks adacomb_process_frame() against stored test vectors.
+ *
+ * prefix      common path prefix; <prefix>_features.f32, <prefix>_x_in.f32,
+ *             <prefix>_p_in.s32 and <prefix>_x_out.f32 are read as raw binary
+ * num_frames  number of frames to read and compare
+ * hAdaComb    state object, (re)initialised here before the first frame
+ * All remaining parameters are forwarded unchanged to adacomb_process_frame().
+ *
+ * For every frame the RMS error between the computed and the reference output
+ * is printed to stdout. Any failure (negative or oversized dimensions, prefix
+ * too long, missing file, short read) terminates the program with exit(1).
+ */
+void adacomb_compare(
+    const char * prefix,
+    int num_frames,
+    AdaCombState* hAdaComb,
+    LinearLayer *kernel_layer,
+    LinearLayer *gain_layer,
+    LinearLayer *global_gain_layer,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float log_gain_limit
+)
+{
+    char feature_file[256];
+    char x_in_file[256];
+    char p_in_file[256];
+    char x_out_file[256];
+    char message[512];
+    int i_frame, i_sample;
+    float mse;
+    float features[512];
+    float x_in[512];
+    float x_out_ref[512];
+    float x_out[512];
+    int pitch_lag;
+    float window[40];
+    FILE *f_features, *f_x_in, *f_p_in, *f_x_out;
+    size_t n_features, n_samples;
+
+    init_adacomb_state(hAdaComb);
+    /* NOTE(review): the window is always built for 40 taps while overlap_size
+       is passed on separately -- confirm that overlap_size never exceeds 40. */
+    compute_overlap_window(window, 40);
+
+    /* The frame buffers are fixed-size: refuse dimensions that would overrun them. */
+    if (feature_dim < 0 || frame_size < 0)
+    {
+        fprintf(stderr, "negative dimension given for %s\n", prefix);
+        exit(1);
+    }
+    n_features = (size_t)feature_dim;
+    n_samples = (size_t)frame_size;
+    if (n_features > sizeof(features) / sizeof(features[0])
+        || n_samples > sizeof(x_out) / sizeof(x_out[0]))
+    {
+        fprintf(stderr, "frame dimensions for %s exceed the test buffers\n", prefix);
+        exit(1);
+    }
+
+    /* snprintf never writes past the buffer; a result >= the buffer size
+       means the file name would have been cut off. */
+    if (snprintf(feature_file, sizeof(feature_file), "%s_features.f32", prefix) >= (int)sizeof(feature_file)
+        || snprintf(x_in_file, sizeof(x_in_file), "%s_x_in.f32", prefix) >= (int)sizeof(x_in_file)
+        || snprintf(p_in_file, sizeof(p_in_file), "%s_p_in.s32", prefix) >= (int)sizeof(p_in_file)
+        || snprintf(x_out_file, sizeof(x_out_file), "%s_x_out.f32", prefix) >= (int)sizeof(x_out_file))
+    {
+        fprintf(stderr, "file prefix too long: %s\n", prefix);
+        exit(1);
+    }
+
+    /* The vectors are raw binary data, hence "rb" rather than text mode. */
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    f_p_in = fopen(p_in_file, "rb");
+    if (f_p_in == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", p_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    f_x_out = fopen(x_out_file, "rb");
+    if (f_x_out == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", x_out_file);
+        perror(message);
+        exit(1);
+    }
+
+    for (i_frame = 0; i_frame < num_frames; i_frame ++)
+    {
+        if (fread(features, sizeof(float), n_features, f_features) != n_features)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+            exit(1);
+        }
+
+        if (fread(x_in, sizeof(float), n_samples, f_x_in) != n_samples)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+            exit(1);
+        }
+
+        /* One pitch lag per frame. NOTE(review): the ".s32" suffix suggests
+           32-bit values while sizeof(int) is read -- confirm int is 32 bits
+           on every platform this test is run on. */
+        if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file);
+            exit(1);
+        }
+
+        if (fread(x_out_ref, sizeof(float), n_samples, f_x_out) != n_samples)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+            exit(1);
+        }
+
+        adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,
+            pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);
+
+        /* root-mean-square deviation from the reference output of this frame */
+        mse = 0;
+        for (i_sample = 0; i_sample < frame_size; i_sample ++)
+        {
+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+        }
+        mse = sqrt(mse / (frame_size));
+        printf("rmse[%d] %f\n", i_frame, mse);
+
+    }
+
+    fclose(f_x_out);
+    fclose(f_p_in);
+    fclose(f_x_in);
+    fclose(f_features);
+}
+
+/* Checks adashape_process_frame() against stored test vectors.
+ *
+ * prefix      common path prefix; <prefix>_features.f32, <prefix>_x_in.f32 and
+ *             <prefix>_x_out.f32 are read as raw floats
+ * num_frames  number of frames to read and compare
+ * hAdaShape   state object, (re)initialised here before the first frame
+ * All remaining parameters are forwarded unchanged to adashape_process_frame().
+ *
+ * For every frame the RMS error between the computed and the reference output
+ * is printed to stdout. Any failure (negative or oversized dimensions, prefix
+ * too long, missing file, short read) terminates the program with exit(1).
+ */
+void adashape_compare(
+    const char * prefix,
+    int num_frames,
+    AdaShapeState* hAdaShape,
+    LinearLayer *alpha1,
+    LinearLayer *alpha2,
+    int feature_dim,
+    int frame_size,
+    int avg_pool_k
+)
+{
+    char feature_file[256];
+    char x_in_file[256];
+    char x_out_file[256];
+    char message[512];
+    int i_frame, i_sample;
+    float mse;
+    float features[512];
+    float x_in[512];
+    float x_out_ref[512];
+    float x_out[512];
+    FILE *f_features, *f_x_in, *f_x_out;
+    size_t n_features, n_samples;
+
+    init_adashape_state(hAdaShape);
+
+    /* The frame buffers are fixed-size: refuse dimensions that would overrun them. */
+    if (feature_dim < 0 || frame_size < 0)
+    {
+        fprintf(stderr, "negative dimension given for %s\n", prefix);
+        exit(1);
+    }
+    n_features = (size_t)feature_dim;
+    n_samples = (size_t)frame_size;
+    if (n_features > sizeof(features) / sizeof(features[0])
+        || n_samples > sizeof(x_out) / sizeof(x_out[0]))
+    {
+        fprintf(stderr, "frame dimensions for %s exceed the test buffers\n", prefix);
+        exit(1);
+    }
+
+    /* snprintf never writes past the buffer; a result >= the buffer size
+       means the file name would have been cut off. */
+    if (snprintf(feature_file, sizeof(feature_file), "%s_features.f32", prefix) >= (int)sizeof(feature_file)
+        || snprintf(x_in_file, sizeof(x_in_file), "%s_x_in.f32", prefix) >= (int)sizeof(x_in_file)
+        || snprintf(x_out_file, sizeof(x_out_file), "%s_x_out.f32", prefix) >= (int)sizeof(x_out_file))
+    {
+        fprintf(stderr, "file prefix too long: %s\n", prefix);
+        exit(1);
+    }
+
+    /* The vectors are raw binary floats, hence "rb" rather than text mode. */
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+
+    f_x_out = fopen(x_out_file, "rb");
+    if (f_x_out == NULL)
+    {
+        snprintf(message, sizeof(message), "could not open file %s", x_out_file);
+        perror(message);
+        exit(1);
+    }
+
+    for (i_frame = 0; i_frame < num_frames; i_frame ++)
+    {
+        if (fread(features, sizeof(float), n_features, f_features) != n_features)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
+            exit(1);
+        }
+
+        if (fread(x_in, sizeof(float), n_samples, f_x_in) != n_samples)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
+            exit(1);
+        }
+
+        if (fread(x_out_ref, sizeof(float), n_samples, f_x_out) != n_samples)
+        {
+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
+            exit(1);
+        }
+
+        adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,
+            frame_size, avg_pool_k, 0);
+
+        /* root-mean-square deviation from the reference output of this frame */
+        mse = 0;
+        for (i_sample = 0; i_sample < frame_size; i_sample ++)
+        {
+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
+        }
+        mse = sqrt(mse / (frame_size));
+        printf("rmse[%d] %f\n", i_frame, mse);
+
+    }
+
+    fclose(f_x_out);
+    fclose(f_x_in);
+    fclose(f_features);
+}
+
+
+/* Runs every comparison in turn against the reference data stored under
+ * testvectors/, five frames per layer. Returns 0 once all comparisons have
+ * run; the *_compare() helpers call exit(1) themselves on I/O errors. */
+int main(void)
+{
+    LACELayers hLACE;
+    NOLACELayers hNoLACE;
+
+    AdaConvState hAdaConv;
+    AdaCombState hAdaComb;
+    AdaShapeState hAdaShape;
+
+    /* No state initialisation is needed here: each *_compare() helper
+       initialises the state object it is handed before processing. */
+
+    init_lacelayers(&hLACE, lacelayers_arrays);
+    init_nolacelayers(&hNoLACE, nolacelayers_arrays);
+
+    printf("\ntesting lace.af1 (1 in, 1 out)...\n");
+    adaconv_compare(
+        "testvectors/lace_af1",
+        5,
+        &hAdaConv,
+        &hLACE.lace_af1_kernel,
+        &hLACE.lace_af1_gain,
+        LACE_AF1_FEATURE_DIM,
+        LACE_AF1_FRAME_SIZE,
+        LACE_AF1_OVERLAP_SIZE,
+        LACE_AF1_IN_CHANNELS,
+        LACE_AF1_OUT_CHANNELS,
+        LACE_AF1_KERNEL_SIZE,
+        LACE_AF1_LEFT_PADDING,
+        LACE_AF1_FILTER_GAIN_A,
+        LACE_AF1_FILTER_GAIN_B,
+        LACE_AF1_SHAPE_GAIN
+    );
+
+
+    printf("\ntesting nolace.af1 (1 in, 2 out)...\n");
+    adaconv_compare(
+        "testvectors/nolace_af1",
+        5,
+        &hAdaConv,
+        &hNoLACE.nolace_af1_kernel,
+        &hNoLACE.nolace_af1_gain,
+        NOLACE_AF1_FEATURE_DIM,
+        NOLACE_AF1_FRAME_SIZE,
+        NOLACE_AF1_OVERLAP_SIZE,
+        NOLACE_AF1_IN_CHANNELS,
+        NOLACE_AF1_OUT_CHANNELS,
+        NOLACE_AF1_KERNEL_SIZE,
+        NOLACE_AF1_LEFT_PADDING,
+        NOLACE_AF1_FILTER_GAIN_A,
+        NOLACE_AF1_FILTER_GAIN_B,
+        NOLACE_AF1_SHAPE_GAIN
+    );
+
+
+    /* banner now starts with a newline like every other section */
+    printf("\ntesting nolace.af4 (2 in, 1 out)...\n");
+    adaconv_compare(
+        "testvectors/nolace_af4",
+        5,
+        &hAdaConv,
+        &hNoLACE.nolace_af4_kernel,
+        &hNoLACE.nolace_af4_gain,
+        NOLACE_AF4_FEATURE_DIM,
+        NOLACE_AF4_FRAME_SIZE,
+        NOLACE_AF4_OVERLAP_SIZE,
+        NOLACE_AF4_IN_CHANNELS,
+        NOLACE_AF4_OUT_CHANNELS,
+        NOLACE_AF4_KERNEL_SIZE,
+        NOLACE_AF4_LEFT_PADDING,
+        NOLACE_AF4_FILTER_GAIN_A,
+        NOLACE_AF4_FILTER_GAIN_B,
+        NOLACE_AF4_SHAPE_GAIN
+    );
+
+    printf("\ntesting nolace.af2 (2 in, 2 out)...\n");
+    adaconv_compare(
+        "testvectors/nolace_af2",
+        5,
+        &hAdaConv,
+        &hNoLACE.nolace_af2_kernel,
+        &hNoLACE.nolace_af2_gain,
+        NOLACE_AF2_FEATURE_DIM,
+        NOLACE_AF2_FRAME_SIZE,
+        NOLACE_AF2_OVERLAP_SIZE,
+        NOLACE_AF2_IN_CHANNELS,
+        NOLACE_AF2_OUT_CHANNELS,
+        NOLACE_AF2_KERNEL_SIZE,
+        NOLACE_AF2_LEFT_PADDING,
+        NOLACE_AF2_FILTER_GAIN_A,
+        NOLACE_AF2_FILTER_GAIN_B,
+        NOLACE_AF2_SHAPE_GAIN
+    );
+
+    printf("\ntesting lace.cf1...\n");
+    adacomb_compare(
+        "testvectors/lace_cf1",
+        5,
+        &hAdaComb,
+        &hLACE.lace_cf1_kernel,
+        &hLACE.lace_cf1_gain,
+        &hLACE.lace_cf1_global_gain,
+        LACE_CF1_FEATURE_DIM,
+        LACE_CF1_FRAME_SIZE,
+        LACE_CF1_OVERLAP_SIZE,
+        LACE_CF1_KERNEL_SIZE,
+        LACE_CF1_LEFT_PADDING,
+        LACE_CF1_FILTER_GAIN_A,
+        LACE_CF1_FILTER_GAIN_B,
+        LACE_CF1_LOG_GAIN_LIMIT
+    );
+
+    printf("\ntesting nolace.tdshape1...\n");
+    adashape_compare(
+        "testvectors/nolace_tdshape1",
+        5,
+        &hAdaShape,
+        &hNoLACE.nolace_tdshape1_alpha1,
+        &hNoLACE.nolace_tdshape1_alpha2,
+        NOLACE_TDSHAPE1_FEATURE_DIM,
+        NOLACE_TDSHAPE1_FRAME_SIZE,
+        NOLACE_TDSHAPE1_AVG_POOL_K
+    );
+
+    return 0;
+}
+
+/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */ \ No newline at end of file
diff --git a/dnn/arm/arm_dnn_map.c b/dnn/arm/arm_dnn_map.c
new file mode 100644
index 00000000..98a2a312
--- /dev/null
+++ b/dnn/arm/arm_dnn_map.c
@@ -0,0 +1,88 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "arm/armcpu.h"
+#include "nnet.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD))
+
+/* Run-time dispatch table for compute_linear().
+ * It holds OPUS_ARCHMASK + 1 function pointers; the compute_linear() macro in
+ * dnn_arm.h selects an entry with (arch) & OPUS_ARCHMASK. The first three
+ * entries use the plain C implementation, the last two are wrapped in
+ * MAY_HAVE_NEON()/MAY_HAVE_DOTPROD() (presumably falling back to the C version
+ * when the corresponding extension is not compiled in -- confirm in armcpu.h).
+ * NOTE(review): the enclosing #if only defines this table when
+ * OPUS_ARM_MAY_HAVE_DOTPROD is set, while dnn_arm.h declares and uses it for
+ * RTCD builds that only define OPUS_ARM_MAY_HAVE_NEON -- confirm that such a
+ * configuration cannot occur, otherwise it would fail to link. */
+void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+ const LinearLayer *linear,
+ float *out,
+ const float *in
+) = {
+ compute_linear_c, /* default */
+ compute_linear_c,
+ compute_linear_c,
+ MAY_HAVE_NEON(compute_linear), /* neon */
+ MAY_HAVE_DOTPROD(compute_linear) /* dotprod */
+};
+
+#endif
+
+#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
+
+/* Run-time dispatch table for compute_activation().
+ * It holds OPUS_ARCHMASK + 1 function pointers; the compute_activation() macro
+ * in dnn_arm.h selects an entry with (arch) & OPUS_ARCHMASK. The first three
+ * entries use the plain C implementation; the NEON and DOTPROD slots go
+ * through the MAY_HAVE_*() wrappers. */
+void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+ float *output,
+ const float *input,
+ int N,
+ int activation
+) = {
+ compute_activation_c, /* default */
+ compute_activation_c,
+ compute_activation_c,
+ MAY_HAVE_NEON(compute_activation), /* neon */
+ MAY_HAVE_DOTPROD(compute_activation) /* dotprod */
+};
+
+/* Run-time dispatch table for compute_conv2d().
+ * It holds OPUS_ARCHMASK + 1 function pointers; the compute_conv2d() macro in
+ * dnn_arm.h selects an entry with (arch) & OPUS_ARCHMASK. The first three
+ * entries use the plain C implementation; the NEON and DOTPROD slots go
+ * through the MAY_HAVE_*() wrappers. */
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+ const Conv2dLayer *conv,
+ float *out,
+ float *mem,
+ const float *in,
+ int height,
+ int hstride,
+ int activation
+) = {
+ compute_conv2d_c, /* default */
+ compute_conv2d_c,
+ compute_conv2d_c,
+ MAY_HAVE_NEON(compute_conv2d), /* neon */
+ MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod */
+};
+
+
+#endif
+
+
+#endif
diff --git a/dnn/arm/dnn_arm.h b/dnn/arm/dnn_arm.h
new file mode 100644
index 00000000..d7ac7452
--- /dev/null
+++ b/dnn/arm/dnn_arm.h
@@ -0,0 +1,104 @@
+/* Copyright (c) 2011-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DNN_ARM_H
+#define DNN_ARM_H
+
+#include "cpu_support.h"
+#include "opus_types.h"
+
+/* Architecture-specific variants of the nnet kernels. Each generic kernel
+ * (compute_linear, compute_activation, compute_conv2d) has one variant with a
+ * _neon suffix and one with a _dotprod suffix; the macros below pick between
+ * them at compile time or through a run-time dispatch table.
+ * NOTE(review): LinearLayer and Conv2dLayer are not declared by the headers
+ * included above; presumably this header is only included after those types
+ * are defined -- confirm. */
+void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
+void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
+
+void compute_activation_neon(float *output, const float *input, int N, int activation);
+void compute_activation_dotprod(float *output, const float *input, int N, int activation);
+
+void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+
+/* Selection of the compute_linear() implementation:
+ *  1. DOTPROD presumed at build time: call the dotprod variant directly.
+ *  2. NEON intrinsics presumed and DOTPROD not even optional: call the NEON
+ *     variant directly.
+ *  3. Otherwise, with run-time CPU detection: index DNN_COMPUTE_LINEAR_IMPL
+ *     with the masked arch value.
+ * In the direct cases the arch argument is evaluated and discarded.
+ * OVERRIDE_COMPUTE_LINEAR is defined whenever one of the branches is taken. */
+#if defined(OPUS_ARM_PRESUME_DOTPROD)
+
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))
+
+#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+/* Run-time dispatch table, OPUS_ARCHMASK + 1 entries. */
+extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+ const LinearLayer *linear,
+ float *out,
+ const float *in
+ );
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) \
+ ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
+
+
+#endif
+
+/* Selection of the compute_activation() and compute_conv2d() implementations:
+ *  - NEON presumed at build time: call the NEON variants directly (the arch
+ *    argument is evaluated and discarded).
+ *  - Otherwise, with run-time CPU detection: index the corresponding dispatch
+ *    table with the masked arch value.
+ * OVERRIDE_COMPUTE_ACTIVATION / OVERRIDE_COMPUTE_CONV2D are defined whenever
+ * one of the branches is taken. */
+#if defined(OPUS_ARM_PRESUME_NEON)
+
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
+
+/* Run-time dispatch table for activations, OPUS_ARCHMASK + 1 entries. */
+extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+ float *output,
+ const float *input,
+ int N,
+ int activation
+ );
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) \
+ ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
+
+
+/* Run-time dispatch table for 2-D convolutions, OPUS_ARCHMASK + 1 entries. */
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+ const Conv2dLayer *conv,
+ float *out,
+ float *mem,
+ const float *in,
+ int height,
+ int hstride,
+ int activation
+ );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+ ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
+
+
+#endif
+
+
+#endif /* DNN_ARM_H */
diff --git a/dnn/arm/nnet_dotprod.c b/dnn/arm/nnet_dotprod.c
new file mode 100644
index 00000000..1354ed33
--- /dev/null
+++ b/dnn/arm/nnet_dotprod.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef __ARM_FEATURE_DOTPROD
+#error nnet_dotprod.c is being compiled without DOTPROD enabled
+#endif
+
+#define RTCD_ARCH dotprod
+
+#include "nnet_arch.h"
diff --git a/dnn/arm/nnet_neon.c b/dnn/arm/nnet_neon.c
new file mode 100644
index 00000000..fb636f85
--- /dev/null
+++ b/dnn/arm/nnet_neon.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))
+#error nnet_neon.c is being compiled without Neon enabled
+#endif
+
+#define RTCD_ARCH neon
+
+#include "nnet_arch.h"
diff --git a/dnn/burg.c b/dnn/burg.c
new file mode 100644
index 00000000..3d6b23b0
--- /dev/null
+++ b/dnn/burg.c
@@ -0,0 +1,246 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+
+#include "arch.h"
+#include "burg.h"
+
+#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
+#define SILK_MAX_ORDER_LPC 16
+#define FIND_LPC_COND_FAC 1e-5f
+
+/* Energy of a float buffer: the sum of its squared samples, accumulated in
+   double precision. Returns 0.0 when dataSize is not positive. */
+static double silk_energy_FLP(
+    const float *data,
+    int dataSize
+)
+{
+    double energy = 0.0;
+    int pos = 0;
+
+    /* Main part: four samples per iteration */
+    while( pos < dataSize - 3 ) {
+        const float *p = data + pos;
+        energy += p[ 0 ] * (double)p[ 0 ] +
+                  p[ 1 ] * (double)p[ 1 ] +
+                  p[ 2 ] * (double)p[ 2 ] +
+                  p[ 3 ] * (double)p[ 3 ];
+        pos += 4;
+    }
+
+    /* Tail: at most three leftover samples */
+    while( pos < dataSize ) {
+        energy += data[ pos ] * (double)data[ pos ];
+        pos++;
+    }
+
+    assert( energy >= 0.0 );
+    return energy;
+}
+
+/* Dot product of the first dataSize samples of two float buffers, accumulated
+   in double precision. Returns 0.0 when dataSize is not positive. */
+static double silk_inner_product_FLP(
+    const float *data1,
+    const float *data2,
+    int dataSize
+)
+{
+    double acc = 0.0;
+    int pos = 0;
+
+    /* Main part: four products per iteration */
+    while( pos < dataSize - 3 ) {
+        const float *a = data1 + pos;
+        const float *b = data2 + pos;
+        acc += a[ 0 ] * (double)b[ 0 ] +
+               a[ 1 ] * (double)b[ 1 ] +
+               a[ 2 ] * (double)b[ 2 ] +
+               a[ 3 ] * (double)b[ 3 ];
+        pos += 4;
+    }
+
+    /* Tail: at most three leftover products */
+    while( pos < dataSize ) {
+        acc += data1[ pos ] * (double)data2[ pos ];
+        pos++;
+    }
+
+    return acc;
+}
+
+
+/* Burg-style LPC analysis over nb_subfr stacked subframes.
+ * Reflection coefficients are computed order by order and folded into the
+ * AR coefficients Af[]; the negated coefficients are written to A[0..D-1] and
+ * the residual energy is returned (passed through MAX32(0, .) so it is never
+ * reported as negative).
+ * The recursion stops early, zeroing the remaining coefficients, once the
+ * inverse prediction gain would drop to minInvGain or below.
+ * Preconditions: subfr_length * nb_subfr <= MAX_FRAME_SIZE (asserted). The
+ * local work arrays are sized by SILK_MAX_ORDER_LPC, so D must not exceed
+ * SILK_MAX_ORDER_LPC; this is not checked here. */
+float silk_burg_analysis( /* O returns residual energy */
+ float A[], /* O prediction coefficients (length order) */
+ const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
+ const float minInvGain, /* I minimum inverse prediction gain */
+ const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
+ const int nb_subfr, /* I number of subframes stacked in x */
+ const int D /* I order */
+)
+{
+ int k, n, s, reached_max_gain;
+ double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
+ const float *x_ptr;
+ double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
+ double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
+ double Af[ SILK_MAX_ORDER_LPC ];
+
+ assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+ /* Compute autocorrelations, added over subframes */
+ /* C0 is the total energy of x; C_first_row[ n - 1 ] is the lag-n correlation summed over subframes */
+ C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
+ memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ for( n = 1; n < D + 1; n++ ) {
+ C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+ }
+ }
+ memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
+
+ /* Initialize */
+ /* The zero-lag term is inflated by FIND_LPC_COND_FAC * C0 plus a small constant (conditioning; also keeps it non-zero for all-zero input) */
+ CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
+ invGain = 1.0f;
+ reached_max_gain = 0;
+ for( n = 0; n < D; n++ ) {
+ /* Update first row of correlation matrix (without first element) */
+ /* Update last row of correlation matrix (without last element, stored in reversed order) */
+ /* Update C * Af */
+ /* Update C * flipud(Af) (stored in reversed order) */
+ for( s = 0; s < nb_subfr; s++ ) {
+ x_ptr = x + s * subfr_length;
+ tmp1 = x_ptr[ n ];
+ tmp2 = x_ptr[ subfr_length - n - 1 ];
+ for( k = 0; k < n; k++ ) {
+ C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
+ C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
+ Atmp = Af[ k ];
+ tmp1 += x_ptr[ n - k - 1 ] * Atmp;
+ tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
+ }
+ for( k = 0; k <= n; k++ ) {
+ CAf[ k ] -= tmp1 * x_ptr[ n - k ];
+ CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
+ }
+ }
+ tmp1 = C_first_row[ n ];
+ tmp2 = C_last_row[ n ];
+ for( k = 0; k < n; k++ ) {
+ Atmp = Af[ k ];
+ tmp1 += C_last_row[ n - k - 1 ] * Atmp;
+ tmp2 += C_first_row[ n - k - 1 ] * Atmp;
+ }
+ CAf[ n + 1 ] = tmp1;
+ CAb[ n + 1 ] = tmp2;
+
+ /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
+ num = CAb[ n + 1 ];
+ nrg_b = CAb[ 0 ];
+ nrg_f = CAf[ 0 ];
+ for( k = 0; k < n; k++ ) {
+ Atmp = Af[ k ];
+ num += CAb[ n - k ] * Atmp;
+ nrg_b += CAb[ k + 1 ] * Atmp;
+ nrg_f += CAf[ k + 1 ] * Atmp;
+ }
+ assert( nrg_f > 0.0 );
+ assert( nrg_b > 0.0 );
+
+ /* Calculate the next order reflection (parcor) coefficient */
+ rc = -2.0 * num / ( nrg_f + nrg_b );
+ assert( rc > -1.0 && rc < 1.0 );
+
+ /* Update inverse prediction gain */
+ tmp1 = invGain * ( 1.0 - rc * rc );
+ if( tmp1 <= minInvGain ) {
+ /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+ rc = sqrt( 1.0 - minInvGain / invGain );
+ if( num > 0 ) {
+ /* Ensure the adjusted reflection coefficient has the original sign */
+ rc = -rc;
+ }
+ invGain = minInvGain;
+ reached_max_gain = 1;
+ } else {
+ invGain = tmp1;
+ }
+
+ /* Update the AR coefficients */
+ /* Entries k and n - k - 1 are updated as a pair, each from the other's previous value */
+ for( k = 0; k < (n + 1) >> 1; k++ ) {
+ tmp1 = Af[ k ];
+ tmp2 = Af[ n - k - 1 ];
+ Af[ k ] = tmp1 + rc * tmp2;
+ Af[ n - k - 1 ] = tmp2 + rc * tmp1;
+ }
+ Af[ n ] = rc;
+
+ if( reached_max_gain ) {
+ /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+ for( k = n + 1; k < D; k++ ) {
+ Af[ k ] = 0.0;
+ }
+ break;
+ }
+
+ /* Update C * Af and C * Ab */
+ for( k = 0; k <= n + 1; k++ ) {
+ tmp1 = CAf[ k ];
+ CAf[ k ] += rc * CAb[ n - k + 1 ];
+ CAb[ n - k + 1 ] += rc * tmp1;
+ }
+ }
+
+ if( reached_max_gain ) {
+ /* Convert to float */
+ for( k = 0; k < D; k++ ) {
+ A[ k ] = (float)( -Af[ k ] );
+ }
+ /* Subtract energy of preceding samples from C0 */
+ for( s = 0; s < nb_subfr; s++ ) {
+ C0 -= silk_energy_FLP( x + s * subfr_length, D );
+ }
+ /* Approximate residual energy */
+ nrg_f = C0 * invGain;
+ } else {
+ /* Compute residual energy and store coefficients as float */
+ nrg_f = CAf[ 0 ];
+ tmp1 = 1.0;
+ for( k = 0; k < D; k++ ) {
+ Atmp = Af[ k ];
+ nrg_f += CAf[ k + 1 ] * Atmp;
+ tmp1 += Atmp * Atmp;
+ A[ k ] = (float)(-Atmp);
+ }
+ /* Remove the conditioning term that was added to the zero-lag correlation during initialization */
+ nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
+ }
+
+ /* Return residual energy */
+ return MAX32(0, (float)nrg_f);
+}
diff --git a/dnn/burg.h b/dnn/burg.h
new file mode 100644
index 00000000..f5998d2f
--- /dev/null
+++ b/dnn/burg.h
@@ -0,0 +1,41 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef BURG_H
+#define BURG_H
+
+
+/* Burg-style LPC analysis: writes D prediction coefficients to A[] and returns
+   the residual energy. The input x holds nb_subfr subframes of subfr_length
+   samples each, stored back to back. */
+float silk_burg_analysis( /* O returns residual energy */
+ float A[], /* O prediction coefficients (length order) */
+ const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
+ const float minInvGain, /* I minimum inverse prediction gain */
+ const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
+ const int nb_subfr, /* I number of subframes stacked in x */
+ const int D /* I order */
+);
+
+#endif
diff --git a/dnn/common.h b/dnn/common.h
new file mode 100644
index 00000000..b3bec730
--- /dev/null
+++ b/dnn/common.h
@@ -0,0 +1,56 @@
+
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "opus_defines.h"
+
+#define LOG256 5.5451774445f
+/* Fast approximation of log2(x).
+ * The float is reinterpreted through a union: the biased exponent field gives
+ * the integer part, the exponent is then rebased so the value lies in [1, 2),
+ * and a cubic polynomial in (value - 1.5) supplies the fractional part.
+ * Relies on float being a 32-bit IEEE-754 single and int being 32 bits wide,
+ * as the bit manipulation (shift by 23, bias 127) shows. Only meaningful for
+ * positive, normal x. */
+static OPUS_INLINE float log2_approx(float x)
+{
+    int integer;
+    float frac;
+    union {
+        float f;
+        int i;
+    } in;
+    in.f = x;
+    /* Unbiased exponent of x */
+    integer = (in.i>>23)-127;
+    /* Rebase the exponent so that in.f lies in [1, 2). The subtraction is done
+       in unsigned arithmetic: integer is negative for every x < 1, and
+       left-shifting a negative signed value is undefined behaviour in C. */
+    in.i = (int)((unsigned)in.i - ((unsigned)integer << 23));
+    frac = in.f - 1.5f;
+    frac = -0.41445418f + frac*(0.95909232f
+           + frac*(-0.33951290f + frac*0.16541097f));
+    return 1+integer+frac;
+}
+
+#define log_approx(x) (0.69315f*log2_approx(x))
+
+/* Expands a mu-law code u (centred on 128) to a linear sample value: the sign
+   is taken from (u - 128) and the magnitude grows exponentially with
+   |u - 128|. */
+static OPUS_INLINE float ulaw2lin(float u)
+{
+    const float lin_scale = 32768.f/255.f;
+    float sign;
+    float centred = u - 128.f;
+
+    if (centred >= 0.f) {
+        sign = 1.f;
+    } else {
+        sign = -1.f;
+    }
+    centred = fabs(centred);
+    return sign*lin_scale*(exp(centred/128.*LOG256)-1);
+}
+
+/* Compresses a linear sample x to a mu-law code: a logarithmic curve is
+   applied to |x|, the sign of x is restored, the result is centred on 128,
+   clamped to [0, 255] and rounded to the nearest integer. */
+static OPUS_INLINE int lin2ulaw(float x)
+{
+    const float in_scale = 255.f/32768.f;
+    float code;
+    int sign;
+
+    if (x >= 0) {
+        sign = 1;
+    } else {
+        sign = -1;
+    }
+    x = fabs(x);
+    code = (sign*(128*log_approx(1+in_scale*x)/LOG256));
+    code = 128 + code;
+    /* Saturate to the valid code range */
+    if (code < 0) {
+        code = 0;
+    } else if (code > 255) {
+        code = 255;
+    }
+    return (int)floor(.5 + code);
+}
+
+
+
+#endif
diff --git a/dnn/datasets.txt b/dnn/datasets.txt
new file mode 100644
index 00000000..00445216
--- /dev/null
+++ b/dnn/datasets.txt
@@ -0,0 +1,173 @@
+The following datasets can be used to train a language-independent LPCNet model.
+A good choice is to include all the data from these datasets, except for
+hi_fi_tts for which only a small subset is recommended (since it's very large
+but has few speakers). Note that this data typically needs to be resampled
+before it can be used.
+
+https://www.openslr.org/resources/30/si_lk.tar.gz
+https://www.openslr.org/resources/32/af_za.tar.gz
+https://www.openslr.org/resources/32/st_za.tar.gz
+https://www.openslr.org/resources/32/tn_za.tar.gz
+https://www.openslr.org/resources/32/xh_za.tar.gz
+https://www.openslr.org/resources/37/bn_bd.zip
+https://www.openslr.org/resources/37/bn_in.zip
+https://www.openslr.org/resources/41/jv_id_female.zip
+https://www.openslr.org/resources/41/jv_id_male.zip
+https://www.openslr.org/resources/42/km_kh_male.zip
+https://www.openslr.org/resources/43/ne_np_female.zip
+https://www.openslr.org/resources/44/su_id_female.zip
+https://www.openslr.org/resources/44/su_id_male.zip
+https://www.openslr.org/resources/61/es_ar_female.zip
+https://www.openslr.org/resources/61/es_ar_male.zip
+https://www.openslr.org/resources/63/ml_in_female.zip
+https://www.openslr.org/resources/63/ml_in_male.zip
+https://www.openslr.org/resources/64/mr_in_female.zip
+https://www.openslr.org/resources/65/ta_in_female.zip
+https://www.openslr.org/resources/65/ta_in_male.zip
+https://www.openslr.org/resources/66/te_in_female.zip
+https://www.openslr.org/resources/66/te_in_male.zip
+https://www.openslr.org/resources/69/ca_es_female.zip
+https://www.openslr.org/resources/69/ca_es_male.zip
+https://www.openslr.org/resources/70/en_ng_female.zip
+https://www.openslr.org/resources/70/en_ng_male.zip
+https://www.openslr.org/resources/71/es_cl_female.zip
+https://www.openslr.org/resources/71/es_cl_male.zip
+https://www.openslr.org/resources/72/es_co_female.zip
+https://www.openslr.org/resources/72/es_co_male.zip
+https://www.openslr.org/resources/73/es_pe_female.zip
+https://www.openslr.org/resources/73/es_pe_male.zip
+https://www.openslr.org/resources/74/es_pr_female.zip
+https://www.openslr.org/resources/75/es_ve_female.zip
+https://www.openslr.org/resources/75/es_ve_male.zip
+https://www.openslr.org/resources/76/eu_es_female.zip
+https://www.openslr.org/resources/76/eu_es_male.zip
+https://www.openslr.org/resources/77/gl_es_female.zip
+https://www.openslr.org/resources/77/gl_es_male.zip
+https://www.openslr.org/resources/78/gu_in_female.zip
+https://www.openslr.org/resources/78/gu_in_male.zip
+https://www.openslr.org/resources/79/kn_in_female.zip
+https://www.openslr.org/resources/79/kn_in_male.zip
+https://www.openslr.org/resources/80/my_mm_female.zip
+https://www.openslr.org/resources/83/irish_english_male.zip
+https://www.openslr.org/resources/83/midlands_english_female.zip
+https://www.openslr.org/resources/83/midlands_english_male.zip
+https://www.openslr.org/resources/83/northern_english_female.zip
+https://www.openslr.org/resources/83/northern_english_male.zip
+https://www.openslr.org/resources/83/scottish_english_female.zip
+https://www.openslr.org/resources/83/scottish_english_male.zip
+https://www.openslr.org/resources/83/southern_english_female.zip
+https://www.openslr.org/resources/83/southern_english_male.zip
+https://www.openslr.org/resources/83/welsh_english_female.zip
+https://www.openslr.org/resources/83/welsh_english_male.zip
+https://www.openslr.org/resources/86/yo_ng_female.zip
+https://www.openslr.org/resources/86/yo_ng_male.zip
+https://www.openslr.org/resources/109/hi_fi_tts_v0.tar.gz
+
+The corresponding citations for all these datasets are:
+
+ @inproceedings{demirsahin-etal-2020-open,
+ title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},
+ author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},
+ booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+ month = may,
+ year = {2020},
+ pages = {6532--6541},
+ address = {Marseille, France},
+ publisher = {European Language Resources Association (ELRA)},
+ url = {https://www.aclweb.org/anthology/2020.lrec-1.804},
+ ISBN = {979-10-95546-34-4},
+ }
+ @inproceedings{kjartansson-etal-2020-open,
+ title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},
+ author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},
+ booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},
+ year = {2020},
+ pages = {21--27},
+ month = may,
+ address = {Marseille, France},
+ publisher = {European Language Resources Association (ELRA)},
+ url = {https://www.aclweb.org/anthology/2020.sltu-1.3},
+ ISBN = {979-10-95546-35-1},
+ }
+
+
+ @inproceedings{guevara-rukoz-etal-2020-crowdsourcing,
+ title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},
+ author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},
+ booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+ year = {2020},
+ month = may,
+ address = {Marseille, France},
+ publisher = {European Language Resources Association (ELRA)},
+ url = {https://www.aclweb.org/anthology/2020.lrec-1.801},
+ pages = {6504--6513},
+ ISBN = {979-10-95546-34-4},
+ }
+ @inproceedings{he-etal-2020-open,
+ title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and Telugu Speech Synthesis Systems}},
+ author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin, Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},
+ booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+ month = may,
+ year = {2020},
+ address = {Marseille, France},
+ publisher = {European Language Resources Association (ELRA)},
+ pages = {6494--6503},
+ url = {https://www.aclweb.org/anthology/2020.lrec-1.800},
+ ISBN = "{979-10-95546-34-4}",
+ }
+
+
+ @inproceedings{kjartansson-etal-tts-sltu2018,
+ title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese, Khmer, Nepali, Sinhala, and Sundanese}},
+ author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu De Silva and Supheakmungkol Sarin},
+ booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},
+ year = {2018},
+ address = {Gurugram, India},
+ month = aug,
+ pages = {66--70},
+ URL = {http://dx.doi.org/10.21437/SLTU.2018-14}
+ }
+
+
+ @inproceedings{oo-etal-2020-burmese,
+ title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application to Text-to-Speech}},
+ author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},
+ booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
+ month = may,
+ year = {2020},
+ pages = "6328--6339",
+ address = {Marseille, France},
+ publisher = {European Language Resources Association (ELRA)},
+ url = {https://www.aclweb.org/anthology/2020.lrec-1.777},
+ ISBN = {979-10-95546-34-4},
+ }
+ @inproceedings{van-niekerk-etal-2017,
+ title = {{Rapid development of TTS corpora for four South African languages}},
+ author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson and Martin Jansche and Linne Ha},
+ booktitle = {Proc. Interspeech 2017},
+ pages = {2178--2182},
+ address = {Stockholm, Sweden},
+ month = aug,
+ year = {2017},
+ URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}
+ }
+
+ @inproceedings{gutkin-et-al-yoruba2020,
+ title = {{Developing an Open-Source Corpus of Yoruba Speech}},
+ author = {Alexander Gutkin and I{\c{s}}{\i}n Demir{\c{s}}ahin and Oddur Kjartansson and Clara Rivera and K\d{\'o}lá Túb\d{\`o}sún},
+ booktitle = {Proceedings of Interspeech 2020},
+ pages = {404--408},
+ month = {October},
+ year = {2020},
+ address = {Shanghai, China},
+ publisher = {International Speech and Communication Association (ISCA)},
+ doi = {10.21437/Interspeech.2020-1096},
+ url = {http://dx.doi.org/10.21437/Interspeech.2020-1096},
+ }
+
+@article{bakhturina2021hi,
+ title={{Hi-Fi Multi-Speaker English TTS Dataset}},
+ author={Bakhturina, Evelina and Lavrukhin, Vitaly and Ginsburg, Boris and Zhang, Yang},
+ journal={arXiv preprint arXiv:2104.01497},
+ year={2021}
+}
diff --git a/dnn/download_model.bat b/dnn/download_model.bat
new file mode 100644
index 00000000..f49eaa59
--- /dev/null
+++ b/dnn/download_model.bat
@@ -0,0 +1,9 @@
+@echo off
+rem Fetch (unless already present) and unpack the archive opus_data-<id>.tar.gz.
+rem Usage: download_model.bat MODEL_ID
+if "%~1"=="" (
+    echo Usage: %~nx0 MODEL_ID
+    exit /b 1
+)
+set model=opus_data-%1.tar.gz
+
+if not exist "%model%" (
+    echo Downloading latest model
+    powershell -Command "(New-Object System.Net.WebClient).DownloadFile('https://media.xiph.org/opus/models/%model%', '%model%')"
+    rem Stop here if the download failed instead of running tar on a missing file
+    if errorlevel 1 exit /b 1
+)
+
+tar -xvzf "%model%"
diff --git a/dnn/download_model.sh b/dnn/download_model.sh
new file mode 100755
index 00000000..127441de
--- /dev/null
+++ b/dnn/download_model.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+set -e
+
+model=opus_data-$1.tar.gz
+
+if [ ! -f $model ]; then
+ echo "Downloading latest model"
+ wget https://media.xiph.org/opus/models/$model
+fi
+tar xvomf $model
diff --git a/dnn/dred_coding.c b/dnn/dred_coding.c
new file mode 100644
index 00000000..669ddc41
--- /dev/null
+++ b/dnn/dred_coding.c
@@ -0,0 +1,44 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+
+#include "celt/entenc.h"
+#include "os_support.h"
+#include "dred_config.h"
+#include "dred_coding.h"
+
+/* Quantizer level for latent chunk i.
+   Starts at q0 and increases with i at a slope picked by dQ from an 8-entry
+   table (slope is in 1/16 steps, with 8 added before the divide by 16), then
+   is capped at qmax. dQ is used directly as a table index, so callers must
+   pass a value in [0, 7]. */
+int compute_quantizer(int q0, int dQ, int qmax, int i) {
+  static const int slope_table[8] = {0, 2, 3, 4, 6, 8, 12, 16};
+  int level = q0 + (slope_table[dQ]*i + 8)/16;
+  if (level > qmax) return qmax;
+  return level;
+}
diff --git a/dnn/dred_coding.h b/dnn/dred_coding.h
new file mode 100644
index 00000000..1ce040c2
--- /dev/null
+++ b/dnn/dred_coding.h
@@ -0,0 +1,36 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_CODING_H
+#define DRED_CODING_H
+
+#include "opus_types.h"
+#include "entcode.h"
+
+int compute_quantizer(int q0, int dQ, int qmax, int i);
+
+#endif
diff --git a/dnn/dred_config.h b/dnn/dred_config.h
new file mode 100644
index 00000000..88b2261f
--- /dev/null
+++ b/dnn/dred_config.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_CONFIG_H
+#define DRED_CONFIG_H
+
+/* Change this once DRED gets an extension number assigned. */
+#define DRED_EXTENSION_ID 126
+
+/* Remove these two completely once DRED gets an extension number assigned. */
+#define DRED_EXPERIMENTAL_VERSION 10
+#define DRED_EXPERIMENTAL_BYTES 2
+
+
+#define DRED_MIN_BYTES 8
+
+/* These in part duplicate values defined in dred_rdovae_constants.h. */
+#define DRED_SILK_ENCODER_DELAY (79+12-80)
+#define DRED_FRAME_SIZE 160
+#define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE))
+#define DRED_MAX_DATA_SIZE 1000
+#define DRED_ENC_Q0 6
+#define DRED_ENC_Q1 15
+
+/* Covers 1.04 seconds so we can cover one second after the lookahead. */
+#define DRED_MAX_LATENTS 26
+#define DRED_NUM_REDUNDANCY_FRAMES (2*DRED_MAX_LATENTS)
+#define DRED_MAX_FRAMES (4*DRED_MAX_LATENTS)
+
+#endif
diff --git a/dnn/dred_decoder.c b/dnn/dred_decoder.c
new file mode 100644
index 00000000..1b284330
--- /dev/null
+++ b/dnn/dred_decoder.c
@@ -0,0 +1,129 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <string.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "os_support.h"
+#include "dred_decoder.h"
+#include "dred_coding.h"
+#include "celt/entdec.h"
+#include "celt/laplace.h"
+#include "dred_rdovae_stats_data.h"
+#include "dred_rdovae_constants.h"
+
+/* Decode dim Laplace-coded symbols from dec and write the dequantized values to x.
+   scale, r and p0 are per-dimension tables. A dimension with r == 0 or
+   p0 == 255 carries no symbol in the bitstream and decodes as 0. The symbol
+   is multiplied by 256 and divided by scale[k]; a zero scale is treated as 1
+   so the division is always defined. */
+static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
+  int k;
+  for (k=0;k<dim;k++) {
+    int divisor = scale[k];
+    int sym = 0;
+    if (divisor == 0) divisor = 1;
+    if (r[k] != 0 && p0[k] != 255) sym = ec_laplace_decode_p0(dec, p0[k]<<7, r[k]<<7);
+    x[k] = sym*256.f/divisor;
+  }
+}
+
+/* Entropy-decode one DRED payload into dec.
+
+   Layout read from the range coder, in order:
+     - q0 (uint, 16 values) and dQ (uint, 8 values);
+     - a 1-of-2 flag; when set, an extra offset of 32*uint(256) follows;
+     - a uint(32) offset; dec->dred_offset becomes
+       16 - offset - extra_offset + dred_frame_offset;
+     - when q0 < 14 and dQ > 0, a combined symbol that selects qmax
+       (otherwise qmax stays 15);
+     - the initial state (DRED_STATE_DIM values, tables indexed by q0);
+     - latent vectors (DRED_LATENT_DIM values each), quantizer level given by
+       compute_quantizer(q0, dQ, qmax, index).
+
+   bytes/num_bytes       payload to decode.
+   min_feature_frames    bounds how many latent vectors are read (see loop).
+   dred_frame_offset     added to the decoded offset (DRED position in a
+                         multiframe packet).
+
+   Sets dec->process_stage to 1 and dec->nb_latents to the number of latent
+   vectors decoded, and returns that number. */
+int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset)
+{
+  ec_dec ec;
+  int q_level;
+  int i;
+  int offset;
+  int q0;
+  int dQ;
+  int qmax;
+  int state_qoffset;
+  int extra_offset;
+
+  /* i advances by 2 per latent vector below, so the frame count must be even. */
+  celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);
+
+  /* decode initial state and initialize RDOVAE decoder */
+  ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
+  q0 = ec_dec_uint(&ec, 16);
+  dQ = ec_dec_uint(&ec, 8);
+  if (ec_dec_uint(&ec, 2)) extra_offset = 32*ec_dec_uint(&ec, 256);
+  else extra_offset = 0;
+  /* Compute total offset, including DRED position in a multiframe packet. */
+  dec->dred_offset = 16 - ec_dec_uint(&ec, 32) - extra_offset + dred_frame_offset;
+  qmax = 15;
+  if (q0 < 14 && dQ > 0) {
+    int nvals;
+    int ft;
+    int s;
+    /* The distribution for the dQmax symbol is split evenly between zero
+       (which implies qmax == 15) and larger values, with the probability of
+       all larger values being uniform.
+       This is equivalent to coding 1 bit to decide if the maximum is less than
+       15 followed by a uint to decide the actual value if it is less than
+       15, but combined into a single symbol. */
+    nvals = 15 - (q0 + 1);
+    ft = 2*nvals;
+    s = ec_decode(&ec, ft);
+    if (s >= nvals) {
+      qmax = q0 + (s - nvals) + 1;
+      ec_dec_update(&ec, s, s + 1, ft);
+    }
+    else {
+      ec_dec_update(&ec, 0, nvals, ft);
+    }
+  }
+  state_qoffset = q0*DRED_STATE_DIM;
+  dred_decode_latents(
+      &ec,
+      dec->state,
+      dred_state_quant_scales_q8 + state_qoffset,
+      dred_state_r_q8 + state_qoffset,
+      dred_state_p0_q8 + state_qoffset,
+      DRED_STATE_DIM);
+
+  /* decode newest to oldest and store oldest to newest */
+  for (i = 0; i < IMIN(DRED_NUM_REDUNDANCY_FRAMES, (min_feature_frames+1)/2); i += 2)
+  {
+    /* FIXME: Figure out how to avoid missing a last frame that would take up < 8 bits. */
+    if (8*num_bytes - ec_tell(&ec) <= 7)
+      break;
+    q_level = compute_quantizer(q0, dQ, qmax, i/2);
+    /* Start of this quantizer level's row in the per-dimension latent tables. */
+    offset = q_level*DRED_LATENT_DIM;
+    dred_decode_latents(
+        &ec,
+        &dec->latents[(i/2)*DRED_LATENT_DIM],
+        dred_latent_quant_scales_q8 + offset,
+        dred_latent_r_q8 + offset,
+        dred_latent_p0_q8 + offset,
+        DRED_LATENT_DIM
+        );
+  }
+  dec->process_stage = 1;
+  dec->nb_latents = i/2;
+  return i/2;
+}
diff --git a/dnn/dred_decoder.h b/dnn/dred_decoder.h
new file mode 100644
index 00000000..dcd8121d
--- /dev/null
+++ b/dnn/dred_decoder.h
@@ -0,0 +1,49 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_DECODER_H
+#define DRED_DECODER_H
+
+#include "opus.h"
+#include "dred_config.h"
+#include "dred_rdovae.h"
+#include "entcode.h"
+#include "dred_rdovae_constants.h"
+
+/* Decoded DRED data for one packet. dred_ec_decode() fills state, latents,
+   nb_latents, process_stage and dred_offset. */
+struct OpusDRED {
+ /* Feature storage; not written by dred_ec_decode() (presumably filled by a later processing stage - TODO confirm). */
+ float fec_features[2*DRED_NUM_REDUNDANCY_FRAMES*DRED_NUM_FEATURES];
+ /* Dequantized initial state, DRED_STATE_DIM values. */
+ float state[DRED_STATE_DIM];
+ /* Dequantized latent vectors, DRED_LATENT_DIM values each, in decode order. */
+ float latents[(DRED_NUM_REDUNDANCY_FRAMES/2)*DRED_LATENT_DIM];
+ /* Number of latent vectors actually decoded into latents. */
+ int nb_latents;
+ /* Set to 1 once entropy decoding has completed. */
+ int process_stage;
+ /* 16 - coded offset - extra offset + the caller-supplied frame offset. */
+ int dred_offset;
+};
+
+
+int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset);
+
+#endif
diff --git a/dnn/dred_encoder.c b/dnn/dred_encoder.c
new file mode 100644
index 00000000..edb49cc2
--- /dev/null
+++ b/dnn/dred_encoder.c
@@ -0,0 +1,363 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#if 0
+#include <stdio.h>
+#include <math.h>
+#endif
+
+#include "dred_encoder.h"
+#include "dred_coding.h"
+#include "celt/entenc.h"
+
+#include "dred_decoder.h"
+#include "float_cast.h"
+#include "os_support.h"
+#include "celt/laplace.h"
+#include "dred_rdovae_stats_data.h"
+
+
+/* Put the RDOVAE encoder state into its initial condition by zeroing the
+   whole structure. */
+static void DRED_rdovae_init_encoder(RDOVAEEncState *enc_state)
+{
+ memset(enc_state, 0, sizeof(*enc_state));
+}
+
+/* Load encoder weights from a binary blob.
+   The blob is parsed into a weight list, which initializes the RDOVAE encoder
+   model; on success the same blob is handed to the LPCNet encoder loader.
+   Returns OPUS_OK and sets enc->loaded when both steps report 0, otherwise
+   returns OPUS_BAD_ARG and leaves enc->loaded untouched.
+   NOTE(review): the result of parse_weights() is not checked before the list
+   is used - confirm the list is always safe to pass on after a parse failure. */
+int dred_encoder_load_model(DREDEnc* enc, const void *data, int len)
+{
+  WeightArray *weights;
+  int status;
+
+  parse_weights(&weights, data, len);
+  status = init_rdovaeenc(&enc->model, weights);
+  /* The list is only needed while the model is being initialized. */
+  opus_free(weights);
+  if (status == 0) {
+    status = lpcnet_encoder_load_model(&enc->lpcnet_enc_state, data, len);
+  }
+  if (status != 0) return OPUS_BAD_ARG;
+  enc->loaded = 1;
+  return OPUS_OK;
+}
+
+/* Reset the running state of the encoder while keeping its configuration.
+   Everything from the DREDENC_RESET_START member to the end of the structure
+   is zeroed; members declared before it are not cleared here. */
+void dred_encoder_reset(DREDEnc* enc)
+{
+ /* Byte count = size of the struct minus the byte offset of the first member to clear. */
+ OPUS_CLEAR((char*)&enc->DREDENC_RESET_START,
+ sizeof(DREDEnc)-
+ ((char*)&enc->DREDENC_RESET_START - (char*)enc));
+ /* The input buffer starts pre-filled with DRED_SILK_ENCODER_DELAY (zeroed) samples. */
+ enc->input_buffer_fill = DRED_SILK_ENCODER_DELAY;
+ /* These two live before DREDENC_RESET_START, so they are re-initialized explicitly. */
+ lpcnet_encoder_init(&enc->lpcnet_enc_state);
+ DRED_rdovae_init_encoder(&enc->rdovae_enc);
+}
+
+/* Initialize a DRED encoder for sampling rate Fs and the given channel count.
+   Unless USE_WEIGHTS_FILE is defined, the built-in weight arrays are loaded
+   and enc->loaded reflects whether that succeeded; with USE_WEIGHTS_FILE the
+   encoder stays unloaded until dred_encoder_load_model() is called.
+   The running state is reset before returning. */
+void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels)
+{
+  enc->loaded = 0;
+  enc->channels = channels;
+  enc->Fs = Fs;
+#ifndef USE_WEIGHTS_FILE
+  enc->loaded = (init_rdovaeenc(&enc->model, rdovaeenc_arrays) == 0);
+#endif
+  dred_encoder_reset(enc);
+}
+
+/* Consume one double frame (2*DRED_FRAME_SIZE samples) from the start of
+   enc->input_buffer: compute LPCNet features for both halves, run the RDOVAE
+   encoder on them, and make room for the result by shifting the latent and
+   state histories down by one entry. Requires a loaded model. */
+static void dred_process_frame(DREDEnc *enc, int arch)
+{
+  /* Each feature call writes into its own 36-float slot; only the first
+     DRED_NUM_FEATURES of each slot are forwarded (LPC coefficients are dropped). */
+  float feature_buffer[2 * 36];
+  float input_buffer[2*DRED_NUM_FEATURES] = {0};
+  float *second_slot = feature_buffer + 36;
+
+  celt_assert(enc->loaded);
+
+  /* First half of the double frame. */
+  lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer, feature_buffer, arch);
+  OPUS_COPY(input_buffer, feature_buffer, DRED_NUM_FEATURES);
+
+  /* Second half of the double frame. */
+  lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, second_slot, arch);
+  OPUS_COPY(input_buffer + DRED_NUM_FEATURES, second_slot, DRED_NUM_FEATURES);
+
+  /* Shift the histories by one entry so index 0 is free for the new output. */
+  OPUS_MOVE(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM);
+  OPUS_MOVE(enc->state_buffer + DRED_STATE_DIM, enc->state_buffer, (DRED_MAX_FRAMES - 1) * DRED_STATE_DIM);
+
+  /* run RDOVAE encoder */
+  dred_rdovae_encode_dframe(&enc->rdovae_enc, &enc->model, enc->latents_buffer, enc->state_buffer, input_buffer, arch);
+  enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES);
+}
+
+/* Run an IIR filter over len samples, keeping its state in mem.
+   For each input x: y = x*b0 + mem[0], then for every j < order
+   mem[j] = mem[j+1] + b[j]*x - a[j]*y.
+   b and a need order entries each. mem needs order+1 entries because
+   mem[order] is read; it is never written here, so it must hold the value the
+   caller wants shifted in (zero for an ordinary filter).
+   in and out may be the same buffer: each input sample is read before the
+   matching output sample is stored. */
+void filter_df2t(const float *in, float *out, int len, float b0, const float *b, const float *a, int order, float *mem)
+{
+  int n;
+  for (n=0;n<len;n++) {
+    int k;
+    float x, y, neg_y;
+    x = in[n];
+    y = x*b0 + mem[0];
+    neg_y = -y;
+    for (k=0;k<order;k++)
+    {
+      mem[k] = mem[k+1] + b[k]*x + a[k]*neg_y;
+    }
+    out[n] = y;
+  }
+}
+
+#define MAX_DOWNMIX_BUFFER (960*2)
+/* Downmix to mono and resample in_len input samples (per channel, at enc->Fs)
+   to out_len samples at 16 kHz.
+   The input is zero-stuffed by an integer factor `up`, scaled by `up`,
+   low-pass filtered with state kept in enc->resample_mem and, for every rate
+   except 8 and 16 kHz, decimated by 3. Stereo input is averaged.
+   Callers must supply in_len and out_len such that
+   in_len*16000 == out_len*enc->Fs. */
+static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float *out, int out_len)
+{
+  float downmix[MAX_DOWNMIX_BUFFER];
+  int i;
+  int up;
+  celt_assert(enc->channels*in_len <= MAX_DOWNMIX_BUFFER);
+  celt_assert(in_len * (opus_int32)16000 == out_len * enc->Fs);
+  switch(enc->Fs) {
+    case 8000:
+      up = 2;
+      break;
+    case 12000:
+      up = 4;
+      break;
+    case 16000:
+      up = 1;
+      break;
+    case 24000:
+      up = 2;
+      break;
+    case 48000:
+      up = 1;
+      break;
+    default:
+      /* Unsupported rate. Give `up` a defined value so the code below never
+         reads an indeterminate variable if celt_assert() does not abort. */
+      up = 1;
+      celt_assert(0);
+  }
+  /* Zero-stuffing: clear everything, then write every up-th sample. */
+  OPUS_CLEAR(downmix, up*in_len);
+  if (enc->channels == 1) {
+    for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(up*in[i]);
+  } else {
+    for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(.5*up*(in[2*i]+in[2*i+1]));
+  }
+  if (enc->Fs == 16000) {
+    OPUS_COPY(out, downmix, out_len);
+  } else if (enc->Fs == 48000 || enc->Fs == 24000) {
+    /* ellip(7, .2, 70, 7750/24000) */
+
+    static const float filter_b[8] = { 0.005873358047f, 0.012980854831f, 0.014531340042f, 0.014531340042f, 0.012980854831f, 0.005873358047f, 0.004523418224f, 0.f};
+    static const float filter_a[8] = {-3.878718597768f, 7.748834257468f, -9.653651699533f, 8.007342726666f, -4.379450178552f, 1.463182111810f, -0.231720677804f, 0.f};
+    float b0 = 0.004523418224f;
+    filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
+    for (i=0;i<out_len;i++) out[i] = downmix[3*i];
+  } else if (enc->Fs == 12000) {
+    /* ellip(7, .2, 70, 7750/24000) */
+    /* NOTE(review): the design string above is identical to the 48/24 kHz
+       branch although the coefficients differ - confirm the real parameters. */
+    static const float filter_b[8] = {-0.001017101081f, 0.003673127243f, 0.001009165267f, 0.001009165267f, 0.003673127243f, -0.001017101081f, 0.002033596776f, 0.f};
+    static const float filter_a[8] = {-4.930414411612f, 11.291643096504f, -15.322037343815f, 13.216403930898f, -7.220409219553f, 2.310550142771f, -0.334338618782f, 0.f};
+    float b0 = 0.002033596776f;
+    filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
+    for (i=0;i<out_len;i++) out[i] = downmix[3*i];
+  } else if (enc->Fs == 8000) {
+    /* ellip(7, .2, 70, 3900/8000) */
+    static const float filter_b[8] = { 0.081670120929f, 0.180401598565f, 0.259391051971f, 0.259391051971f, 0.180401598565f, 0.081670120929f, 0.020109185709f, 0.f};
+    static const float filter_a[8] = {-1.393651933659f, 2.609789872676f, -2.403541968806f, 2.056814957331f, -1.148908574570f, 0.473001413788f, -0.110359852412f, 0.f};
+    float b0 = 0.020109185709f;
+    /* No decimation here: the zero-stuffed signal is already at 16 kHz. */
+    filter_df2t(downmix, out, out_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);
+  } else {
+    celt_assert(0);
+  }
+}
+
+/* Feed frame_size samples of pcm (at enc->Fs) into the DRED encoder.
+   The input is converted to 16 kHz in pieces of at most 2*DRED_FRAME_SIZE
+   samples and appended to enc->input_buffer; each time a full double frame is
+   available it is encoded with dred_process_frame() and removed from the
+   buffer. Also recomputes enc->dred_offset and enc->latent_offset for this
+   call. extra_delay is given in samples at enc->Fs. Requires a loaded model. */
+void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch)
+{
+ int curr_offset16k;
+ int frame_size16k = frame_size * 16000 / enc->Fs;
+ celt_assert(enc->loaded);
+ /* Offset in 16 kHz samples, then rounded to units of 40 samples (2.5 ms at 16 kHz). */
+ curr_offset16k = 40 + extra_delay*16000/enc->Fs - enc->input_buffer_fill;
+ enc->dred_offset = (int)floor((curr_offset16k+20.f)/40.f);
+ enc->latent_offset = 0;
+ while (frame_size16k > 0) {
+ int process_size16k;
+ int process_size;
+ /* Never convert more than one double frame at a time. */
+ process_size16k = IMIN(2*DRED_FRAME_SIZE, frame_size16k);
+ process_size = process_size16k * enc->Fs / 16000;
+ dred_convert_to_16k(enc, pcm, process_size, &enc->input_buffer[enc->input_buffer_fill], process_size16k);
+ enc->input_buffer_fill += process_size16k;
+ if (enc->input_buffer_fill >= 2*DRED_FRAME_SIZE)
+ {
+ /* Note: the updated curr_offset16k is not read again in this function. */
+ curr_offset16k += 320;
+ dred_process_frame(enc, arch);
+ /* Drop the consumed double frame and move the remainder to the front. */
+ enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;
+ OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);
+ /* 15 ms (6*2.5 ms) is the ideal offset for DRED because it corresponds to our vocoder look-ahead. */
+ if (enc->dred_offset < 6) {
+ enc->dred_offset += 8;
+ } else {
+ enc->latent_offset++;
+ }
+ }
+
+ pcm += process_size;
+ frame_size16k -= process_size16k;
+ }
+}
+
+/* Quantize dim values of x and write them to the range coder enc.
+   scale, dzone, r and p0 are per-dimension tables. Each value is scaled by
+   scale/256, pulled toward zero by a soft dead zone
+   (dzone/256 * tanh(value/(dzone/256 + eps))), rounded to the nearest integer
+   and Laplace-coded. Dimensions with r == 0 or p0 == 255 are not coded at all,
+   matching the decoder, which assumes 0 for them. */
+static void dred_encode_latents(ec_enc *enc, const float *x, const opus_uint8 *scale, const opus_uint8 *dzone, const opus_uint8 *r, const opus_uint8 *p0, int dim, int arch) {
+ int i;
+ int q[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
+ float xq[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
+ float delta[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
+ float deadzone[IMAX(DRED_LATENT_DIM,DRED_STATE_DIM)];
+ /* Added to the dead-zone width in the divisor below. */
+ float eps = .1f;
+ /* This is split into multiple loops (with temporary arrays) so that the compiler
+ can vectorize all of it, and so we can call the vector tanh(). */
+ for (i=0;i<dim;i++) {
+ delta[i] = dzone[i]*(1.f/256.f);
+ xq[i] = x[i]*scale[i]*(1.f/256.f);
+ deadzone[i] = xq[i]/(delta[i]+eps);
+ }
+ /* In-place tanh over the whole vector. */
+ compute_activation(deadzone, deadzone, dim, ACTIVATION_TANH, arch);
+ for (i=0;i<dim;i++) {
+ xq[i] = xq[i] - delta[i]*deadzone[i];
+ q[i] = (int)floor(.5f+xq[i]);
+ }
+ for (i=0;i<dim;i++) {
+ /* Make the impossible actually impossible. */
+ if (r[i] == 0 || p0[i] == 255) q[i] = 0;
+ else ec_laplace_encode_p0(enc, q[i], p0[i]<<7, r[i]<<7);
+ }
+}
+
+/* Return 1 if any of the 16 activity flags starting at index 8*offset equals
+   1, otherwise 0. activity_mem must therefore hold at least 8*offset + 16
+   entries. */
+static int dred_voice_active(const unsigned char *activity_mem, int offset) {
+  const unsigned char *flag = activity_mem + 8*offset;
+  int remaining = 16;
+  while (remaining-- > 0) {
+    if (*flag++ == 1) return 1;
+  }
+  return 0;
+}
+
+/* Entropy-code a DRED payload from the encoder's state and latent histories.
+   buf/max_bytes    output buffer and its size in bytes.
+   max_chunks       upper bound on the number of latent chunks to code.
+   q0, dQ, qmax     quantizer parameters; requires qmax >= q0, and qmax > q0
+                    whenever q0 < 14 and dQ > 0.
+   activity_mem     voice-activity flags, read through dred_voice_active().
+   Returns the number of bytes written to buf, or 0 when nothing worth sending
+   could be coded (budget exceeded, or no active chunk). */
+int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes, int q0, int dQ, int qmax, unsigned char *activity_mem, int arch) {
+ ec_enc ec_encoder;
+
+ int q_level;
+ int i;
+ int offset;
+ int ec_buffer_fill;
+ int state_qoffset;
+ ec_enc ec_bak;
+ int prev_active=0;
+ int latent_offset;
+ int extra_dred_offset=0;
+ int dred_encoded=0;
+ int delayed_dred=0;
+ int total_offset;
+
+ latent_offset = enc->latent_offset;
+ /* Delaying new DRED data when just out of silence because we already have the
+ main Opus payload for that frame. */
+ if (activity_mem[0] && enc->last_extra_dred_offset>0) {
+ latent_offset = enc->last_extra_dred_offset;
+ delayed_dred = 1;
+ enc->last_extra_dred_offset = 0;
+ }
+ /* Skip leading latents without voice activity, counting how many were skipped. */
+ while (latent_offset < enc->latents_buffer_fill && !dred_voice_active(activity_mem, latent_offset)) {
+ latent_offset++;
+ extra_dred_offset++;
+ }
+ if (!delayed_dred) enc->last_extra_dred_offset = extra_dred_offset;
+
+ /* entropy coding of state and latents */
+ ec_enc_init(&ec_encoder, buf, max_bytes);
+ ec_enc_uint(&ec_encoder, q0, 16);
+ ec_enc_uint(&ec_encoder, dQ, 8);
+ /* Each skipped latent moves the offset by 8 units. */
+ total_offset = 16 - (enc->dred_offset - extra_dred_offset*8);
+ celt_assert(total_offset>=0);
+ /* Offsets above 31 are sent as a flag, the high part (multiples of 32) and the low 5 bits. */
+ if (total_offset > 31) {
+ ec_enc_uint(&ec_encoder, 1, 2);
+ ec_enc_uint(&ec_encoder, total_offset>>5, 256);
+ ec_enc_uint(&ec_encoder, total_offset&31, 32);
+ } else {
+ ec_enc_uint(&ec_encoder, 0, 2);
+ ec_enc_uint(&ec_encoder, total_offset, 32);
+ }
+ celt_assert(qmax >= q0);
+ /* The qmax symbol is only present when it can make a difference. */
+ if (q0 < 14 && dQ > 0) {
+ int nvals;
+ /* If you want to use qmax == q0, you should have set dQ = 0. */
+ celt_assert(qmax > q0);
+ nvals = 15 - (q0 + 1);
+ /* qmax >= 15 takes the lower half of the range; smaller values take one slot each in the upper half. */
+ ec_encode(&ec_encoder, qmax >= 15 ? 0 : nvals + qmax - (q0 + 1),
+ qmax >= 15 ? nvals : nvals + qmax - q0, 2*nvals);
+ }
+ state_qoffset = q0*DRED_STATE_DIM;
+ dred_encode_latents(
+ &ec_encoder,
+ &enc->state_buffer[latent_offset*DRED_STATE_DIM],
+ dred_state_quant_scales_q8 + state_qoffset,
+ dred_state_dead_zone_q8 + state_qoffset,
+ dred_state_r_q8 + state_qoffset,
+ dred_state_p0_q8 + state_qoffset,
+ DRED_STATE_DIM,
+ arch);
+ /* Header plus state alone already exceed the budget. */
+ if (ec_tell(&ec_encoder) > 8*max_bytes) {
+ return 0;
+ }
+ /* Snapshot of the coder at the last point worth keeping; restored after the loop. */
+ ec_bak = ec_encoder;
+ for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-latent_offset-1); i += 2)
+ {
+ int active;
+ q_level = compute_quantizer(q0, dQ, qmax, i/2);
+ offset = q_level * DRED_LATENT_DIM;
+
+ dred_encode_latents(
+ &ec_encoder,
+ enc->latents_buffer + (i+latent_offset) * DRED_LATENT_DIM,
+ dred_latent_quant_scales_q8 + offset,
+ dred_latent_dead_zone_q8 + offset,
+ dred_latent_r_q8 + offset,
+ dred_latent_p0_q8 + offset,
+ DRED_LATENT_DIM,
+ arch
+ );
+ if (ec_tell(&ec_encoder) > 8*max_bytes) {
+ /* If we haven't been able to code one chunk, give up on DRED completely. */
+ if (i==0) return 0;
+ break;
+ }
+ /* Only advance the snapshot when this chunk or the previous one had voice
+ activity, so a trailing run of inactive chunks is dropped. */
+ active = dred_voice_active(activity_mem, i+latent_offset);
+ if (active || prev_active) {
+ ec_bak = ec_encoder;
+ dred_encoded = i+2;
+ }
+ prev_active = active;
+ }
+ /* Avoid sending empty DRED packets. */
+ if (dred_encoded==0 || (dred_encoded<=2 && extra_dred_offset)) return 0;
+ /* Roll back to the last kept chunk. */
+ ec_encoder = ec_bak;
+
+ /* Round the bit count up to whole bytes and finalize the stream at that size. */
+ ec_buffer_fill = (ec_tell(&ec_encoder)+7)/8;
+ ec_enc_shrink(&ec_encoder, ec_buffer_fill);
+ ec_enc_done(&ec_encoder);
+ return ec_buffer_fill;
+}
diff --git a/dnn/dred_encoder.h b/dnn/dred_encoder.h
new file mode 100644
index 00000000..6987222c
--- /dev/null
+++ b/dnn/dred_encoder.h
@@ -0,0 +1,71 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_ENCODER_H
+#define DRED_ENCODER_H
+
+#include "lpcnet.h"
+#include "dred_config.h"
+#include "dred_rdovae.h"
+#include "entcode.h"
+#include "lpcnet_private.h"
+#include "dred_rdovae_enc.h"
+#include "dred_rdovae_enc_data.h"
+
+#define RESAMPLING_ORDER 8
+
+/* DRED encoder. Members declared before DREDENC_RESET_START are not zeroed by
+   dred_encoder_reset(); everything from that member onward is. */
+typedef struct {
+ RDOVAEEnc model;
+ /* Re-initialized explicitly by dred_encoder_reset(). */
+ LPCNetEncState lpcnet_enc_state;
+ /* Re-initialized explicitly by dred_encoder_reset(). */
+ RDOVAEEncState rdovae_enc;
+ /* Non-zero once model weights have been loaded successfully. */
+ int loaded;
+ /* Input sampling rate and channel count given to dred_encoder_init(). */
+ opus_int32 Fs;
+ int channels;
+
+/* First member cleared by dred_encoder_reset(). */
+#define DREDENC_RESET_START input_buffer
+ /* 16 kHz mono samples waiting to be processed. */
+ float input_buffer[2*DRED_DFRAME_SIZE];
+ /* Number of valid samples in input_buffer; DRED_SILK_ENCODER_DELAY after a reset. */
+ int input_buffer_fill;
+ /* Recomputed by each dred_compute_latents() call. */
+ int dred_offset;
+ int latent_offset;
+ /* Carried between dred_encode_silk_frame() calls to delay DRED after silence. */
+ int last_extra_dred_offset;
+ /* History of latent vectors; shifted by one entry per processed double frame. */
+ float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
+ /* Incremented per processed double frame, capped at DRED_NUM_REDUNDANCY_FRAMES. */
+ int latents_buffer_fill;
+ /* History of states; shifted together with latents_buffer. */
+ float state_buffer[DRED_MAX_FRAMES * DRED_STATE_DIM];
+ /* Resampler filter state; one extra entry because filter_df2t() reads mem[order]. */
+ float resample_mem[RESAMPLING_ORDER + 1];
+} DREDEnc;
+
+int dred_encoder_load_model(DREDEnc* enc, const void *data, int len);
+void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels);
+void dred_encoder_reset(DREDEnc* enc);
+
+void dred_deinit_encoder(DREDEnc *enc);
+
+void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch);
+
+int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes, int q0, int dQ, int qmax, unsigned char *activity_mem, int arch);
+
+#endif
diff --git a/dnn/dred_rdovae.h b/dnn/dred_rdovae.h
new file mode 100644
index 00000000..89ea39ac
--- /dev/null
+++ b/dnn/dred_rdovae.h
@@ -0,0 +1,42 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_RDOVAE_H
+#define DRED_RDOVAE_H
+
+#include <stdlib.h>
+
+#include "opus_types.h"
+
+typedef struct RDOVAEDec RDOVAEDec;
+typedef struct RDOVAEEnc RDOVAEEnc;
+typedef struct RDOVAEDecStruct RDOVAEDecState;
+typedef struct RDOVAEEncStruct RDOVAEEncState;
+
+
+
+#endif
diff --git a/dnn/dred_rdovae_dec.c b/dnn/dred_rdovae_dec.c
new file mode 100644
index 00000000..7797ee77
--- /dev/null
+++ b/dnn/dred_rdovae_dec.c
@@ -0,0 +1,139 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "dred_rdovae_dec.h"
+#include "dred_rdovae_constants.h"
+#include "os_support.h"
+
+/* Reset the history buffer of a 1-D convolution the first time it is used.
+   While *init is 0, `dilation` consecutive chunks of `len` entries starting
+   at `mem` are cleared with OPUS_CLEAR. *init is always 1 on return, so any
+   later call sharing the same flag leaves its buffer untouched. */
+static void conv1_cond_init(float *mem, int len, int dilation, int *init)
+{
+  if (*init == 0) {
+    int chunk = 0;
+    while (chunk < dilation) {
+      OPUS_CLEAR(mem + chunk*len, len);
+      chunk++;
+    }
+  }
+  *init = 1;
+}
+
+/* Decode `nb_latents` latent vectors in one go.
+   A local decoder state is zeroed, seeded from `state`, and then stepped once
+   per latent vector: step k reads latents[k*DRED_LATENT_DIM] and writes its
+   output starting at features[4*k*DRED_NUM_FEATURES]. */
+void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
+{
+  RDOVAEDecState dec;
+  int k;
+  memset(&dec, 0, sizeof(dec));
+  dred_rdovae_dec_init_states(&dec, model, state, arch);
+  for (k = 0; k < nb_latents; k++)
+  {
+    float *out = &features[4*k*DRED_NUM_FEATURES];
+    const float *z = &latents[k*DRED_LATENT_DIM];
+    dred_rdovae_decode_qframe(&dec, model, out, z, arch);
+  }
+}
+
+/* Seed the five GRU states of the decoder from `initial_state`.
+   `initial_state` goes through dec_hidden_init and then dec_gru_init (both
+   with tanh activation); the result is one flat vector that is sliced, in
+   order, into gru1_state .. gru5_state. The `initialized` flag is reset to 0
+   so that the next decode step runs its first-use convolution reset. */
+void dred_rdovae_dec_init_states(
+    RDOVAEDecState *h,            /* io: state buffer handle */
+    const RDOVAEDec *model,
+    const float *initial_state,   /* i: initial state */
+    int arch
+    )
+{
+  float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
+  float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
+  float *src;
+
+  compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
+  compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);
+
+  /* Walk through the flat vector, handing each GRU its own slice. */
+  src = state_init;
+  OPUS_COPY(h->gru1_state, src, DEC_GRU1_STATE_SIZE);
+  src += DEC_GRU1_STATE_SIZE;
+  OPUS_COPY(h->gru2_state, src, DEC_GRU2_STATE_SIZE);
+  src += DEC_GRU2_STATE_SIZE;
+  OPUS_COPY(h->gru3_state, src, DEC_GRU3_STATE_SIZE);
+  src += DEC_GRU3_STATE_SIZE;
+  OPUS_COPY(h->gru4_state, src, DEC_GRU4_STATE_SIZE);
+  src += DEC_GRU4_STATE_SIZE;
+  OPUS_COPY(h->gru5_state, src, DEC_GRU5_STATE_SIZE);
+
+  h->initialized = 0;
+}
+
+
+/* Run one decoder step: turn a single latent vector into one output qframe.
+   The layer outputs are concatenated in `buffer`: dec_dense1 first, then for
+   each of the five stages the GLU output of the stage's GRU state followed by
+   the output of the stage's 1-D convolution. Every GRU and convolution reads
+   the buffer from its start, and `output_index` tracks how much of it has
+   been filled so far. The final dec_output layer maps the buffer to `qframe`.
+   NOTE(review): all five conv1_cond_init() calls share dec_state->initialized.
+   The first call sets it to 1, so the calls for conv2..conv5 never clear
+   their buffers; this looks like it relies on the caller having zeroed the
+   whole state beforehand (DRED_rdovae_decode_all does) -- confirm. */
+void dred_rdovae_decode_qframe(
+ RDOVAEDecState *dec_state, /* io: state buffer handle */
+ const RDOVAEDec *model,
+ float *qframe, /* o: quadruple feature frame (four concatenated frames in reverse order) */
+ const float *input, /* i: latent vector */
+ int arch
+ )
+{
+ float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
+ + DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
+ int output_index = 0;
+
+ /* run decoder stack and concatenate output in buffer */
+ compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
+ output_index += DEC_DENSE1_OUT_SIZE;
+
+ /* stage 1: GRU update, GLU of the GRU state appended, then conv over everything so far */
+ compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
+ compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state, arch);
+ output_index += DEC_GRU1_OUT_SIZE;
+ conv1_cond_init(dec_state->conv1_state, output_index, 1, &dec_state->initialized);
+ compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
+ output_index += DEC_CONV1_OUT_SIZE;
+
+ /* stage 2 */
+ compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
+ compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state, arch);
+ output_index += DEC_GRU2_OUT_SIZE;
+ conv1_cond_init(dec_state->conv2_state, output_index, 1, &dec_state->initialized);
+ compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
+ output_index += DEC_CONV2_OUT_SIZE;
+
+ /* stage 3 */
+ compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
+ compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state, arch);
+ output_index += DEC_GRU3_OUT_SIZE;
+ conv1_cond_init(dec_state->conv3_state, output_index, 1, &dec_state->initialized);
+ compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
+ output_index += DEC_CONV3_OUT_SIZE;
+
+ /* stage 4 */
+ compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
+ compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state, arch);
+ output_index += DEC_GRU4_OUT_SIZE;
+ conv1_cond_init(dec_state->conv4_state, output_index, 1, &dec_state->initialized);
+ compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
+ output_index += DEC_CONV4_OUT_SIZE;
+
+ /* stage 5 */
+ compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
+ compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state, arch);
+ output_index += DEC_GRU5_OUT_SIZE;
+ conv1_cond_init(dec_state->conv5_state, output_index, 1, &dec_state->initialized);
+ compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
+ output_index += DEC_CONV5_OUT_SIZE;
+
+ /* linear output layer over the full concatenated buffer */
+ compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
+}
diff --git a/dnn/dred_rdovae_dec.h b/dnn/dred_rdovae_dec.h
new file mode 100644
index 00000000..4e66911c
--- /dev/null
+++ b/dnn/dred_rdovae_dec.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_RDOVAE_DEC_H
+#define DRED_RDOVAE_DEC_H
+
+#include "dred_rdovae.h"
+#include "dred_rdovae_dec_data.h"
+#include "dred_rdovae_stats_data.h"
+
+/* Decoder state carried from one dred_rdovae_decode_qframe() call to the next. */
+struct RDOVAEDecStruct {
+ int initialized; /* set to 0 by dred_rdovae_dec_init_states(), set to 1 by the first decode step */
+ /* recurrent state of dec_gru1 .. dec_gru5 */
+ float gru1_state[DEC_GRU1_STATE_SIZE];
+ float gru2_state[DEC_GRU2_STATE_SIZE];
+ float gru3_state[DEC_GRU3_STATE_SIZE];
+ float gru4_state[DEC_GRU4_STATE_SIZE];
+ float gru5_state[DEC_GRU5_STATE_SIZE];
+ /* history buffers handed to compute_generic_conv1d() for dec_conv1 .. dec_conv5 */
+ float conv1_state[DEC_CONV1_STATE_SIZE];
+ float conv2_state[DEC_CONV2_STATE_SIZE];
+ float conv3_state[DEC_CONV3_STATE_SIZE];
+ float conv4_state[DEC_CONV4_STATE_SIZE];
+ float conv5_state[DEC_CONV5_STATE_SIZE];
+};
+
+void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
+void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
+void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);
+
+#endif
diff --git a/dnn/dred_rdovae_enc.c b/dnn/dred_rdovae_enc.c
new file mode 100644
index 00000000..4f13ae21
--- /dev/null
+++ b/dnn/dred_rdovae_enc.c
@@ -0,0 +1,110 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae_enc.h"
+#include "os_support.h"
+#include "dred_rdovae_constants.h"
+
+/* Reset the history buffer of a (possibly dilated) 1-D convolution on first
+   use. While *init is 0, `dilation` consecutive chunks of `len` entries
+   starting at `mem` are cleared with OPUS_CLEAR. *init is always 1 on
+   return, so any later call sharing the same flag leaves its buffer alone. */
+static void conv1_cond_init(float *mem, int len, int dilation, int *init)
+{
+  if (*init == 0) {
+    int chunk = 0;
+    while (chunk < dilation) {
+      OPUS_CLEAR(mem + chunk*len, len);
+      chunk++;
+    }
+  }
+  *init = 1;
+}
+
+/* Run one encoder step on a double feature frame.
+   The layer outputs are concatenated in `buffer`: enc_dense1 first, then for
+   each of the five stages a copy of the stage's GRU state followed by the
+   output of the stage's 1-D convolution (conv1 plain, conv2..conv5 with
+   dilation 2). Every GRU and convolution reads the buffer from its start, and
+   `output_index` tracks how much of it has been filled so far. Two heads then
+   read the full buffer: enc_zdense produces the latent vector, and
+   gdense1 -> gdense2 produce the initial state. Both heads write into padded
+   scratch arrays, of which only the first DRED_LATENT_DIM / DRED_STATE_DIM
+   values are copied out.
+   NOTE(review): all five conv1_cond_init() calls share enc_state->initialized.
+   The first call sets it to 1, so the calls for conv2..conv5 never clear
+   their buffers; this presumably relies on the encoder state being zeroed
+   when it is created or reset -- confirm against the caller. */
+void dred_rdovae_encode_dframe(
+ RDOVAEEncState *enc_state, /* io: encoder state */
+ const RDOVAEEnc *model,
+ float *latents, /* o: latent vector */
+ float *initial_state, /* o: initial state */
+ const float *input, /* i: double feature frame (concatenated) */
+ int arch
+ )
+{
+ float padded_latents[DRED_PADDED_LATENT_DIM];
+ float padded_state[DRED_PADDED_STATE_DIM];
+ float buffer[ENC_DENSE1_OUT_SIZE + ENC_GRU1_OUT_SIZE + ENC_GRU2_OUT_SIZE + ENC_GRU3_OUT_SIZE + ENC_GRU4_OUT_SIZE + ENC_GRU5_OUT_SIZE
+ + ENC_CONV1_OUT_SIZE + ENC_CONV2_OUT_SIZE + ENC_CONV3_OUT_SIZE + ENC_CONV4_OUT_SIZE + ENC_CONV5_OUT_SIZE];
+ float state_hidden[GDENSE1_OUT_SIZE];
+ int output_index = 0;
+
+ /* run encoder stack and concatenate output in buffer*/
+ compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
+ output_index += ENC_DENSE1_OUT_SIZE;
+
+ /* stage 1: GRU update, GRU state appended, then conv (no dilation) over everything so far */
+ compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
+ OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
+ output_index += ENC_GRU1_OUT_SIZE;
+ conv1_cond_init(enc_state->conv1_state, output_index, 1, &enc_state->initialized);
+ compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
+ output_index += ENC_CONV1_OUT_SIZE;
+
+ /* stage 2: same pattern, convolution with dilation 2 */
+ compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
+ OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
+ output_index += ENC_GRU2_OUT_SIZE;
+ conv1_cond_init(enc_state->conv2_state, output_index, 2, &enc_state->initialized);
+ compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+ output_index += ENC_CONV2_OUT_SIZE;
+
+ /* stage 3 */
+ compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
+ OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
+ output_index += ENC_GRU3_OUT_SIZE;
+ conv1_cond_init(enc_state->conv3_state, output_index, 2, &enc_state->initialized);
+ compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+ output_index += ENC_CONV3_OUT_SIZE;
+
+ /* stage 4 */
+ compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
+ OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
+ output_index += ENC_GRU4_OUT_SIZE;
+ conv1_cond_init(enc_state->conv4_state, output_index, 2, &enc_state->initialized);
+ compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+ output_index += ENC_CONV4_OUT_SIZE;
+
+ /* stage 5 */
+ compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
+ OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
+ output_index += ENC_GRU5_OUT_SIZE;
+ conv1_cond_init(enc_state->conv5_state, output_index, 2, &enc_state->initialized);
+ compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
+ output_index += ENC_CONV5_OUT_SIZE;
+
+ /* latent head: the dense layer fills the padded scratch array, only DRED_LATENT_DIM values are returned */
+ compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
+ OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);
+
+ /* next, calculate initial state */
+ compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
+ compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
+ OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
+}
diff --git a/dnn/dred_rdovae_enc.h b/dnn/dred_rdovae_enc.h
new file mode 100644
index 00000000..6fe537ee
--- /dev/null
+++ b/dnn/dred_rdovae_enc.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DRED_RDOVAE_ENC_H
+#define DRED_RDOVAE_ENC_H
+
+#include "dred_rdovae.h"
+
+#include "dred_rdovae_enc_data.h"
+
+/* Encoder state carried from one dred_rdovae_encode_dframe() call to the next. */
+struct RDOVAEEncStruct {
+ int initialized; /* set to 1 by the first encode step (first-use flag for the convolution histories) */
+ /* recurrent state of enc_gru1 .. enc_gru5 */
+ float gru1_state[ENC_GRU1_STATE_SIZE];
+ float gru2_state[ENC_GRU2_STATE_SIZE];
+ float gru3_state[ENC_GRU3_STATE_SIZE];
+ float gru4_state[ENC_GRU4_STATE_SIZE];
+ float gru5_state[ENC_GRU5_STATE_SIZE];
+ /* convolution history buffers; conv2..conv5 are run with dilation 2 by the
+    encoder and are declared with twice the per-layer state size */
+ float conv1_state[ENC_CONV1_STATE_SIZE];
+ float conv2_state[2*ENC_CONV2_STATE_SIZE];
+ float conv3_state[2*ENC_CONV3_STATE_SIZE];
+ float conv4_state[2*ENC_CONV4_STATE_SIZE];
+ float conv5_state[2*ENC_CONV5_STATE_SIZE];
+};
+
+void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);
+
+
+#endif
diff --git a/dnn/dump_data.c b/dnn/dump_data.c
new file mode 100644
index 00000000..e4c78c7e
--- /dev/null
+++ b/dnn/dump_data.c
@@ -0,0 +1,280 @@
+/* Copyright (c) 2017-2018 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "kiss_fft.h"
+#include "common.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include <assert.h>
+#include "lpcnet.h"
+#include "lpcnet_private.h"
+#include "os_support.h"
+#include "cpu_support.h"
+
+
+/* Filter N samples of x into y with a second-order recursive section.
+   mem[0] and mem[1] hold the two delay elements and are updated in place, so
+   consecutive calls continue the same filter. Each output is the input plus
+   mem[0]; b[] weights the input and a[] the output when the delay elements
+   are refreshed (accumulated in double precision). The input sample is read
+   before the output is stored, so y may be the same array as x. */
+static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
+  int n;
+  for (n = 0; n < N; n++) {
+    const float in = x[n];
+    const float out = in + mem[0];
+    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
+    mem[1] = (b[1]*(double)in - a[1]*(double)out);
+    y[n] = out;
+  }
+}
+
+/* Draw one value from rand(), scaled to [0, 1] and shifted down by one half,
+   i.e. a value in [-0.5, 0.5]. */
+static float uni_rand(void) {
+  const double unit = rand()/(double)RAND_MAX;
+  return unit-.5;
+}
+
+/* Fill a[0..1] and then b[0..1] with independent uni_rand() draws scaled by
+   0.75. The results are used by the caller as biquad coefficients. */
+static void rand_resp(float *a, float *b) {
+  const double spread = .75;
+  int k;
+  for (k = 0; k < 2; k++) a[k] = spread*uni_rand();
+  for (k = 0; k < 2; k++) b[k] = spread*uni_rand();
+}
+
+/* Fill noise[0..FRAME_SIZE-1] with rounded random integers.
+   Each entry is noise_std*0.707 times the difference of two log_approx()
+   values of uniform rand() draws, rounded to the nearest integer. */
+void compute_noise(int *noise, float noise_std) {
+  const double scale = noise_std*.707;
+  int n = 0;
+  while (n < FRAME_SIZE) {
+    noise[n] = (int)floor(.5 + scale*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
+    n++;
+  }
+}
+
+/* Round x to the nearest integer and clamp it to [-32767, 32767]. */
+static opus_int16 float2short(float x)
+{
+  int rounded = (int)floor(.5+x);
+  if (rounded > 32767) rounded = 32767;
+  if (rounded < -32767) rounded = -32767;
+  return rounded;
+}
+
+
+/* Write one frame of interleaved (signal in, signal out) 16-bit pairs to `file`.
+   For every sample a prediction is formed from st->sig_mem and the LPC_ORDER
+   values stored at st->features[NB_BANDS+2 ...] (presumably the LPC
+   coefficients -- confirm). The prediction error is mapped through
+   lin2ulaw(), perturbed by noise[], clamped to [0, 255] and mapped back with
+   ulaw2lin() to produce the next sig_mem[0]. The value written as
+   "signal in" is sig_mem[0] before that update. */
+void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
+  opus_int16 data[2*FRAME_SIZE];
+  int n;
+  for (n=0;n<FRAME_SIZE;n++) {
+    float pred=0;
+    float exc;
+    int k;
+    for (k=0;k<LPC_ORDER;k++) pred -= st->features[NB_BANDS+2+k]*st->sig_mem[k];
+    exc = lin2ulaw(pcm[n] - pred);
+    /* Signal in. */
+    data[2*n] = float2short(st->sig_mem[0]);
+    /* Signal out. */
+    data[2*n+1] = pcm[n];
+    /* Simulate error on excitation. */
+    exc += noise[n];
+    exc = IMIN(255, IMAX(0, exc));
+
+    /* Shift the signal history by one and insert the new noisy sample. */
+    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
+    st->sig_mem[0] = pred + ulaw2lin(exc);
+  }
+  /* 4*FRAME_SIZE bytes: the write size for the 2*FRAME_SIZE-entry data[] buffer. */
+  fwrite(data, 4*FRAME_SIZE, 1, file);
+}
+
+/* Feature dumper: reads 16-bit speech, augments it and writes features.
+
+   Modes (argv[1]):
+     -train  <speech> <features out> <pcm out>     features plus audio pairs
+     -test   <speech> <features out>               features only, single pass
+     -btrain / -btest                              as above, also dumps Burg cepstra
+     -ptrain <noise> <speech> <features out>       pitch features with added noise
+     -ptest  <speech> <features out>               pitch features, single pass
+
+   In the training modes the input is rewound when it runs out, and gain,
+   spectral response and noise level are re-drawn at regular intervals.
+   Returns 0 on success, 1 on a usage error; file errors call exit(1). */
+int main(int argc, char **argv) {
+  int i;
+  char *argv0;
+  int count=0;
+  static const float a_hp[2] = {-1.99599, 0.99600};
+  static const float b_hp[2] = {-2, 1};
+  float a_sig[2] = {0};
+  float b_sig[2] = {0};
+  float mem_hp_x[2]={0};
+  float mem_resp_x[2]={0};
+  float mem_preemph=0;
+  float x[FRAME_SIZE];
+  int gain_change_count=0;
+  FILE *f1;
+  FILE *ffeat;
+  FILE *fpcm=NULL;
+  opus_int16 pcm[FRAME_SIZE]={0};
+  int noisebuf[FRAME_SIZE]={0};
+  opus_int16 tmp[FRAME_SIZE] = {0};
+  float speech_gain=1;
+  float old_speech_gain = 1;
+  int one_pass_completed = 0;
+  LPCNetEncState *st;
+  float noise_std=0;
+  int training = -1;
+  int burg = 0;
+  int pitch = 0;
+  FILE *fnoise = NULL;
+  float noise_gain = 0;
+  long noise_size=0;
+  int arch;
+  srand(getpid());
+  arch = opus_select_arch();
+  st = lpcnet_encoder_create();
+  argv0=argv[0];
+  if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
+    burg = 1;
+    training = 1;
+  }
+  else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
+    burg = 1;
+    training = 0;
+  }
+  else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
+    pitch = 1;
+    training = 1;
+    fnoise = fopen(argv[2], "rb");
+    /* Without this check a missing noise file crashes in fseek() below. */
+    if (fnoise == NULL) {
+      fprintf(stderr,"Error opening input noise file: %s\n", argv[2]);
+      exit(1);
+    }
+    fseek(fnoise, 0, SEEK_END);
+    noise_size = ftell(fnoise);
+    fseek(fnoise, 0, SEEK_SET);
+    /* Skip the noise argument so the remaining arguments line up with the other modes. */
+    argv++;
+  }
+  else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
+    pitch = 1;
+    training = 0;
+  }
+  else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
+  else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
+  if (training == -1) {
+    fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
+    fprintf(stderr, " or %s -test <speech> <features out>\n", argv0);
+    return 1;
+  }
+  /* The speech input is raw binary samples, so open it in binary mode. */
+  f1 = fopen(argv[2], "rb");
+  if (f1 == NULL) {
+    fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
+    exit(1);
+  }
+  ffeat = fopen(argv[3], "wb");
+  if (ffeat == NULL) {
+    fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
+    exit(1);
+  }
+  if (training && !pitch) {
+    fpcm = fopen(argv[4], "wb");
+    if (fpcm == NULL) {
+      fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
+      exit(1);
+    }
+  }
+  while (1) {
+    size_t ret;
+    ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
+    if (feof(f1) || ret != FRAME_SIZE) {
+      /* Test modes stop at end of input; training modes loop over the file. */
+      if (!training) break;
+      rewind(f1);
+      ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
+      if (ret != FRAME_SIZE) {
+        fprintf(stderr, "error reading\n");
+        exit(1);
+      }
+      one_pass_completed = 1;
+    }
+    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
+    if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
+    if (training && ++gain_change_count > 2821) {
+      float tmp1, tmp2;
+      /* Re-draw the augmentation parameters (gain, sign, response, noise). */
+      speech_gain = pow(10., (-30+(rand()%40))/20.);
+      if (rand()&1) speech_gain = -speech_gain;
+      if (rand()%20==0) speech_gain *= .01;
+      if (!pitch && rand()%100==0) speech_gain = 0;
+      gain_change_count = 0;
+      rand_resp(a_sig, b_sig);
+      tmp1 = rand()/(float)RAND_MAX;
+      tmp2 = rand()/(float)RAND_MAX;
+      noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
+      if (fnoise != NULL) {
+        long pos;
+        /* Randomize the fraction because rand() only gives us 31 bits. */
+        float frac_pos = rand()/(float)RAND_MAX;
+        pos = (long)(frac_pos*noise_size);
+        /* 32-bit alignment. */
+        pos = pos/4 * 4;
+        if (pos > noise_size-500000) pos = noise_size-500000;
+        /* A short noise file (or a failed ftell) would otherwise yield a negative offset. */
+        if (pos < 0) pos = 0;
+        noise_gain = pow(10., (-15+(rand()%40))/20.);
+        if (rand()%10==0) noise_gain = 0;
+        fseek(fnoise, pos, SEEK_SET);
+      }
+    }
+    if (fnoise != NULL) {
+      opus_int16 noise[FRAME_SIZE];
+      ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
+      /* On a short read (end of the noise file) the unread tail is
+         uninitialized; treat it as silence instead of mixing in garbage. */
+      for (i=(int)ret;i<FRAME_SIZE;i++) noise[i] = 0;
+      for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
+    }
+    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
+    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
+    /* Cross-fade from the previous gain to the current one over the frame. */
+    for (i=0;i<FRAME_SIZE;i++) {
+      float g;
+      float f = (float)i/FRAME_SIZE;
+      g = f*speech_gain + (1-f)*old_speech_gain;
+      x[i] *= g;
+    }
+    if (burg) {
+      float ceps[2*NB_BANDS];
+      burg_cepstral_analysis(ceps, x);
+      fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
+    }
+    preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
+    for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5f;
+    /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
+    for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
+    compute_frame_features(st, x, arch);
+
+    if (fpcm) {
+      compute_noise(noisebuf, noise_std);
+    }
+
+    if (pitch) {
+      signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
+      for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+        pitch_features[i] = (int)floor(.5f + 127.f*st->xcorr_features[i]);
+      }
+      for (i=0;i<PITCH_IF_FEATURES;i++) {
+        pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = (int)floor(.5f + 127.f*st->if_features[i]);
+      }
+      fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
+    } else {
+      fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
+    }
+    /*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
+    if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
+    /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
+    /* Keep the tail of this frame as the head of the next (delayed) PCM frame. */
+    for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
+    old_speech_gain = speech_gain;
+    count++;
+  }
+  fclose(f1);
+  fclose(ffeat);
+  if (fpcm) fclose(fpcm);
+  if (fnoise) fclose(fnoise);
+  lpcnet_encoder_destroy(st);
+  return 0;
+}
diff --git a/dnn/dump_lpcnet_tables.c b/dnn/dump_lpcnet_tables.c
new file mode 100644
index 00000000..95084d6e
--- /dev/null
+++ b/dnn/dump_lpcnet_tables.c
@@ -0,0 +1,104 @@
+/* Copyright (c) 2017-2018 Mozilla
+ Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include "freq.h"
+#include "kiss_fft.h"
+
+
+/* Generate lpcnet_tables.c in the current directory.
+   The generated file holds the FFT bit-reversal and twiddle tables together
+   with the kiss_fft_state that references them, the analysis half-window and
+   the DCT table. Returns 0 on success, 1 if the output file cannot be
+   created or the FFT tables cannot be allocated. */
+int main(void) {
+  int i;
+  FILE *file;
+  kiss_fft_state *kfft;
+  float half_window[OVERLAP_SIZE];
+  float dct_table[NB_BANDS*NB_BANDS];
+
+  file=fopen("lpcnet_tables.c", "wb");
+  /* Every fprintf() below would dereference a NULL stream otherwise. */
+  if (file == NULL) {
+    fprintf(stderr, "Error opening output file: lpcnet_tables.c\n");
+    return 1;
+  }
+  fprintf(file, "/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/\n\n");
+  fprintf(file, "#ifdef HAVE_CONFIG_H\n");
+  fprintf(file, "#include \"config.h\"\n");
+  fprintf(file, "#endif\n");
+
+  fprintf(file, "#include \"kiss_fft.h\"\n\n");
+
+  kfft = opus_fft_alloc_twiddles(WINDOW_SIZE, NULL, NULL, NULL, 0);
+  /* kfft is dereferenced right away, so stop on allocation failure. */
+  if (kfft == NULL) {
+    fprintf(stderr, "Error allocating FFT tables\n");
+    fclose(file);
+    return 1;
+  }
+
+  fprintf(file, "static const arch_fft_state arch_fft = {0, NULL};\n\n");
+
+  fprintf (file, "static const opus_int16 fft_bitrev[%d] = {\n", kfft->nfft);
+  for (i=0;i<kfft->nfft;i++)
+    fprintf (file, "%d,%c", kfft->bitrev[i],(i+16)%15==0?'\n':' ');
+  fprintf (file, "};\n\n");
+
+  fprintf (file, "static const kiss_twiddle_cpx fft_twiddles[%d] = {\n", kfft->nfft);
+  for (i=0;i<kfft->nfft;i++)
+    fprintf (file, "{%#0.9gf, %#0.9gf},%c", kfft->twiddles[i].r, kfft->twiddles[i].i,(i+3)%2==0?'\n':' ');
+  fprintf (file, "};\n\n");
+
+
+  /* Emit the state structure that points at the two tables above. */
+  fprintf(file, "const kiss_fft_state kfft = {\n");
+  fprintf(file, "%d, /* nfft */\n", kfft->nfft);
+  fprintf(file, "%#0.8gf, /* scale */\n", kfft->scale);
+  fprintf(file, "%d, /* shift */\n", kfft->shift);
+  fprintf(file, "{");
+  for (i=0;i<2*MAXFACTORS;i++) {
+    fprintf(file, "%d, ", kfft->factors[i]);
+  }
+  fprintf(file, "}, /* factors */\n");
+  fprintf(file, "fft_bitrev, /* bitrev*/\n");
+  fprintf(file, "fft_twiddles, /* twiddles*/\n");
+  fprintf(file, "(arch_fft_state *)&arch_fft, /* arch_fft*/\n");
+
+  fprintf(file, "};\n\n");
+
+  /* Half window: sin(pi/2 * sin^2(pi/2 * (i+.5)/OVERLAP_SIZE)). */
+  for (i=0;i<OVERLAP_SIZE;i++)
+    half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/OVERLAP_SIZE) * sin(.5*M_PI*(i+.5)/OVERLAP_SIZE));
+  fprintf(file, "const float half_window[] = {\n");
+  for (i=0;i<OVERLAP_SIZE;i++)
+    fprintf (file, "%#0.9gf,%c", half_window[i],(i+6)%5==0?'\n':' ');
+  fprintf(file, "};\n\n");
+
+  /* DCT table: cos((i+.5)*j*pi/NB_BANDS), with the j==0 column scaled by sqrt(.5). */
+  for (i=0;i<NB_BANDS;i++) {
+    int j;
+    for (j=0;j<NB_BANDS;j++) {
+      dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
+      if (j==0) dct_table[i*NB_BANDS + j] *= sqrt(.5);
+    }
+  }
+  fprintf(file, "const float dct_table[] = {\n");
+  for (i=0;i<NB_BANDS*NB_BANDS;i++)
+    fprintf (file, "%#0.9gf,%c", dct_table[i],(i+6)%5==0?'\n':' ');
+  fprintf(file, "};\n");
+
+  fclose(file);
+  return 0;
+}
diff --git a/dnn/fargan.c b/dnn/fargan.c
new file mode 100644
index 00000000..c35b3f0c
--- /dev/null
+++ b/dnn/fargan.c
@@ -0,0 +1,225 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "fargan.h"
+#include "os_support.h"
+#include "freq.h"
+#include "fargan_data.h"
+#include "lpcnet.h"
+#include "pitch.h"
+#include "nnet.h"
+#include "lpcnet_private.h"
+#include "cpu_support.h"
+
+#define FARGAN_FEATURES (NB_FEATURES)
+
+static void compute_fargan_cond(FARGANState *st, float *cond, const float *features, int period)
+{ /* Conditioning network for one feature frame. The NB_FEATURES input
+     features are concatenated with one row of the pitch-embedding table
+     (row chosen from `period`) and run through dense -> conv1d -> dense,
+     each with tanh activation. The last layer writes its output to `cond`;
+     the conv1d layer is given st->cond_conv1_state as its state buffer. */
+ FARGAN *model;
+ float dense_in[NB_FEATURES+COND_NET_PEMBED_OUT_SIZE];
+ float conv1_in[COND_NET_FCONV1_IN_SIZE];
+ float fdense2_in[COND_NET_FCONV1_OUT_SIZE];
+ model = &st->model;
+ celt_assert(FARGAN_FEATURES+COND_NET_PEMBED_OUT_SIZE == model->cond_net_fdense1.nb_inputs); /* The local buffer sizes must agree with the layer sizes of the loaded model. */
+ celt_assert(COND_NET_FCONV1_IN_SIZE == model->cond_net_fdense1.nb_outputs);
+ celt_assert(COND_NET_FCONV1_OUT_SIZE == model->cond_net_fconv1.nb_outputs);
+ OPUS_COPY(&dense_in[NB_FEATURES], &model->cond_net_pembed.float_weights[IMAX(0,IMIN(period-32, 223))*COND_NET_PEMBED_OUT_SIZE], COND_NET_PEMBED_OUT_SIZE); /* Embedding row index is period-32 passed through IMIN(.,223) then IMAX(0,.); the row is placed after the features. */
+ OPUS_COPY(dense_in, features, NB_FEATURES);
+
+ compute_generic_dense(&model->cond_net_fdense1, conv1_in, dense_in, ACTIVATION_TANH, st->arch);
+ compute_generic_conv1d(&model->cond_net_fconv1, fdense2_in, st->cond_conv1_state, conv1_in, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH, st->arch);
+ compute_generic_dense(&model->cond_net_fdense2, cond, fdense2_in, ACTIVATION_TANH, st->arch);
+}
+
+/* In-place first-order de-emphasis of one subframe:
+   y[n] = x[n] + FARGAN_DEEMPHASIS*y[n-1].
+   *deemph_mem holds y[n-1] on entry and the last output sample on return. */
+static void fargan_deemphasis(float *pcm, float *deemph_mem) {
+  float *sample = pcm;
+  float *const end = pcm + FARGAN_SUBFRAME_SIZE;
+  while (sample != end) {
+    *sample += FARGAN_DEEMPHASIS * *deemph_mem;
+    *deemph_mem = *sample;
+    sample++;
+  }
+}
+
+static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond, int period)
+{ /* Generates FARGAN_SUBFRAME_SIZE output samples into `pcm` from one
+     sub-frame conditioning vector `cond` and a pitch `period`.
+     Steps: derive a gain from `cond`; build a gain-normalised pitch
+     prediction (`pred`) and previous-subframe signal (`prev`) from
+     st->pitch_buf; run conv1d+GLU, three GRU+GLU stages and a skip/output
+     stage; scale by the gain; append the result to st->pitch_buf; finally
+     de-emphasise `pcm` in place. Requires st->cont_initialized to be set. */
+ int i, pos;
+ float fwc0_in[SIG_NET_INPUT_SIZE];
+ float gru1_in[SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
+ float gru2_in[SIG_NET_GRU1_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
+ float gru3_in[SIG_NET_GRU2_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
+ float pred[FARGAN_SUBFRAME_SIZE+4];
+ float prev[FARGAN_SUBFRAME_SIZE];
+ float pitch_gate[4];
+ float gain;
+ float gain_1;
+ float skip_cat[10000]; /* NOTE(review): fixed-size scratch; nothing here checks it against the skip layer's input size - confirm it is large enough. */
+ float skip_out[SIG_NET_SKIP_DENSE_OUT_SIZE];
+ FARGAN *model;
+
+ celt_assert(st->cont_initialized);
+ model = &st->model;
+
+ compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR, st->arch); /* Single linear output, exponentiated below to give the gain. */
+ gain = exp(gain);
+ gain_1 = 1.f/(1e-5f + gain); /* Inverse gain; the 1e-5f offset keeps the division finite. */
+
+ pos = PITCH_MAX_PERIOD-period-2; /* Start two samples before one period back from the end of the history. */
+ for (i=0;i<FARGAN_SUBFRAME_SIZE+4;i++) {
+ pred[i] = MIN32(1.f, MAX32(-1.f, gain_1*st->pitch_buf[IMAX(0, pos)])); /* Normalised and limited to [-1, 1]; IMAX guards against a negative index. */
+ pos++;
+ if (pos == PITCH_MAX_PERIOD) pos -= period; /* Past the end of the history: step back by one period. */
+ }
+ for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) prev[i] = MAX32(-1.f, MIN16(1.f, gain_1*st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE+i])); /* Most recent subframe of history, normalised and limited the same way. */
+
+ OPUS_COPY(&fwc0_in[0], &cond[0], FARGAN_COND_SIZE); /* fwc0_in = cond | pred | prev */
+ OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE], pred, FARGAN_SUBFRAME_SIZE+4);
+ OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE+FARGAN_SUBFRAME_SIZE+4], prev, FARGAN_SUBFRAME_SIZE);
+
+ compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH, st->arch);
+ celt_assert(SIG_NET_FWC0_GLU_GATE_OUT_SIZE == model->sig_net_fwc0_glu_gate.nb_outputs);
+ compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in, st->arch);
+
+ compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID, st->arch); /* Sigmoid gates; elements 0..3 each scale `pred` for one later stage. */
+
+ for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+i] = pitch_gate[0]*pred[i+2]; /* pred[i+2] skips the two lead-in samples of the prediction. */
+ OPUS_COPY(&gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+ compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in, st->arch);
+ compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state, st->arch);
+
+ for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru2_in[SIG_NET_GRU1_OUT_SIZE+i] = pitch_gate[1]*pred[i+2];
+ OPUS_COPY(&gru2_in[SIG_NET_GRU1_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+ compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in, st->arch);
+ compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state, st->arch);
+
+ for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru3_in[SIG_NET_GRU2_OUT_SIZE+i] = pitch_gate[2]*pred[i+2];
+ OPUS_COPY(&gru3_in[SIG_NET_GRU2_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+ compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in, st->arch);
+ compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state, st->arch); /* Third GLU output is written straight into its slot of skip_cat. */
+
+ OPUS_COPY(skip_cat, gru2_in, SIG_NET_GRU1_OUT_SIZE); /* skip_cat = GLU(gru1) | GLU(gru2) | GLU(gru3) | fwc0 output | gated pred | prev */
+ OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE], gru3_in, SIG_NET_GRU2_OUT_SIZE);
+ OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE], gru1_in, SIG_NET_FWC0_CONV_OUT_SIZE);
+ for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+i] = pitch_gate[3]*pred[i+2];
+ OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
+
+ compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH, st->arch);
+ compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out, st->arch);
+
+ compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH, st->arch);
+ for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) pcm[i] *= gain; /* Undo the gain normalisation on the network output. */
+
+ OPUS_MOVE(st->pitch_buf, &st->pitch_buf[FARGAN_SUBFRAME_SIZE], PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE); /* Slide the history left by one subframe and append the new samples (before de-emphasis). */
+ OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], pcm, FARGAN_SUBFRAME_SIZE);
+ fargan_deemphasis(pcm, &st->deemph_mem);
+}
+
+void fargan_cont(FARGANState *st, const float *pcm0, const float *features0)
+{ /* Primes the synthesis state from existing audio so generation can
+     continue from it. `features0` is read as 5 consecutive frames of
+     NB_FEATURES floats and `pcm0` as FARGAN_CONT_SAMPLES samples.
+     Sets st->cont_initialized, st->last_period, st->pitch_buf and
+     st->deemph_mem, and runs the networks once to warm up their states. */
+ int i;
+ float cond[COND_NET_FDENSE2_OUT_SIZE];
+ float x0[FARGAN_CONT_SAMPLES];
+ float dummy[FARGAN_SUBFRAME_SIZE];
+ int period=0;
+
+ /* Pre-load features. */
+ for (i=0;i<5;i++) {
+ const float *features = &features0[i*NB_FEATURES];
+ st->last_period = period; /* Saved before `period` is recomputed, so it ends up one frame behind `period`. */
+ period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60)))); /* Algebraically 256/2^(features[NB_BANDS]+1.5), rounded to nearest. */
+ compute_fargan_cond(st, cond, features, period);
+ }
+
+ x0[0] = 0; /* No sample precedes pcm0[0], so the first pre-emphasised value is set to zero. */
+ for (i=1;i<FARGAN_CONT_SAMPLES;i++) {
+ x0[i] = pcm0[i] - FARGAN_DEEMPHASIS*pcm0[i-1];
+ }
+
+ OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_FRAME_SIZE], x0, FARGAN_FRAME_SIZE); /* First frame of x0 becomes the tail of the pitch history. */
+ st->cont_initialized = 1;
+
+ for (i=0;i<FARGAN_NB_SUBFRAMES;i++) {
+ run_fargan_subframe(st, dummy, &cond[i*FARGAN_COND_SIZE], st->last_period); /* Output is discarded into `dummy`; the call is made for its effect on the state. */
+ OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], &x0[FARGAN_FRAME_SIZE+i*FARGAN_SUBFRAME_SIZE], FARGAN_SUBFRAME_SIZE); /* Replace the just-generated history with the matching samples of x0. */
+ }
+ st->deemph_mem = pcm0[FARGAN_CONT_SAMPLES-1];
+}
+
+
+/* Reset a FARGANState: zero the whole structure, record the CPU
+   architecture returned by opus_select_arch() and, unless the build defines
+   USE_WEIGHTS_FILE, initialise the model from the compiled-in fargan_arrays.
+   Asserts that model initialisation reported success. */
+void fargan_init(FARGANState *st)
+{
+  int status = 0;
+  OPUS_CLEAR(st, 1);
+  st->arch = opus_select_arch();
+#ifndef USE_WEIGHTS_FILE
+  status = init_fargan(&st->model, fargan_arrays);
+#endif
+  celt_assert(status == 0);
+}
+
+/* Initialise st->model from a serialized weight blob.
+   data/len: the blob handed to parse_weights().
+   Returns 0 on success and -1 if no weight table could be obtained or
+   init_fargan() rejects it. The temporary weight table is released before
+   returning. */
+int fargan_load_model(FARGANState *st, const void *data, int len) {
+  WeightArray *list = NULL;
+  int ret;
+  parse_weights(&list, data, len);
+  /* The parse result used to be ignored and `list` was uninitialised, so a
+     failed parse could hand an invalid pointer to init_fargan().
+     Assumes parse_weights() leaves *list NULL (or untouched) on failure -
+     TODO confirm against its implementation. */
+  if (list == NULL) return -1;
+  ret = init_fargan(&st->model, list);
+  opus_free(list);
+  return (ret == 0) ? 0 : -1;
+}
+
+/* Synthesise one frame (FARGAN_NB_SUBFRAMES subframes) into `pcm` from one
+   feature vector. The conditioning is computed with the pitch period derived
+   from the current features, but each subframe is generated with the period
+   stored by the previous call; the new period is stored afterwards.
+   Requires st->cont_initialized to be set. */
+static void fargan_synthesize_impl(FARGANState *st, float *pcm, const float *features)
+{
+  float cond[COND_NET_FDENSE2_OUT_SIZE];
+  int new_period;
+  int sf;
+  celt_assert(st->cont_initialized);
+
+  new_period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
+  compute_fargan_cond(st, cond, features, new_period);
+  for (sf=0; sf<FARGAN_NB_SUBFRAMES; sf++) {
+    run_fargan_subframe(st, pcm + sf*FARGAN_SUBFRAME_SIZE, cond + sf*FARGAN_COND_SIZE, st->last_period);
+  }
+  st->last_period = new_period;
+}
+
+void fargan_synthesize(FARGANState *st, float *pcm, const float *features)
+{ /* Public float entry point: forwards unchanged to fargan_synthesize_impl(). */
+ fargan_synthesize_impl(st, pcm, features);
+}
+
+/* 16-bit variant of fargan_synthesize(): synthesises one frame in float,
+   scales by 32768, limits to +/-32767 (MIN32/MAX32), rounds with
+   floor(x+.5) and stores FARGAN_FRAME_SIZE samples in `pcm`. */
+void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features)
+{
+  int i;
+  float fpcm[FARGAN_FRAME_SIZE];
+  fargan_synthesize(st, fpcm, features);
+  /* Bound the loop by the size of fpcm[] itself. The previous bound,
+     LPCNET_FRAME_SIZE, belongs to another module and would read past
+     fpcm[] if the two constants ever diverged. */
+  for (i=0;i<FARGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
+}
diff --git a/dnn/fargan.h b/dnn/fargan.h
new file mode 100644
index 00000000..d44ae89b
--- /dev/null
+++ b/dnn/fargan.h
@@ -0,0 +1,68 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FARGAN_H
+#define FARGAN_H
+
+#include "freq.h"
+#include "fargan_data.h"
+#include "pitchdnn.h"
+
+#define FARGAN_CONT_SAMPLES 320 /* Number of pcm0 samples read by fargan_cont(). */
+#define FARGAN_NB_SUBFRAMES 4 /* Subframes generated per frame. */
+#define FARGAN_SUBFRAME_SIZE 40 /* Samples per subframe. */
+#define FARGAN_FRAME_SIZE (FARGAN_NB_SUBFRAMES*FARGAN_SUBFRAME_SIZE) /* Samples per frame (4*40 = 160). */
+#define FARGAN_COND_SIZE (COND_NET_FDENSE2_OUT_SIZE/FARGAN_NB_SUBFRAMES) /* Conditioning values per subframe: the frame-level output split evenly. */
+#define FARGAN_DEEMPHASIS 0.85f /* Coefficient of the first-order de-emphasis / pre-emphasis filters. */
+
+#define SIG_NET_INPUT_SIZE (FARGAN_COND_SIZE+2*FARGAN_SUBFRAME_SIZE+4) /* cond + (subframe+4) pitch prediction + subframe of previous signal. */
+#define SIG_NET_FWC0_STATE_SIZE (2*SIG_NET_INPUT_SIZE) /* Size of the fwc0_mem conv1d state buffer. */
+
+#define FARGAN_MAX_RNN_NEURONS SIG_NET_GRU1_OUT_SIZE
+typedef struct { /* Complete FARGAN synthesis state; zeroed by fargan_init(). */
+ FARGAN model; /* Network layers/weights, filled in by init_fargan(). */
+ int arch; /* Value of opus_select_arch(), passed to the compute_* kernels. */
+ int cont_initialized; /* Set to 1 by fargan_cont(); synthesis asserts it is non-zero. */
+ float deemph_mem; /* Memory of the output de-emphasis filter. */
+ float pitch_buf[PITCH_MAX_PERIOD]; /* History of generated samples (before de-emphasis), newest at the end. */
+ float cond_conv1_state[COND_NET_FCONV1_STATE_SIZE]; /* State buffer of the conditioning conv1d layer. */
+ float fwc0_mem[SIG_NET_FWC0_STATE_SIZE]; /* State buffer of the signal-network input conv1d layer. */
+ float gru1_state[SIG_NET_GRU1_STATE_SIZE]; /* Recurrent states of the three GRU layers. */
+ float gru2_state[SIG_NET_GRU2_STATE_SIZE];
+ float gru3_state[SIG_NET_GRU3_STATE_SIZE];
+ int last_period; /* Pitch period stored by the previous frame; used to generate the current frame's subframes. */
+} FARGANState;
+
+void fargan_init(FARGANState *st); /* Zero the state, select the CPU arch and load built-in weights unless USE_WEIGHTS_FILE is defined. */
+int fargan_load_model(FARGANState *st, const void *data, int len); /* Load weights from a blob; returns 0 on success, -1 on failure. */
+
+void fargan_cont(FARGANState *st, const float *pcm0, const float *features0); /* Prime the state from FARGAN_CONT_SAMPLES samples and 5 feature frames; must be called before synthesis. */
+
+void fargan_synthesize(FARGANState *st, float *pcm, const float *features); /* Generate one frame of FARGAN_FRAME_SIZE float samples. */
+void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features); /* Same, converted to 16-bit samples. */
+
+
+#endif /* FARGAN_H */
diff --git a/dnn/freq.c b/dnn/freq.c
new file mode 100644
index 00000000..8e91649a
--- /dev/null
+++ b/dnn/freq.c
@@ -0,0 +1,328 @@
+/* Copyright (c) 2017-2018 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "kiss_fft.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include "burg.h"
+#include <assert.h>
+#include "os_support.h"
+
+#define SQUARE(x) ((x)*(x))
+
+static const opus_int16 eband5ms[] = { /* Band edges; multiplied by WINDOW_SIZE_5MS to obtain the FFT bin index of each edge. */
+/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k*/
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40
+};
+
+static const float compensation[] = { /* Per-band factor applied to the band energies in lpc_from_cepstrum(). */
+ 0.8f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.666667f, 0.5f, 0.5f, 0.5f, 0.333333f, 0.25f, 0.25f, 0.2f, 0.166667f, 0.173913f
+};
+
+
+extern const kiss_fft_state kfft; /* FFT configuration used by forward_transform()/inverse_transform(); defined outside this file. */
+extern const float half_window[OVERLAP_SIZE]; /* Rising half of the analysis window, used by apply_window(). */
+extern const float dct_table[NB_BANDS*NB_BANDS]; /* NB_BANDS x NB_BANDS cosine table used by dct()/idct(). */
+
+
+/* Accumulate, per band, the reciprocal of the power |X[k]|^2 (+1e-9) of
+   every FFT bin, sharing each bin between the two neighbouring bands with
+   linear (triangular) weights. The first and last bands only receive one
+   side of a triangle and are doubled. Writes NB_BANDS values to bandE. */
+static void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
+  float acc[NB_BANDS] = {0};
+  int band;
+  for (band=0; band+1<NB_BANDS; band++)
+  {
+    const int first_bin = eband5ms[band]*WINDOW_SIZE_5MS;
+    const int nbins = (eband5ms[band+1]-eband5ms[band])*WINDOW_SIZE_5MS;
+    int k;
+    for (k=0; k<nbins; k++) {
+      const kiss_fft_cpx *bin = &X[first_bin + k];
+      float frac = (float)k/nbins;
+      float inv;
+      inv = SQUARE(bin->r);
+      inv += SQUARE(bin->i);
+      inv = 1.f/(inv + 1e-9);
+      acc[band] += (1-frac)*inv;
+      acc[band+1] += frac*inv;
+    }
+  }
+  acc[0] *= 2;
+  acc[NB_BANDS-1] *= 2;
+  for (band=0; band<NB_BANDS; band++)
+  {
+    bandE[band] = acc[band];
+  }
+}
+
+static float lpcn_lpc( /* Derives p LPC coefficients and reflection coefficients from autocorrelation values with a Levinson-Durbin-style recursion; returns the remaining prediction error. */
+ opus_val16 *lpc, /* out: [0...p-1] LPC coefficients */
+ opus_val16 *rc, /* out: [0...p-1] reflection coefficients (one stored per iteration) */
+const opus_val32 *ac, /* in: [0...p] autocorrelation values */
+int p /* prediction order */
+)
+{
+ int i, j;
+ opus_val32 r;
+ opus_val32 error = ac[0];
+
+ OPUS_CLEAR(lpc, p);
+ OPUS_CLEAR(rc, p);
+ if (ac[0] != 0) /* With zero energy the outputs stay all-zero and ac[0] is returned. */
+ {
+ for (i = 0; i < p; i++) {
+ /* Sum up this iteration's reflection coefficient */
+ opus_val32 rr = 0;
+ for (j = 0; j < i; j++)
+ rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+ rr += SHR32(ac[i + 1],3);
+ r = -SHL32(rr,3)/error;
+ rc[i] = r;
+ /* Update LPC coefficients and total error */
+ lpc[i] = SHR32(r,3);
+ for (j = 0; j < (i+1)>>1; j++) /* Updates coefficients in mirrored pairs (j, i-1-j), so only half the range is walked. */
+ {
+ opus_val32 tmp1, tmp2;
+ tmp1 = lpc[j];
+ tmp2 = lpc[i-1-j];
+ lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
+ lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
+ }
+
+ error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
+ /* Bail out once we get 30 dB gain */
+ if (error<.001f*ac[0])
+ break;
+ }
+ }
+ return error;
+}
+
+
+
+/* Accumulate the power |X[k]|^2 of every FFT bin into NB_BANDS bands,
+   sharing each bin between the two neighbouring bands with linear
+   (triangular) weights. The first and last bands only receive one side of a
+   triangle and are doubled. Writes NB_BANDS values to bandE. */
+void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
+  float acc[NB_BANDS] = {0};
+  int band;
+  for (band=0; band+1<NB_BANDS; band++)
+  {
+    const int first_bin = eband5ms[band]*WINDOW_SIZE_5MS;
+    const int nbins = (eband5ms[band+1]-eband5ms[band])*WINDOW_SIZE_5MS;
+    int k;
+    for (k=0; k<nbins; k++) {
+      const kiss_fft_cpx *bin = &X[first_bin + k];
+      float frac = (float)k/nbins;
+      float power;
+      power = SQUARE(bin->r);
+      power += SQUARE(bin->i);
+      acc[band] += (1-frac)*power;
+      acc[band+1] += frac*power;
+    }
+  }
+  acc[0] *= 2;
+  acc[NB_BANDS-1] *= 2;
+  for (band=0; band<NB_BANDS; band++)
+  {
+    bandE[band] = acc[band];
+  }
+}
+
+static void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order) { /* Computes NB_BANDS cepstral coefficients for `len` samples of `pcm`: pre-emphasis, Burg LPC analysis, band energies of the inverse LPC spectrum, limited log10 energies, then DCT. Asserts order <= LPC_ORDER and len <= FRAME_SIZE. */
+ int i;
+ float burg_in[FRAME_SIZE];
+ float burg_lpc[LPC_ORDER];
+ float x[WINDOW_SIZE];
+ float Eburg[NB_BANDS];
+ float g;
+ kiss_fft_cpx LPC[FREQ_SIZE];
+ float Ly[NB_BANDS];
+ float logMax = -2;
+ float follow = -2;
+ assert(order <= LPC_ORDER);
+ assert(len <= FRAME_SIZE);
+ for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i]; /* Pre-emphasis yields len-1 samples. */
+ g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
+ g /= len - 2*(order-1);
+ OPUS_CLEAR(x, WINDOW_SIZE);
+ x[0] = 1; /* x holds the prediction-error filter [1, -a1*.995, -a2*.995^2, ...], zero padded to WINDOW_SIZE. */
+ for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
+ forward_transform(LPC, x);
+ compute_band_energy_inverse(Eburg, LPC); /* Reciprocal of the filter's power response, per band. */
+ for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
+ for (i=0;i<NB_BANDS;i++) {
+ Ly[i] = log10(1e-2+Eburg[i]);
+ Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i])); /* Floor each band at 8 below the running maximum and 2.5 below the decaying follower. */
+ logMax = MAX16(logMax, Ly[i]);
+ follow = MAX16(follow-2.5, Ly[i]);
+ }
+ dct(burg_cepstrum, Ly);
+ burg_cepstrum[0] += - 4; /* Constant offset on the first coefficient; lpc_from_cepstrum() adds 4 back to its input. */
+}
+
+/* Burg cepstral analysis of one frame: each half of the frame is analysed
+   separately, then the two NB_BANDS-long cepstra are replaced in place by
+   their average (ceps[0..NB_BANDS-1]) and their difference
+   (ceps[NB_BANDS..2*NB_BANDS-1]). */
+void burg_cepstral_analysis(float *ceps, const float *x) {
+  float *first = ceps;
+  float *second = ceps + NB_BANDS;
+  int band;
+  compute_burg_cepstrum(x, first, FRAME_SIZE/2, LPC_ORDER);
+  compute_burg_cepstrum(x + FRAME_SIZE/2, second, FRAME_SIZE/2, LPC_ORDER);
+  for (band=0; band<NB_BANDS; band++) {
+    const float c0 = first[band];
+    const float c1 = second[band];
+    first[band] = .5*(c0+c1);
+    second[band] = (c0-c1);
+  }
+}
+
+
+/* Expand NB_BANDS band values into FREQ_SIZE per-bin values by linear
+   interpolation between consecutive band edges. Bins at or above the last
+   band edge are left at zero. */
+static void interp_band_gain(float *g, const float *bandE) {
+  int i;
+  /* memset() counts bytes: the previous call cleared only FREQ_SIZE bytes,
+     i.e. a fraction of the FREQ_SIZE floats in g. */
+  memset(g, 0, FREQ_SIZE*sizeof(*g));
+  for (i=0;i<NB_BANDS-1;i++)
+  {
+    int j;
+    int band_size;
+    band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
+    for (j=0;j<band_size;j++) {
+      float frac = (float)j/band_size;
+      g[(eband5ms[i]*WINDOW_SIZE_5MS) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
+    }
+  }
+}
+
+
+/* Forward DCT over NB_BANDS values: out[k] is the dot product of `in` with
+   column k of dct_table, scaled by sqrt(2/NB_BANDS). */
+void dct(float *out, const float *in) {
+  int k;
+  for (k=0; k<NB_BANDS; k++) {
+    const float *column = &dct_table[k];
+    float acc = 0;
+    int n;
+    for (n=0; n<NB_BANDS; n++) {
+      acc += in[n] * column[n*NB_BANDS];
+    }
+    out[k] = acc*sqrt(2./NB_BANDS);
+  }
+}
+
+/* Inverse of dct(): out[n] is the dot product of `in` with row n of
+   dct_table, scaled by sqrt(2/NB_BANDS). */
+static void idct(float *out, const float *in) {
+  int n;
+  for (n=0; n<NB_BANDS; n++) {
+    const float *row = &dct_table[n*NB_BANDS];
+    float acc = 0;
+    int k;
+    for (k=0; k<NB_BANDS; k++) {
+      acc += in[k] * row[k];
+    }
+    out[n] = acc*sqrt(2./NB_BANDS);
+  }
+}
+
+/* FFT of WINDOW_SIZE real samples. The input is promoted to complex with a
+   zero imaginary part, transformed with opus_fft(), and the first FREQ_SIZE
+   bins are copied to `out`. */
+void forward_transform(kiss_fft_cpx *out, const float *in) {
+  kiss_fft_cpx time_in[WINDOW_SIZE];
+  kiss_fft_cpx spectrum[WINDOW_SIZE];
+  int n;
+  for (n=0; n<WINDOW_SIZE; n++) {
+    time_in[n].r = in[n];
+    time_in[n].i = 0;
+  }
+  opus_fft(&kfft, time_in, spectrum, 0);
+  for (n=0; n<FREQ_SIZE; n++) {
+    out[n] = spectrum[n];
+  }
+}
+
+/* Inverse real FFT built on the forward opus_fft(). The FREQ_SIZE input bins
+   are extended to WINDOW_SIZE by conjugate symmetry, transformed, and the
+   real parts are read back in reversed index order and multiplied by
+   WINDOW_SIZE. */
+static void inverse_transform(float *out, const kiss_fft_cpx *in) {
+  kiss_fft_cpx full[WINDOW_SIZE];
+  kiss_fft_cpx y[WINDOW_SIZE];
+  int k;
+  for (k=0; k<FREQ_SIZE; k++) {
+    full[k] = in[k];
+  }
+  /* Upper bins are the complex conjugates of the mirrored lower bins. */
+  for (k=FREQ_SIZE; k<WINDOW_SIZE; k++) {
+    full[k].r = full[WINDOW_SIZE - k].r;
+    full[k].i = -full[WINDOW_SIZE - k].i;
+  }
+  opus_fft(&kfft, full, y, 0);
+  /* output in reverse order for IFFT. */
+  out[0] = WINDOW_SIZE*y[0].r;
+  for (k=WINDOW_SIZE-1; k>0; k--) {
+    out[WINDOW_SIZE - k] = WINDOW_SIZE*y[k].r;
+  }
+}
+
+static float lpc_from_bands(float *lpc, const float *Ex) /* Converts NB_BANDS band energies to LPC_ORDER LPC coefficients: interpolate to a per-bin spectrum, inverse-transform it to get autocorrelation values, condition them, then run lpcn_lpc(). Returns the prediction error from lpcn_lpc(). */
+{
+ int i;
+ float e;
+ float ac[LPC_ORDER+1];
+ float rc[LPC_ORDER];
+ float Xr[FREQ_SIZE];
+ kiss_fft_cpx X_auto[FREQ_SIZE];
+ float x_auto[WINDOW_SIZE];
+ interp_band_gain(Xr, Ex);
+ Xr[FREQ_SIZE-1] = 0;
+ OPUS_CLEAR(X_auto, FREQ_SIZE);
+ for (i=0;i<FREQ_SIZE;i++) X_auto[i].r = Xr[i]; /* Real-valued spectrum with zero imaginary part. */
+ inverse_transform(x_auto, X_auto);
+ for (i=0;i<LPC_ORDER+1;i++) ac[i] = x_auto[i]; /* Only lags 0..LPC_ORDER are needed. */
+
+ /* -40 dB noise floor. */
+ ac[0] += ac[0]*1e-4 + 320/12/38.; /* Note: 320/12 is evaluated as an integer division (26) before the division by 38. */
+ /* Lag windowing. */
+ for (i=1;i<LPC_ORDER+1;i++) ac[i] *= (1 - 6e-5*i*i);
+ e = lpcn_lpc(lpc, rc, ac, LPC_ORDER);
+ return e;
+}
+
+/* Scale the LPC_ORDER coefficients in place so that lpc[i] is multiplied by
+   gamma^(i+1). */
+void lpc_weighting(float *lpc, float gamma)
+{
+  float weight = gamma;
+  float *coef;
+  for (coef = lpc; coef != lpc + LPC_ORDER; coef++)
+  {
+    *coef *= weight;
+    weight *= gamma;
+  }
+}
+
+/* Convert NB_BANDS cepstral coefficients to LPC coefficients. A copy of the
+   cepstrum has 4 added to its first coefficient, is inverse-DCT'd to log10
+   band energies, converted to linear energies with the per-band compensation
+   applied, and handed to lpc_from_bands(). Returns that function's result. */
+float lpc_from_cepstrum(float *lpc, const float *cepstrum)
+{
+  float shifted[NB_BANDS];
+  float band_energy[NB_BANDS];
+  int b;
+  for (b=0; b<NB_BANDS; b++) shifted[b] = cepstrum[b];
+  shifted[0] += 4;
+  idct(band_energy, shifted);
+  for (b=0; b<NB_BANDS; b++) {
+    band_energy[b] = pow(10.f, band_energy[b])*compensation[b];
+  }
+  return lpc_from_bands(lpc, band_energy);
+}
+
+/* Apply the symmetric analysis window in place to WINDOW_SIZE samples: the
+   first OVERLAP_SIZE samples are multiplied by half_window and the last
+   OVERLAP_SIZE samples by the same values in mirrored order. */
+void apply_window(float *x) {
+  float *head = x;
+  float *tail = x + WINDOW_SIZE - 1;
+  int n;
+  for (n=0; n<OVERLAP_SIZE; n++) {
+    const float w = half_window[n];
+    *head++ *= w;
+    *tail-- *= w;
+  }
+}
diff --git a/dnn/freq.h b/dnn/freq.h
new file mode 100644
index 00000000..99ebf567
--- /dev/null
+++ b/dnn/freq.h
@@ -0,0 +1,61 @@
+/* Copyright (c) 2017-2018 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FREQ_H
+#define FREQ_H
+
+#include "kiss_fft.h"
+
+#define LPC_ORDER 16 /* Number of LPC coefficients. */
+
+#define PREEMPHASIS (0.85f) /* Pre-emphasis coefficient applied before Burg analysis. */
+
+#define FRAME_SIZE_5MS (2) /* Sizes below are expressed in units of 80 samples (presumably 5 ms at 16 kHz - confirm). */
+#define OVERLAP_SIZE_5MS (2)
+#define TRAINING_OFFSET_5MS (1)
+
+#define WINDOW_SIZE_5MS (FRAME_SIZE_5MS + OVERLAP_SIZE_5MS)
+
+#define FRAME_SIZE (80*FRAME_SIZE_5MS) /* 160 samples. */
+#define OVERLAP_SIZE (80*OVERLAP_SIZE_5MS) /* 160 samples. */
+#define TRAINING_OFFSET (80*TRAINING_OFFSET_5MS) /* 80 samples. */
+#define WINDOW_SIZE (FRAME_SIZE + OVERLAP_SIZE) /* 320 samples; also the FFT length. */
+#define FREQ_SIZE (WINDOW_SIZE/2 + 1) /* 161 non-redundant bins of a real FFT. */
+
+#define NB_BANDS 18 /* Number of spectral bands / cepstral coefficients. */
+#define NB_BANDS_1 (NB_BANDS - 1)
+
+void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X);
+void burg_cepstral_analysis(float *ceps, const float *x);
+
+void apply_window(float *x);
+void dct(float *out, const float *in);
+void forward_transform(kiss_fft_cpx *out, const float *in);
+float lpc_from_cepstrum(float *lpc, const float *cepstrum);
+void apply_window(float *x);
+void lpc_weighting(float *lpc, float gamma);
+
+#endif
diff --git a/dnn/fwgan.c b/dnn/fwgan.c
new file mode 100644
index 00000000..301c73f6
--- /dev/null
+++ b/dnn/fwgan.c
@@ -0,0 +1,322 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "fwgan.h"
+#include "os_support.h"
+#include "freq.h"
+#include "fwgan_data.h"
+#include "lpcnet.h"
+#include "pitch.h"
+#include "nnet.h"
+#include "lpcnet_private.h"
+
+#define FEAT_IN_SIZE (BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4 + FWGAN_FRAME_SIZE/2) /* Size of the per-subframe feat_in buffer: pitch embedding (FWGAN_FRAME_SIZE/2) plus a quarter of the upsampler output. */
+
+#define FWGAN_FEATURES (NB_FEATURES-1) /* Number of inputs expected by the upsampler layer (asserted in run_fwgan_upsampler). */
+
+static void pitch_embeddings(float *pembed, float *phase, double w0) { /* Fills pembed with SUBFRAME_SIZE values of phase[1] followed by SUBFRAME_SIZE values of phase[0], advancing the two-element phase state by a rotation of w0 per sample. `phase` is updated in place and renormalised at the end. */
+ int i;
+ float wreal, wimag;
+#if 1
+ /* This Taylor expansion should be good enough since w0 is always small. */
+ float w2 = w0*w0;
+ wreal = 1 - .5*w2*(1.f - 0.083333333f*w2); /* Truncated series for cos(w0). */
+ wimag = w0*(1 - 0.166666667f*w2*(1.f - 0.05f*w2)); /* Truncated series for sin(w0). */
+#else
+ wreal = cos(w0);
+ wimag = sin(w0);
+#endif
+ /* Speed-up phase reference by making phase a unit-norm complex value and rotating it
+ by exp(-i*w0) each sample. */
+ for (i=0;i<SUBFRAME_SIZE;i++) {
+ float tmp;
+ tmp = phase[0]*wreal - phase[1]*wimag; /* Complex multiply of (phase[0], phase[1]) by (wreal, wimag). */
+ phase[1] = phase[0]*wimag + phase[1]*wreal;
+ phase[0] = tmp;
+ pembed[i] = phase[1];
+ pembed[SUBFRAME_SIZE+i] = phase[0];
+ }
+ /* Renormalize once per sub-frame, though we could probably do it even less frequently. */
+ {
+ float r = 1.f/sqrt(phase[0]*phase[0] + phase[1]*phase[1]);
+ phase[0] *= r;
+ phase[1] *= r;
+ }
+}
+
+/* Compute weighted LPC coefficients from a feature vector: the LPCs are
+   derived with lpc_from_cepstrum() and coefficient k is then multiplied by
+   FWGAN_GAMMA^(k+1). */
+static void compute_wlpc(float lpc[LPC_ORDER], const float *features) {
+  float weight = 1.f;
+  int k = 0;
+  lpc_from_cepstrum(lpc, features);
+  while (k < LPC_ORDER) {
+    weight *= FWGAN_GAMMA;
+    lpc[k] *= weight;
+    k++;
+  }
+}
+
+/* Run the feature upsampler: one dense layer with tanh activation mapping
+   `features` to `cond`. The layer's input and output sizes are asserted
+   against the compile-time constants. */
+static void run_fwgan_upsampler(FWGANState *st, float *cond, const float *features)
+{
+  FWGAN *const net = &st->model;
+  celt_assert(FWGAN_FEATURES == net->bfcc_with_corr_upsampler_fc.nb_inputs);
+  celt_assert(BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE == net->bfcc_with_corr_upsampler_fc.nb_outputs);
+  compute_generic_dense(&net->bfcc_with_corr_upsampler_fc, cond, features, ACTIVATION_TANH);
+}
+
+static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features);
+void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0)
+{ /* Primes the FWGAN state from CONT_PCM_INPUTS samples of existing audio
+     (`pcm0`) and one feature vector (`features0`): sets the filter
+     memories, runs the continuation network to initialise the recurrent
+     and convolutional states, then synthesises a first frame and buffers
+     all but its first subframe in st->pcm_buf. */
+ int i;
+ float norm2, norm_1;
+ float wpcm0[CONT_PCM_INPUTS];
+ float cont_inputs[CONT_PCM_INPUTS+1];
+ float tmp1[MAX_CONT_SIZE];
+ float tmp2[MAX_CONT_SIZE];
+ float lpc[LPC_ORDER];
+ float new_pcm[FWGAN_FRAME_SIZE];
+ FWGAN *model;
+ st->embed_phase[0] = 1; /* Only element 0 is written here; element 1 keeps whatever value the state already holds. */
+ model = &st->model;
+ compute_wlpc(lpc, features0);
+ /* Deemphasis memory is just the last continuation sample. */
+ st->deemph_mem = pcm0[CONT_PCM_INPUTS-1];
+
+ /* Apply analysis filter, considering that the preemphasis and deemphasis filter
+ cancel each other in this case since the LPC filter is constant across that boundary.
+ */
+ for (i=LPC_ORDER;i<CONT_PCM_INPUTS;i++) {
+ int j;
+ wpcm0[i] = pcm0[i];
+ for (j=0;j<LPC_ORDER;j++) wpcm0[i] += lpc[j]*pcm0[i-j-1];
+ }
+ /* FIXME: Make this less stupid. */
+ for (i=0;i<LPC_ORDER;i++) wpcm0[i] = wpcm0[LPC_ORDER]; /* The first LPC_ORDER outputs lack filter history, so they are filled with the first valid value. */
+
+ /* The memory of the pre-emphasis is the last sample of the weighted signal
+ (ignoring preemphasis+deemphasis combination). */
+ st->preemph_mem = wpcm0[CONT_PCM_INPUTS-1];
+ /* The memory of the synthesis filter is the pre-emphasized continuation. */
+ for (i=0;i<LPC_ORDER;i++) st->syn_mem[i] = pcm0[CONT_PCM_INPUTS-1-i] - FWGAN_DEEMPHASIS*pcm0[CONT_PCM_INPUTS-2-i];
+
+ norm2 = celt_inner_prod(wpcm0, wpcm0, CONT_PCM_INPUTS, st->arch);
+ norm_1 = 1.f/sqrt(1e-8f + norm2);
+ for (i=0;i<CONT_PCM_INPUTS;i++) cont_inputs[i+1] = norm_1*wpcm0[i]; /* Inputs 1..N: the weighted signal scaled by 1/sqrt(norm2). */
+ cont_inputs[0] = log(sqrt(norm2) + 1e-7f); /* Input 0: log of the signal norm. */
+
+ /* Continuation network */
+ compute_generic_dense(&model->cont_net_0, tmp1, cont_inputs, ACTIVATION_TANH); /* tmp1 and tmp2 alternate as input and output of successive layers. */
+ compute_generic_dense(&model->cont_net_2, tmp2, tmp1, ACTIVATION_TANH);
+ compute_generic_dense(&model->cont_net_4, tmp1, tmp2, ACTIVATION_TANH);
+ compute_generic_dense(&model->cont_net_6, tmp2, tmp1, ACTIVATION_TANH);
+ compute_generic_dense(&model->cont_net_8, tmp1, tmp2, ACTIVATION_TANH);
+ celt_assert(CONT_NET_10_OUT_SIZE == model->cont_net_10.nb_outputs);
+ compute_generic_dense(&model->cont_net_10, st->cont, tmp1, ACTIVATION_TANH);
+
+ /* Computing continuation for each layer. */
+ celt_assert(RNN_GRU_STATE_SIZE == model->rnn_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->rnn_cont_fc_0, st->rnn_state, st->cont, ACTIVATION_TANH);
+
+ celt_assert(FWC1_STATE_SIZE == model->fwc1_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc1_cont_fc_0, st->fwc1_state, st->cont, ACTIVATION_TANH);
+ celt_assert(FWC2_STATE_SIZE == model->fwc2_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc2_cont_fc_0, st->fwc2_state, st->cont, ACTIVATION_TANH);
+ celt_assert(FWC3_STATE_SIZE == model->fwc3_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc3_cont_fc_0, st->fwc3_state, st->cont, ACTIVATION_TANH);
+ celt_assert(FWC4_STATE_SIZE == model->fwc4_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc4_cont_fc_0, st->fwc4_state, st->cont, ACTIVATION_TANH);
+ celt_assert(FWC5_STATE_SIZE == model->fwc5_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc5_cont_fc_0, st->fwc5_state, st->cont, ACTIVATION_TANH);
+ celt_assert(FWC6_STATE_SIZE == model->fwc6_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc6_cont_fc_0, st->fwc6_state, st->cont, ACTIVATION_TANH);
+ celt_assert(FWC7_STATE_SIZE == model->fwc7_cont_fc_0.nb_outputs);
+ compute_generic_dense(&model->fwc7_cont_fc_0, st->fwc7_state, st->cont, ACTIVATION_TANH);
+
+ st->cont_initialized = 1;
+ /* Process the first frame, discard the first subframe, and keep the rest for the first
+ synthesis call. */
+ fwgan_synthesize_impl(st, new_pcm, lpc, features0);
+ OPUS_COPY(st->pcm_buf, &new_pcm[SUBFRAME_SIZE], FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+}
+
+/* Scale one subframe in place by the gain stored from the previous call,
+   then store the gain derived from c0, 10^(0.5*c0/sqrt(18)), in *last_gain
+   for the next call. */
+static void apply_gain(float *pcm, float c0, float *last_gain) {
+  const float next_gain = pow(10.f, (0.5f*c0/sqrt(18.f)));
+  float *s;
+  for (s = pcm; s < pcm + SUBFRAME_SIZE; s++) {
+    *s *= *last_gain;
+  }
+  *last_gain = next_gain;
+}
+
+/* In-place LPC synthesis filtering of one subframe. Each sample has the
+   prediction from the LPC_ORDER previous outputs (kept in `mem`, newest
+   first) subtracted, using the coefficients stored by the previous call
+   (`last_lpc`). On return last_lpc holds a copy of `lpc` for the next call. */
+static void fwgan_lpc_syn(float *pcm, float *mem, const float *lpc, float last_lpc[LPC_ORDER]) {
+  int n;
+  for (n=0; n<SUBFRAME_SIZE; n++) {
+    int tap = 0;
+    while (tap < LPC_ORDER) {
+      pcm[n] -= mem[tap]*last_lpc[tap];
+      tap++;
+    }
+    /* Push the new output sample onto the front of the history. */
+    OPUS_MOVE(&mem[1], &mem[0], LPC_ORDER-1);
+    mem[0] = pcm[n];
+  }
+  OPUS_COPY(last_lpc, lpc, LPC_ORDER);
+}
+
+/* In-place first-order pre-emphasis of one subframe:
+   y[n] = x[n] - FWGAN_DEEMPHASIS*x[n-1].
+   *preemph_mem holds x[n-1] on entry and the last input sample on return. */
+static void fwgan_preemphasis(float *pcm, float *preemph_mem) {
+  int n = 0;
+  while (n < SUBFRAME_SIZE) {
+    const float in = pcm[n];
+    pcm[n] = in - FWGAN_DEEMPHASIS * *preemph_mem;
+    *preemph_mem = in;
+    n++;
+  }
+}
+
+/* In-place first-order de-emphasis of one subframe:
+   y[n] = x[n] + FWGAN_DEEMPHASIS*y[n-1].
+   *deemph_mem holds y[n-1] on entry and the last output sample on return. */
+static void fwgan_deemphasis(float *pcm, float *deemph_mem) {
+  float *sample = pcm;
+  float *const end = pcm + SUBFRAME_SIZE;
+  while (sample != end) {
+    *sample += FWGAN_DEEMPHASIS * *deemph_mem;
+    *deemph_mem = *sample;
+    sample++;
+  }
+}
+
+static void run_fwgan_subframe(FWGANState *st, float *pcm, const float *cond, double w0, const float *lpc, float c0) /* Synthesizes one subframe (SUBFRAME_SIZE samples) into pcm from the conditioning slice cond, pitch frequency w0, LPC coefficients lpc and gain feature c0. */
+{
+ float tmp1[FWC1_FC_0_OUT_SIZE]; /* tmp1 and tmp2 alternate as input/output of the successive layers below */
+ float tmp2[IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE)];
+ float feat_in[FEAT_IN_SIZE];
+ float rnn_in[FEAT_IN_CONV1_CONV_OUT_SIZE];
+ float pembed[FWGAN_FRAME_SIZE/2];
+ FWGAN *model;
+ model = &st->model;
+
+ pitch_embeddings(pembed, st->embed_phase, w0);
+ /* Interleave bfcc_cond and pembed for each subframe in feat_in. */
+ OPUS_COPY(&feat_in[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], &cond[0], BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4); /* conditioning goes after the pitch embedding */
+ OPUS_COPY(&feat_in[0], &pembed[0], FWGAN_FRAME_SIZE/2); /* pitch embedding occupies the start of feat_in */
+
+ compute_generic_conv1d(&model->feat_in_conv1_conv, rnn_in, st->cont_conv1_mem, feat_in, FEAT_IN_CONV1_CONV_IN_SIZE, ACTIVATION_LINEAR);
+ celt_assert(FEAT_IN_NL1_GATE_OUT_SIZE == model->feat_in_nl1_gate.nb_outputs);
+ compute_gated_activation(&model->feat_in_nl1_gate, rnn_in, rnn_in, ACTIVATION_TANH);
+
+ if (st->cont_initialized == 1) {
+ /* On the very first subframe we stop here. We only want to run the feat_in layer since the
+ others are initialized via the continuation network. */
+ OPUS_CLEAR(pcm, SUBFRAME_SIZE);
+ st->cont_initialized = 2; /* 2 = first subframe already consumed; later calls run the full network */
+ apply_gain(pcm, c0, &st->last_gain); /* pcm is all zeros here; the call still stores the gain derived from c0 in last_gain */
+ OPUS_COPY(st->last_lpc, lpc, LPC_ORDER); /* save the LPCs that fwgan_lpc_syn will use on the next subframe */
+ return;
+ }
+
+ compute_generic_gru(&model->rnn_gru_input, &model->rnn_gru_recurrent, st->rnn_state, rnn_in);
+ celt_assert(IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE) >= model->rnn_nl_gate.nb_outputs); /* tmp2 must be able to hold the gate output */
+ compute_gated_activation(&model->rnn_nl_gate, tmp2, st->rnn_state, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc1_fc_0, tmp1, st->fwc1_state, tmp2, RNN_GRU_STATE_SIZE, ACTIVATION_LINEAR); /* fwc1..fwc7: each stage is a conv1d with its own state followed by a gated tanh activation */
+ compute_gated_activation(&model->fwc1_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc2_fc_0, tmp2, st->fwc2_state, tmp1, FWC1_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc2_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc3_fc_0, tmp1, st->fwc3_state, tmp2, FWC2_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc3_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc4_fc_0, tmp2, st->fwc4_state, tmp1, FWC3_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc4_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc5_fc_0, tmp1, st->fwc5_state, tmp2, FWC4_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc5_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc6_fc_0, tmp2, st->fwc6_state, tmp1, FWC5_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc6_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
+
+ compute_generic_conv1d(&model->fwc7_fc_0, tmp1, st->fwc7_state, tmp2, FWC6_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
+ compute_gated_activation(&model->fwc7_fc_1_gate, pcm, tmp1, ACTIVATION_TANH); /* last stage writes directly into the output subframe */
+
+ apply_gain(pcm, c0, &st->last_gain); /* post-processing chain: gain, pre-emphasis, LPC synthesis, de-emphasis (all in place on pcm) */
+ fwgan_preemphasis(pcm, &st->preemph_mem);
+ fwgan_lpc_syn(pcm, st->syn_mem, lpc, st->last_lpc);
+ fwgan_deemphasis(pcm, &st->deemph_mem);
+}
+
+/* Zeroes the whole FWGANState and initializes st->model from the built-in
+   weight table fwgan_arrays. A failed model initialization trips
+   celt_assert(). */
+void fwgan_init(FWGANState *st)
+{
+  int ret;
+  OPUS_CLEAR(st, 1);
+  ret = init_fwgan(&st->model, fwgan_arrays);
+  celt_assert(ret == 0);
+  /* ret is otherwise only read by the assertion; mark it used so builds where
+     celt_assert() expands to nothing do not warn about an unused variable. */
+  (void)ret;
+  /* FIXME: perform arch detection. */
+}
+
+/* Loads FWGAN weights from an in-memory blob of len bytes at data into
+   st->model. Returns 0 on success and -1 if the blob yields no weight list or
+   the model cannot be initialized from it. */
+int fwgan_load_model(FWGANState *st, const unsigned char *data, int len) {
+  WeightArray *list = NULL;
+  int ret;
+  parse_weights(&list, data, len);
+  /* Never hand a missing list to init_fwgan().
+     NOTE(review): parse_weights() is defined elsewhere; this assumes it leaves
+     or sets the list pointer to NULL when parsing fails -- confirm. */
+  if (list == NULL) return -1;
+  ret = init_fwgan(&st->model, list);
+  opus_free(list);
+  return (ret == 0) ? 0 : -1;
+}
+
+/* Synthesizes one full frame (NB_SUBFRAMES subframes of SUBFRAME_SIZE samples)
+   into pcm. The network input is the first NB_FEATURES-2 features followed by
+   features[NB_FEATURES-1]+.5; the pitch period is derived from
+   features[NB_BANDS] and the gain feature is features[0]. Requires
+   st->cont_initialized to be non-zero. */
+static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features)
+{
+  float net_features[NB_FEATURES-1];
+  float upsampled[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE];
+  int pitch_period;
+  double w0;
+  int sf;
+  celt_assert(st->cont_initialized);
+
+  OPUS_COPY(net_features, features, NB_FEATURES-2);
+  net_features[NB_FEATURES-2] = features[NB_FEATURES-1]+.5;
+
+  pitch_period = (int)floor(.1 + 50*features[NB_BANDS]+100);
+  w0 = 2*M_PI/pitch_period;
+
+  run_fwgan_upsampler(st, upsampled, net_features);
+  /* Each subframe consumes one quarter of the upsampled conditioning. */
+  for (sf = 0; sf < NB_SUBFRAMES; sf++) {
+    run_fwgan_subframe(st, &pcm[sf*SUBFRAME_SIZE], &upsampled[sf*BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], w0, lpc, features[0]);
+  }
+}
+
+/* Produces FWGAN_FRAME_SIZE output samples in pcm for the given features.
+   The output is the FWGAN_FRAME_SIZE-SUBFRAME_SIZE samples held in st->pcm_buf
+   from the previous call followed by the first subframe of the newly
+   synthesized frame; the remainder of the new frame is kept in st->pcm_buf for
+   the next call. */
+void fwgan_synthesize(FWGANState *st, float *pcm, const float *features)
+{
+  float frame_lpc[LPC_ORDER];
+  float fresh[FWGAN_FRAME_SIZE];
+  float *pcm_tail;
+  compute_wlpc(frame_lpc, features);
+  fwgan_synthesize_impl(st, fresh, frame_lpc, features);
+  /* Handle buffering. */
+  pcm_tail = pcm + (FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+  OPUS_COPY(pcm_tail, fresh, SUBFRAME_SIZE);
+  OPUS_COPY(pcm, st->pcm_buf, FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+  OPUS_COPY(st->pcm_buf, fresh+SUBFRAME_SIZE, FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
+}
+
+/* 16-bit integer front end for fwgan_synthesize(): synthesizes one frame in
+   float, scales it by 32768, clamps to [-32767, 32767] and rounds to nearest.
+   pcm must have room for FWGAN_FRAME_SIZE samples. */
+void fwgan_synthesize_int(FWGANState *st, opus_int16 *pcm, const float *features)
+{
+  int i;
+  float fpcm[FWGAN_FRAME_SIZE];
+  fwgan_synthesize(st, fpcm, features);
+  /* Iterate over exactly the number of samples fpcm holds (FWGAN_FRAME_SIZE)
+     rather than LPCNET_FRAME_SIZE, so the loop bound can never drift away
+     from the buffer size. Both constants are currently 160. */
+  for (i=0;i<FWGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
+}
diff --git a/dnn/fwgan.h b/dnn/fwgan.h
new file mode 100644
index 00000000..acb7014d
--- /dev/null
+++ b/dnn/fwgan.h
@@ -0,0 +1,83 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FWGAN_H
+#define FWGAN_H
+
+#include "freq.h"
+#include "fwgan_data.h"
+
+#define FWGAN_CONT_SAMPLES 320
+#define NB_SUBFRAMES 4
+#define SUBFRAME_SIZE 40
+#define FWGAN_FRAME_SIZE (NB_SUBFRAMES*SUBFRAME_SIZE)
+#define CONT_PCM_INPUTS 320
+#define MAX_CONT_SIZE CONT_NET_0_OUT_SIZE
+#define FWGAN_GAMMA 0.92f
+#define FWGAN_DEEMPHASIS 0.85f
+
+/* FIXME: Derive those from the model rather than hardcoding. */
+#define FWC1_STATE_SIZE 512
+#define FWC2_STATE_SIZE 512
+#define FWC3_STATE_SIZE 256
+#define FWC4_STATE_SIZE 256
+#define FWC5_STATE_SIZE 128
+#define FWC6_STATE_SIZE 128
+#define FWC7_STATE_SIZE 80
+
+typedef struct { /* Persistent FWGAN synthesis state; fwgan_init() zeroes all of it before loading the model. */
+ FWGAN model; /* network weights/layers */
+ int arch; /* NOTE(review): not read by the synthesis code visible here; presumably reserved for arch detection -- confirm */
+ int cont_initialized; /* 0 after init; set to 1 once continuation state is ready; set to 2 after the first subframe has been consumed */
+ float embed_phase[2]; /* passed to pitch_embeddings() on every subframe */
+ float last_gain; /* gain applied to the next subframe by apply_gain() */
+ float last_lpc[LPC_ORDER]; /* LPC coefficients used by the synthesis filter for the next subframe */
+ float syn_mem[LPC_ORDER]; /* LPC synthesis filter history, most recent sample first */
+ float preemph_mem; /* last input sample of the pre-emphasis filter */
+ float deemph_mem; /* last output sample of the de-emphasis filter */
+ float pcm_buf[FWGAN_FRAME_SIZE]; /* samples held back between fwgan_synthesize() calls (FWGAN_FRAME_SIZE-SUBFRAME_SIZE are used) */
+ float cont[CONT_NET_10_OUT_SIZE]; /* input to the fwcN_cont_fc_0 dense layers that seed the fwcN states */
+ float cont_conv1_mem[FEAT_IN_CONV1_CONV_STATE_SIZE]; /* state of the feat_in_conv1_conv layer */
+ float rnn_state[RNN_GRU_STATE_SIZE]; /* GRU state */
+ float fwc1_state[FWC1_STATE_SIZE]; /* fwc1..fwc7: per-layer conv1d state, sizes hardcoded above */
+ float fwc2_state[FWC2_STATE_SIZE];
+ float fwc3_state[FWC3_STATE_SIZE];
+ float fwc4_state[FWC4_STATE_SIZE];
+ float fwc5_state[FWC5_STATE_SIZE];
+ float fwc6_state[FWC6_STATE_SIZE];
+ float fwc7_state[FWC7_STATE_SIZE];
+} FWGANState;
+
+void fwgan_init(FWGANState *st);
+int fwgan_load_model(FWGANState *st, const unsigned char *data, int len);
+
+void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0);
+
+void fwgan_synthesize(FWGANState *st, float *pcm, const float *features);
+void fwgan_synthesize_int(FWGANState *st, opus_int16 *pcm, const float *features);
+
+
+#endif /* FWGAN_H */
diff --git a/dnn/kiss99.c b/dnn/kiss99.c
new file mode 100644
index 00000000..325918f8
--- /dev/null
+++ b/dnn/kiss99.c
@@ -0,0 +1,81 @@
+/*Daala video codec
+Copyright (c) 2012 Daala project contributors. All rights reserved.
+Author: Timothy B. Terriberry
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "kiss99.h"
+
+/*Seeds the generator from _ndata bytes of _data.
+  The state starts from fixed constants; each complete group of four seed bytes
+   is XORed into z, w, jsr and jcong (in that order) and followed by one
+   generator step.
+  Up to three leftover bytes are XORed into z, w and jsr without a further
+   step.*/
+void kiss99_srand(kiss99_ctx *_this,const unsigned char *_data,int _ndata){
+  int pos;
+  int left;
+  _this->z=362436069;
+  _this->w=521288629;
+  _this->jsr=123456789;
+  _this->jcong=380116160;
+  pos=0;
+  while(pos+3<_ndata){
+    _this->z^=_data[pos];
+    _this->w^=_data[pos+1];
+    _this->jsr^=_data[pos+2];
+    _this->jcong^=_data[pos+3];
+    kiss99_rand(_this);
+    pos+=4;
+  }
+  left=_ndata-pos;
+  if(left>0)_this->z^=_data[pos];
+  if(left>1)_this->w^=_data[pos+1];
+  if(left>2)_this->jsr^=_data[pos+2];
+  /*Fix any potential short cycles that show up.
+    These are not too likely, given the way we initialize the state, but they
+     are technically possible, so let us go ahead and eliminate that
+     possibility.
+    See Gregory G. Rose: "KISS: A Bit Too Simple", Cryptographic Communications
+     No. 10, pp. 123---137, 2018.*/
+  if(_this->z==0||_this->z==0x9068FFFF)_this->z++;
+  if(_this->w==0||_this->w==0x464FFFFF)_this->w++;
+  if(_this->jsr==0)_this->jsr++;
+}
+
+/*Advances the generator by one step and returns the next 32-bit value.
+  The result combines two 16-bit multiply-with-carry generators (z, w), a
+   3-shift xorshift register (jsr) and a linear congruential generator
+   (jcong).*/
+uint32_t kiss99_rand(kiss99_ctx *_this){
+  uint32_t x;
+  _this->z=36969*(_this->z&0xFFFF)+(_this->z>>16);
+  _this->w=18000*(_this->w&0xFFFF)+(_this->w>>16);
+  /*We swap the 13 and 17 from the original 1999 algorithm to produce a single
+     cycle of maximal length, matching KISS11.
+    We are not actually using KISS11 because of the impractically large (16 MB)
+     internal state of the full algorithm.*/
+  x=_this->jsr;
+  x^=x<<13;
+  x^=x>>17;
+  x^=x<<5;
+  _this->jsr=x;
+  _this->jcong=69069*_this->jcong+1234567;
+  return (((_this->z<<16)+_this->w)^_this->jcong)+_this->jsr;
+}
diff --git a/dnn/kiss99.h b/dnn/kiss99.h
new file mode 100644
index 00000000..28646dbc
--- /dev/null
+++ b/dnn/kiss99.h
@@ -0,0 +1,46 @@
+/*Daala video codec
+Copyright (c) 2012 Daala project contributors. All rights reserved.
+Author: Timothy B. Terriberry
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_kiss99_H)
+# define _kiss99_H (1)
+# include <stdint.h>
+
+/*KISS PRNG from George Marsaglia (1999 version).
+ See https://en.wikipedia.org/wiki/KISS_(algorithm) for details.
+ This is suitable for simulations, but not for use in cryptographic contexts.*/
+
+typedef struct kiss99_ctx kiss99_ctx;
+
+struct kiss99_ctx{ /*Complete generator state: four 32-bit words updated by kiss99_rand().*/
+ uint32_t z; /*First multiply-with-carry state (multiplier 36969).*/
+ uint32_t w; /*Second multiply-with-carry state (multiplier 18000).*/
+ uint32_t jsr; /*Xorshift register; kiss99_srand() never leaves it at zero.*/
+ uint32_t jcong; /*Linear congruential state (69069*x+1234567).*/
+};
+
+void kiss99_srand(kiss99_ctx *_this,const unsigned char *_data,int _ndata);
+uint32_t kiss99_rand(kiss99_ctx *_this);
+
+#endif
diff --git a/dnn/lossgen.c b/dnn/lossgen.c
new file mode 100644
index 00000000..d0e781a8
--- /dev/null
+++ b/dnn/lossgen.c
@@ -0,0 +1,196 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This packet loss simulator can be used independently of the Opus codebase.
+ To do that, you need to compile the following files:
+ dnn/lossgen.c
+ dnn/lossgen_data.c
+
+ with the following files needed as #include
+ dnn/lossgen_data.h
+ dnn/lossgen.h
+ dnn/nnet_arch.h
+ dnn/nnet.h
+ dnn/parse_lpcnet_weights.c (included despite being a C file)
+ dnn/vec_avx.h
+ dnn/vec.h
+ celt/os_support.h
+ celt/arch.h
+ celt/x86/x86_arch_macros.h
+ include/opus_defines.h
+ include/opus_types.h
+
+ Additionally, the code in dnn/lossgen_demo.c can be used to generate losses from
+ the command line.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "arch.h"
+
+#include <math.h>
+#include "lossgen.h"
+#include "os_support.h"
+#include "nnet.h"
+#include "assert.h"
+
+/* Disable RTCD for this. */
+#define RTCD_ARCH c
+
+/* Override assert to avoid undefined/redefined symbols. */
+#undef celt_assert
+#define celt_assert assert
+
+/* Directly include the C files we need since the symbols won't be exposed if we link in a shared object. */
+#include "parse_lpcnet_weights.c"
+#include "nnet_arch.h"
+
+#undef compute_linear
+#undef compute_activation
+
+/* Force the C version since the SIMD versions may be hidden. */
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
+
+#define MAX_RNN_NEURONS_ALL IMAX(LOSSGEN_GRU1_STATE_SIZE, LOSSGEN_GRU2_STATE_SIZE)
+
+/* These two functions are copied from nnet.c to make sure we don't have linking issues. */
+void compute_generic_gru_lossgen(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch) /* One GRU step: updates state (N = recurrent_weights->nb_inputs values) in place from input vector in. */
+{
+ int i;
+ int N;
+ float zrh[3*MAX_RNN_NEURONS_ALL]; /* input-side pre-activations for the z, r and h parts, N values each */
+ float recur[3*MAX_RNN_NEURONS_ALL]; /* recurrent-side pre-activations, same layout as zrh */
+ float *z;
+ float *r;
+ float *h;
+ celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
+ celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
+ N = recurrent_weights->nb_inputs;
+ z = zrh;
+ r = &zrh[N];
+ h = &zrh[2*N];
+ celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL); /* the stack buffers above must be large enough */
+ celt_assert(in != state); /* state is overwritten at the end, so it must not alias the input */
+ compute_linear(input_weights, zrh, in, arch);
+ compute_linear(recurrent_weights, recur, state, arch);
+ for (i=0;i<2*N;i++)
+ zrh[i] += recur[i]; /* z and r: sum of input and recurrent contributions */
+ compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID, arch);
+ for (i=0;i<N;i++)
+ h[i] += recur[2*N+i]*r[i]; /* recurrent contribution to h is scaled by r */
+ compute_activation(h, h, N, ACTIVATION_TANH, arch);
+ for (i=0;i<N;i++)
+ h[i] = z[i]*state[i] + (1-z[i])*h[i]; /* blend old state and candidate using z */
+ for (i=0;i<N;i++)
+ state[i] = h[i];
+}
+
+
+/* Fully connected layer: writes the linear transform of input to output, then
+   applies the requested activation in place over layer->nb_outputs values. */
+void compute_generic_dense_lossgen(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
+{
+  int nb_out;
+  compute_linear(layer, output, input, arch);
+  nb_out = layer->nb_outputs;
+  compute_activation(output, output, nb_out, activation, arch);
+}
+
+
+/* Runs one step of the loss model and draws one loss decision.
+   The network input is the previous decision (st->last_loss) and
+   percent_loss; its sigmoid output is compared against a uniform draw from
+   rand(). Returns 1 for a lost packet and 0 otherwise, and stores the result
+   in st->last_loss. */
+static int sample_loss_impl(
+    LossGenState *st,
+    float percent_loss)
+{
+  LossGen *net;
+  float net_in[2];
+  float hidden[LOSSGEN_DENSE_IN_OUT_SIZE];
+  float prob;
+  int lost;
+  net = &st->model;
+  net_in[0] = st->last_loss;
+  net_in[1] = percent_loss;
+  compute_generic_dense_lossgen(&net->lossgen_dense_in, hidden, net_in, ACTIVATION_TANH, 0);
+  compute_generic_gru_lossgen(&net->lossgen_gru1_input, &net->lossgen_gru1_recurrent, st->gru1_state, hidden, 0);
+  compute_generic_gru_lossgen(&net->lossgen_gru2_input, &net->lossgen_gru2_recurrent, st->gru2_state, st->gru1_state, 0);
+  compute_generic_dense_lossgen(&net->lossgen_dense_out, &prob, st->gru2_state, ACTIVATION_SIGMOID, 0);
+  lost = ((float)rand()/RAND_MAX < prob) ? 1 : 0;
+  st->last_loss = lost;
+  return lost;
+}
+
+/* Returns the next loss decision (1 = lost, 0 = received) for the requested
+   loss rate. On the first call for a given state, 100 decisions are generated
+   and thrown away first. */
+int sample_loss(
+    LossGenState *st,
+    float percent_loss)
+{
+  if (st->used == 0) {
+    /* Due to GRU being initialized with zeros, the first packets aren't quite
+       random, so we skip them. */
+    int remaining = 100;
+    while (remaining > 0) {
+      sample_loss_impl(st, percent_loss);
+      remaining--;
+    }
+    st->used = 1;
+  }
+  return sample_loss_impl(st, percent_loss);
+}
+
+/* Zeroes the whole LossGenState and, unless USE_WEIGHTS_FILE is defined, loads
+   the built-in weights (lossgen_arrays) into st->model. A failed model
+   initialization trips celt_assert(). */
+void lossgen_init(LossGenState *st)
+{
+  int status = 0;
+  OPUS_CLEAR(st, 1);
+#ifndef USE_WEIGHTS_FILE
+  status = init_lossgen(&st->model, lossgen_arrays);
+#endif
+  celt_assert(status == 0);
+  (void)status;
+}
+
+/* Loads loss-generator weights from an in-memory blob of len bytes at data
+   into st->model. Returns 0 on success and -1 if the blob yields no weight
+   list or the model cannot be initialized from it. */
+int lossgen_load_model(LossGenState *st, const void *data, int len) {
+  WeightArray *list = NULL;
+  int ret;
+  parse_weights(&list, data, len);
+  /* Never hand a missing list to init_lossgen().
+     NOTE(review): parse_weights() comes from the included
+     parse_lpcnet_weights.c; this assumes it leaves or sets the list pointer
+     to NULL when parsing fails -- confirm. */
+  if (list == NULL) return -1;
+  ret = init_lossgen(&st->model, list);
+  opus_free(list);
+  return (ret == 0) ? 0 : -1;
+}
+
+#if 0 /* Disabled stand-alone driver; the comment at the top of this file points to dnn/lossgen_demo.c for command-line use. */
+#include <stdio.h>
+int main(int argc, char **argv) {
+ int i, N;
+ float p;
+ LossGenState st;
+ if (argc!=3) {
+ fprintf(stderr, "usage: lossgen <percentage> <length>\n");
+ return 1;
+ }
+ lossgen_init(&st);
+ p = atof(argv[1]); /* passed to sample_loss() unchanged (no scaling applied here) */
+ N = atoi(argv[2]); /* number of loss decisions to print */
+ for (i=0;i<N;i++) {
+ printf("%d\n", sample_loss(&st, p)); /* one decision per line: 1 = lost, 0 = received */
+ }
+}
+#endif
diff --git a/dnn/lossgen.h b/dnn/lossgen.h
new file mode 100644
index 00000000..318f32f7
--- /dev/null
+++ b/dnn/lossgen.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef LOSSGEN_H
+#define LOSSGEN_H
+
+
+#include "lossgen_data.h"
+
+#define PITCH_MIN_PERIOD 32
+#define PITCH_MAX_PERIOD 256
+
+#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
+
+
+typedef struct { /* Persistent state of the packet-loss generator; lossgen_init() zeroes all of it. */
+ LossGen model; /* network weights/layers */
+ float gru1_state[LOSSGEN_GRU1_STATE_SIZE]; /* state of the first GRU */
+ float gru2_state[LOSSGEN_GRU2_STATE_SIZE]; /* state of the second GRU (fed by gru1_state) */
+ int last_loss; /* previous decision (1 = lost), fed back as a network input */
+ int used; /* 0 until the first sample_loss() call has discarded its warm-up samples */
+} LossGenState;
+
+
+void lossgen_init(LossGenState *st);
+int lossgen_load_model(LossGenState *st, const void *data, int len);
+
+int sample_loss(
+ LossGenState *st,
+ float percent_loss);
+
+#endif
diff --git a/dnn/lossgen_demo.c b/dnn/lossgen_demo.c
new file mode 100644
index 00000000..bad7bdc3
--- /dev/null
+++ b/dnn/lossgen_demo.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "lossgen.h"
+/* Command-line loss generator: prints <nb packets> lines, each holding one
+   loss decision (1 = lost, 0 = received) for a target loss of <percent_loss>
+   percent. Returns 1 on a usage error or an argument that is not a number. */
+int main(int argc, char **argv)
+{
+  LossGenState st;
+  long num_packets;
+  long i;
+  float percent;
+  char *end;
+  if (argc != 3) {
+    fprintf(stderr, "usage: %s <percent_loss> <nb packets>\n", argv[0]);
+    return 1;
+  }
+  /* strtod()/strtol() report where parsing stopped, so that garbage input is
+     rejected instead of being silently read as 0 (as atof()/atol() would). */
+  percent = (float)strtod(argv[1], &end);
+  if (end == argv[1] || *end != '\0') {
+    fprintf(stderr, "invalid percent_loss: %s\n", argv[1]);
+    return 1;
+  }
+  num_packets = strtol(argv[2], &end, 10);
+  if (end == argv[2] || *end != '\0') {
+    fprintf(stderr, "invalid nb packets: %s\n", argv[2]);
+    return 1;
+  }
+  lossgen_init(&st);
+  for (i=0;i<num_packets;i++) {
+    /* The generator takes a fraction, hence the percent -> fraction scaling. */
+    printf("%d\n", sample_loss(&st, percent*0.01f));
+  }
+  return 0;
+}
diff --git a/dnn/lpcnet.c b/dnn/lpcnet.c
new file mode 100644
index 00000000..52e81b07
--- /dev/null
+++ b/dnn/lpcnet.c
@@ -0,0 +1,283 @@
+/* Copyright (c) 2018 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include "nnet_data.h"
+#include "nnet.h"
+#include "common.h"
+#include "arch.h"
+#include "lpcnet.h"
+#include "lpcnet_private.h"
+#include "os_support.h"
+
+#define PREEMPH 0.85f
+
+#define PDF_FLOOR 0.002
+
+#define FRAME_INPUT_SIZE (NB_FEATURES + EMBED_PITCH_OUT_SIZE)
+
+
+#if 0
+/* Debug helper: prints the N values of x on one line, each formatted with
+   "%f " and followed by a final newline. */
+static void print_vector(float *x, int N)
+{
+  int k = 0;
+  while (k < N) {
+    printf("%f ", x[k]);
+    k++;
+  }
+  printf("\n");
+}
+#endif
+
+#ifdef END2END
+/* Converts LPC_ORDER reflection coefficients rc into LPC coefficients lpc using
+   the step-up recursion: at each order, the lower-order coefficients are
+   updated as a[j] += rc[order]*a[order-j-1]. */
+void rc2lpc(float *lpc, const float *rc)
+{
+  float cur[LPC_ORDER];
+  float next[LPC_ORDER] = {0.0};
+  int order;
+  int n;
+  OPUS_COPY(cur, rc, LPC_ORDER);
+  /* Order 0 has no lower-order coefficients to update, so start at 1. */
+  for (order = 1; order < LPC_ORDER; order++) {
+    for (n = 0; n < order; n++) {
+      next[n] = cur[n] + cur[order]*cur[order - n - 1];
+    }
+    OPUS_COPY(cur, next, order);
+  }
+  OPUS_COPY(lpc, cur, LPC_ORDER);
+}
+
+#endif
+
+void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features) /* Frame-rate network: turns one feature vector into the GRU A/B conditioning vectors and the LPC coefficients for the frame. */
+{
+ NNetState *net;
+ float condition[FEATURE_DENSE2_OUT_SIZE];
+ float in[FRAME_INPUT_SIZE]; /* NB_FEATURES features followed by the pitch embedding */
+ float conv1_out[FEATURE_CONV1_OUT_SIZE];
+ float conv2_out[FEATURE_CONV2_OUT_SIZE];
+ float dense1_out[FEATURE_DENSE1_OUT_SIZE];
+ int pitch;
+ float rc[LPC_ORDER];
+ /* Matches the Python code -- the 0.1 avoids rounding issues. */
+ pitch = (int)floor(.1 + 50*features[NB_BANDS]+100);
+ pitch = IMIN(255, IMAX(33, pitch)); /* clamp to [33, 255] before the embedding lookup */
+ net = &lpcnet->nnet;
+ OPUS_COPY(in, features, NB_FEATURES);
+ compute_embedding(&lpcnet->model.embed_pitch, &in[NB_FEATURES], pitch);
+ compute_conv1d(&lpcnet->model.feature_conv1, conv1_out, net->feature_conv1_state, in);
+ if (lpcnet->frame_count < FEATURE_CONV1_DELAY) OPUS_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE); /* zero the conv output for the first FEATURE_CONV1_DELAY frames */
+ compute_conv1d(&lpcnet->model.feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
+ if (lpcnet->frame_count < FEATURES_DELAY) OPUS_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE); /* likewise for the first FEATURES_DELAY frames */
+ _lpcnet_compute_dense(&lpcnet->model.feature_dense1, dense1_out, conv2_out);
+ _lpcnet_compute_dense(&lpcnet->model.feature_dense2, condition, dense1_out);
+ OPUS_COPY(rc, condition, LPC_ORDER); /* first LPC_ORDER outputs; only consumed by the END2END branch below */
+ _lpcnet_compute_dense(&lpcnet->model.gru_a_dense_feature, gru_a_condition, condition);
+ _lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition);
+#ifdef END2END
+ rc2lpc(lpc, rc); /* LPCs come from the network's reflection coefficients */
+#elif FEATURES_DELAY>0
+ memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0])); /* output the oldest stored LPC set ... */
+ memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0])); /* ... shift the history by one frame ... */
+ lpc_from_cepstrum(lpcnet->old_lpc[0], features); /* ... and store the LPCs of the current features as the newest entry */
+#else
+ lpc_from_cepstrum(lpc, features);
+#endif
+#ifdef LPC_GAMMA
+ lpc_weighting(lpc, LPC_GAMMA);
+#endif
+ if (lpcnet->frame_count < 1000) lpcnet->frame_count++; /* saturating frame counter used by the start-up checks above */
+}
+
+/* Queues one feature vector in lpcnet->feature_buffer instead of running the
+   frame network on it. The buffer holds at most
+   (conv1 kernel size + conv2 kernel size - 2) frames; once full, the oldest
+   frame is dropped to make room for the new one. */
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features)
+{
+  int capacity;
+  int slot;
+  capacity = lpcnet->model.feature_conv1.kernel_size + lpcnet->model.feature_conv2.kernel_size - 2;
+  celt_assert(capacity <= MAX_FEATURE_BUFFER_SIZE);
+  if (lpcnet->feature_buffer_fill != capacity) {
+    lpcnet->feature_buffer_fill++;
+  } else {
+    /* Full: slide every stored frame down by one slot. */
+    OPUS_MOVE(lpcnet->feature_buffer, &lpcnet->feature_buffer[NB_FEATURES], (capacity-1)*NB_FEATURES);
+  }
+  slot = lpcnet->feature_buffer_fill-1;
+  OPUS_COPY(&lpcnet->feature_buffer[slot*NB_FEATURES], features, NB_FEATURES);
+}
+
+/* Runs the frame network over every queued feature vector, oldest first, and
+   empties the queue. The conditioning vectors and LPCs produced along the way
+   are discarded; only the network state is affected. */
+void run_frame_network_flush(LPCNetState *lpcnet)
+{
+  float lpc_scratch[LPC_ORDER];
+  float cond_a[3*GRU_A_STATE_SIZE];
+  float cond_b[3*GRU_B_STATE_SIZE];
+  int frame = 0;
+  while (frame < lpcnet->feature_buffer_fill) {
+    run_frame_network(lpcnet, cond_a, cond_b, lpc_scratch, &lpcnet->feature_buffer[frame*NB_FEATURES]);
+    frame++;
+  }
+  lpcnet->feature_buffer_fill = 0;
+}
+
+int run_sample_network(LPCNetState *lpcnet, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng) /* Sample-rate network: advances GRU A and GRU B by one step and returns the value sampled by sample_mdense(). */
+{
+ NNetState *net;
+ float gru_a_input[3*GRU_A_STATE_SIZE];
+ float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE]; /* only the first GRU_A_STATE_SIZE entries are written here */
+ float gru_b_input[3*GRU_B_STATE_SIZE];
+ net = &lpcnet->nnet;
+#if 1 /* active path: single helper combining the conditioning with the three embeddings */
+ compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &lpcnet->model.gru_a_embed_sig, last_sig, &lpcnet->model.gru_a_embed_pred, pred, &lpcnet->model.gru_a_embed_exc, last_exc);
+#else /* disabled alternative: copy the conditioning, then accumulate each embedding separately */
+ OPUS_COPY(gru_a_input, gru_a_condition, 3*GRU_A_STATE_SIZE);
+ accum_embedding(&lpcnet->model.gru_a_embed_sig, gru_a_input, last_sig);
+ accum_embedding(&lpcnet->model.gru_a_embed_pred, gru_a_input, pred);
+ accum_embedding(&lpcnet->model.gru_a_embed_exc, gru_a_input, last_exc);
+#endif
+ /*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
+ compute_sparse_gru(&lpcnet->model.sparse_gru_a, net->gru_a_state, gru_a_input);
+ OPUS_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE); /* GRU A's updated state is the input of GRU B */
+ OPUS_COPY(gru_b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);
+ compute_gruB(&lpcnet->model.gru_b, gru_b_input, net->gru_b_state, in_b);
+ return sample_mdense(&lpcnet->model.dual_fc, net->gru_b_state, sampling_logit_table, rng);
+}
+
+/* Returns the size in bytes of an LPCNetState, for callers that allocate the
+   state themselves. Declared with (void) so that the definition is a proper
+   prototype rather than an unchecked old-style declarator. */
+int lpcnet_get_size(void)
+{
+  return sizeof(LPCNetState);
+}
+
+/* Resets the run-time part of the state: zeroes everything from the
+   LPCNET_RESET_START member to the end of the structure, sets last_exc to the
+   u-law code of 0 and re-seeds the random generator with the fixed string
+   "LPCNet". Members located before LPCNET_RESET_START are left untouched. */
+void lpcnet_reset(LPCNetState *lpcnet)
+{
+  static const unsigned char rng_seed[] = "LPCNet";
+  char *reset_begin;
+  reset_begin = (char*)&lpcnet->LPCNET_RESET_START;
+  OPUS_CLEAR(reset_begin, sizeof(LPCNetState)-(reset_begin - (char*)lpcnet));
+  lpcnet->last_exc = lin2ulaw(0.f);
+  /* sizeof-1 drops the terminating NUL, i.e. the string length. */
+  kiss99_srand(&lpcnet->rng, rng_seed, sizeof(rng_seed)-1);
+}
+
+/* Initializes an LPCNetState: fills the 256-entry sampling logit table, loads
+   the built-in weights (lpcnet_arrays) unless USE_WEIGHTS_FILE is defined, and
+   resets the run-time state. Returns the model initialization result (0 on
+   success); a non-zero result also trips celt_assert(). */
+int lpcnet_init(LPCNetState *lpcnet)
+{
+  int status = 0;
+  int idx = 0;
+  while (idx < 256) {
+    /* Probabilities from .025 to .975, stored as logits. */
+    float prob = .025f+.95f*idx/255.f;
+    lpcnet->sampling_logit_table[idx] = -log((1-prob)/prob);
+    idx++;
+  }
+#ifndef USE_WEIGHTS_FILE
+  status = init_lpcnet_model(&lpcnet->model, lpcnet_arrays);
+#endif
+  lpcnet_reset(lpcnet);
+  celt_assert(status == 0);
+  return status;
+}
+
+/* Loads LPCNet weights from an in-memory blob of len bytes at data into
+   st->model. Returns 0 on success and -1 if the blob yields no weight list or
+   the model cannot be initialized from it. */
+int lpcnet_load_model(LPCNetState *st, const unsigned char *data, int len) {
+  WeightArray *list = NULL;
+  int ret;
+  parse_weights(&list, data, len);
+  /* Never hand a missing list to init_lpcnet_model().
+     NOTE(review): parse_weights() is defined elsewhere; this assumes it leaves
+     or sets the list pointer to NULL when parsing fails -- confirm. */
+  if (list == NULL) return -1;
+  ret = init_lpcnet_model(&st->model, list);
+  opus_free(list);
+  return (ret == 0) ? 0 : -1;
+}
+
+
+/* Allocates and initializes a new LPCNetState. Returns NULL if the allocation
+   fails; otherwise the caller owns the state and releases it with
+   lpcnet_destroy(). */
+LPCNetState *lpcnet_create(void)
+{
+  LPCNetState *lpcnet;
+  lpcnet = (LPCNetState *)opus_alloc(lpcnet_get_size(), 1);
+  /* Do not clear or initialize through a failed allocation. */
+  if (lpcnet == NULL) return NULL;
+  OPUS_CLEAR(lpcnet, 1);
+  lpcnet_init(lpcnet);
+  return lpcnet;
+}
+
+void lpcnet_destroy(LPCNetState *lpcnet) /* Releases a state obtained from lpcnet_create(). */
+{
+ opus_free(lpcnet);
+}
+
+/* Clears only the signal-path memory: both GRU states, the LPC history
+   (last_sig), the last excitation (set to the u-law code of 0) and the
+   de-emphasis memory. Nothing else in the state is modified. */
+void lpcnet_reset_signal(LPCNetState *lpcnet)
+{
+  NNetState *net;
+  net = &lpcnet->nnet;
+  OPUS_CLEAR(net->gru_a_state, GRU_A_STATE_SIZE);
+  OPUS_CLEAR(net->gru_b_state, GRU_B_STATE_SIZE);
+  OPUS_CLEAR(lpcnet->last_sig, LPC_ORDER);
+  lpcnet->last_exc = lin2ulaw(0.f);
+  lpcnet->deemph_mem = 0;
+}
+
+void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload) /* Generates N samples into output using the conditioning and LPCs already stored in the state; the first preload samples are read from output instead of being synthesized. */
+{
+ int i;
+
+ if (lpcnet->frame_count <= FEATURES_DELAY) /* not enough frames processed yet: output silence */
+ {
+ OPUS_CLEAR(output, N);
+ return;
+ }
+ for (i=0;i<N;i++)
+ {
+ int j;
+ float pcm;
+ int exc;
+ int last_sig_ulaw;
+ int pred_ulaw;
+ float pred = 0;
+ for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpcnet->lpc[j]; /* linear prediction from the previous LPC_ORDER samples */
+ last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
+ pred_ulaw = lin2ulaw(pred);
+ exc = run_sample_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng); /* always run, even for preloaded samples, so the network state advances */
+ if (i < preload) {
+ exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred); /* replace the sampled excitation with the one implied by the supplied sample */
+ pcm = output[i]-PREEMPH*lpcnet->deemph_mem; /* pre-emphasized version of the supplied sample */
+ } else {
+ pcm = pred + ulaw2lin(exc);
+ }
+ OPUS_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1); /* shift the signal history, newest sample first */
+ lpcnet->last_sig[0] = pcm;
+ lpcnet->last_exc = exc;
+ pcm += PREEMPH*lpcnet->deemph_mem; /* de-emphasis */
+ lpcnet->deemph_mem = pcm; /* memory keeps the unclamped value */
+ if (pcm<-32767) pcm = -32767;
+ if (pcm>32767) pcm = 32767;
+ if (i >= preload) output[i] = (int)floor(.5 + pcm); /* preloaded samples are left as supplied */
+ }
+}
+
+void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload) /* Runs the frame network on features, then generates N samples (the first preload of them taken from output). */
+{
+ run_frame_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->lpc, features); /* results are stored in the state for the sample loop */
+ lpcnet_synthesize_tail_impl(lpcnet, output, N, preload);
+}
+
+void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N) { /* Synthesizes N samples for one feature vector, with no preloaded samples. */
+ lpcnet_synthesize_impl(lpcnet, features, output, N, 0);
+}
diff --git a/dnn/lpcnet.h b/dnn/lpcnet.h
new file mode 100644
index 00000000..4a981246
--- /dev/null
+++ b/dnn/lpcnet.h
@@ -0,0 +1,183 @@
+/* Copyright (c) 2018 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef LPCNET_H_
+#define LPCNET_H_
+
+#include "opus_types.h"
+
+#define NB_FEATURES 20
+#define NB_TOTAL_FEATURES 36
+
+/** Number of audio samples in a feature frame (not for encoding/decoding). */
+#define LPCNET_FRAME_SIZE (160)
+
+typedef struct LPCNetState LPCNetState;
+
+typedef struct LPCNetDecState LPCNetDecState;
+
+typedef struct LPCNetEncState LPCNetEncState;
+
+typedef struct LPCNetPLCState LPCNetPLCState;
+
+
+/** Gets the size of an <code>LPCNetDecState</code> structure.
+ * @returns The size in bytes.
+ */
+int lpcnet_decoder_get_size(void);
+
+/** Initializes a previously allocated decoder state
+ * The memory pointed to by st must be at least the size returned by lpcnet_decoder_get_size().
+ * This is intended for applications which use their own allocator instead of malloc.
+ * @see lpcnet_decoder_create(),lpcnet_decoder_get_size()
+ * @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
+ * @retval 0 Success
+ */
+int lpcnet_decoder_init(LPCNetDecState *st);
+
+void lpcnet_reset(LPCNetState *lpcnet);
+
+/** Allocates and initializes a decoder state.
+ * @returns The newly created state
+ */
+LPCNetDecState *lpcnet_decoder_create(void);
+
+/** Frees an <code>LPCNetDecState</code> allocated by lpcnet_decoder_create().
+ * @param[in] st <tt>LPCNetDecState*</tt>: State to be freed.
+ */
+void lpcnet_decoder_destroy(LPCNetDecState *st);
+
+/** Decodes a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8) into LPCNET_PACKET_SAMPLES samples (currently 640).
+ * @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
+ * @param [in] buf <tt>const unsigned char *</tt>: Compressed packet
+ * @param [out] pcm <tt>opus_int16 *</tt>: Decoded audio
+ * @retval 0 Success
+ */
+int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, opus_int16 *pcm);
+
+
+
+/** Gets the size of an <code>LPCNetEncState</code> structure.
+ * @returns The size in bytes.
+ */
+int lpcnet_encoder_get_size(void);
+
+/** Initializes a previously allocated encoder state
+ * The memory pointed to by st must be at least the size returned by lpcnet_encoder_get_size().
+ * This is intended for applications which use their own allocator instead of malloc.
+ * @see lpcnet_encoder_create(),lpcnet_encoder_get_size()
+ * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+ * @retval 0 Success
+ */
+int lpcnet_encoder_init(LPCNetEncState *st);
+
+int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len);
+
+/** Allocates and initializes an encoder state.
+ * @returns The newly created state
+ */
+LPCNetEncState *lpcnet_encoder_create(void);
+
+/** Frees an <code>LPCNetEncState</code> allocated by lpcnet_encoder_create().
+ * @param[in] st <tt>LPCNetEncState*</tt>: State to be freed.
+ */
+void lpcnet_encoder_destroy(LPCNetEncState *st);
+
+/** Encodes LPCNET_PACKET_SAMPLES speech samples (currently 640) into a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8).
+ * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+ * @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be encoded
+ * @param [out] buf <tt>unsigned char *</tt>: Compressed packet
+ * @retval 0 Success
+ */
+int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf);
+
+/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
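+ * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+ * @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be analyzed
+ * @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Feature vector computed for this frame
+ * @param [in] arch <tt>int</tt>: Architecture selector (callers pass the value returned by opus_select_arch())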
+ * @retval 0 Success
+ */
+int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch);
+
+
+/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
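+ * @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
+ * @param [in] pcm <tt>const float *</tt>: Input speech to be analyzed
+ * @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Feature vector computed for this frame
+ * @param [in] arch <tt>int</tt>: Architecture selector (callers pass the value returned by opus_select_arch())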
+ * @retval 0 Success
+ */
+int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch);
+
+/** Gets the size of an <code>LPCNetState</code> structure.
+ * @returns The size in bytes.
+ */
+int lpcnet_get_size(void);
+
+/** Initializes a previously allocated synthesis state
+ * The memory pointed to by st must be at least the size returned by lpcnet_get_size().
+ * This is intended for applications which use their own allocator instead of malloc.
+ * @see lpcnet_create(),lpcnet_get_size()
+ * @param [in] st <tt>LPCNetState*</tt>: Synthesis state
+ * @retval 0 Success
+ */
+int lpcnet_init(LPCNetState *st);
+
+/** Allocates and initializes a synthesis state.
+ * @returns The newly created state
+ */
+LPCNetState *lpcnet_create(void);
+
+/** Frees an <code>LPCNetState</code> allocated by lpcnet_create().
+ * @param[in] st <tt>LPCNetState*</tt>: State to be freed.
+ */
+void lpcnet_destroy(LPCNetState *st);
+
+/** Synthesizes speech from an LPCNet feature vector.
+ * @param [in] st <tt>LPCNetState*</tt>: Synthesis state
+ * @param [in] features <tt>const float *</tt>: Feature vector for the frame to synthesize
+ * @param [out] output <tt>opus_int16 *</tt>: Synthesized speech
+ * @param [in] N <tt>int</tt>: Number of samples to generate
+ * This function does not return a value.
+ */
+void lpcnet_synthesize(LPCNetState *st, const float *features, opus_int16 *output, int N);
+
+
+
+int lpcnet_plc_init(LPCNetPLCState *st);
+void lpcnet_plc_reset(LPCNetPLCState *st);
+
+int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm);
+
+int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm);
+
+void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
+
+void lpcnet_plc_fec_clear(LPCNetPLCState *st);
+
+int lpcnet_load_model(LPCNetState *st, const void *data, int len);
+int lpcnet_plc_load_model(LPCNetPLCState *st, const void *data, int len);
+
+#endif
diff --git a/dnn/lpcnet_demo.c b/dnn/lpcnet_demo.c
new file mode 100644
index 00000000..59651f3e
--- /dev/null
+++ b/dnn/lpcnet_demo.c
@@ -0,0 +1,217 @@
+/* Copyright (c) 2018 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "arch.h"
+#include "lpcnet.h"
+#include "freq.h"
+#include "os_support.h"
+#include "fargan.h"
+#include "cpu_support.h"
+
+#ifdef USE_WEIGHTS_FILE
+# if __unix__
+# include <fcntl.h>
+# include <sys/mman.h>
+# include <unistd.h>
+# include <sys/stat.h>
+/* When available, mmap() is preferable to reading the file, as it leads to
+   better resource utilization, especially if multiple processes are using the
+   same file (mapping will be shared in cache).
+   Returns the read-only mapping and stores the file size in *len. On any
+   failure (stat, open or mmap) returns NULL with *len set to 0. */
+void *load_blob(const char *filename, int *len) {
+  struct stat info;
+  void *mapping;
+  int handle;
+  if (stat(filename, &info) != 0) {
+    *len = 0;
+    return NULL;
+  }
+  *len = info.st_size;
+  handle = open(filename, O_RDONLY);
+  if (handle < 0) {
+    *len = 0;
+    return NULL;
+  }
+  mapping = mmap(NULL, *len, PROT_READ, MAP_SHARED, handle, 0);
+  /* The descriptor is closed whether or not the mapping succeeded. */
+  close(handle);
+  if (mapping == MAP_FAILED) {
+    *len = 0;
+    return NULL;
+  }
+  return mapping;
+}
+/* Unmaps a blob returned by the mmap()-based load_blob(); NULL is ignored. */
+void free_blob(void *blob, int len) {
+  if (blob == NULL) return;
+  munmap(blob, len);
+}
+# else
+/* Fallback loader for platforms without mmap(): reads the whole file into a
+   malloc()ed buffer that the caller releases with free_blob().
+   Returns the buffer and stores the number of bytes actually read in *len.
+   On failure (cannot open, empty or unseekable file, size not representable
+   as int, out of memory) returns NULL with *len set to 0. The file is closed
+   on every path. */
+void *load_blob(const char *filename, int *len) {
+  FILE *file;
+  void *data;
+  long size;
+  *len = 0;
+  /* Binary mode: this branch is the non-unix one, where text mode would
+     translate line endings and corrupt the weights. */
+  file = fopen(filename, "rb");
+  if (file == NULL)
+  {
+    perror("could not open blob file");
+    return NULL;
+  }
+  if (fseek(file, 0L, SEEK_END) != 0) {
+    fclose(file);
+    return NULL;
+  }
+  size = ftell(file);
+  /* Reject errors (-1), empty files and sizes that do not fit the int *len. */
+  if (size <= 0 || (long)(int)size != size || fseek(file, 0L, SEEK_SET) != 0) {
+    fclose(file);
+    return NULL;
+  }
+  data = malloc(size);
+  if (!data) {
+    fclose(file);
+    return NULL;
+  }
+  *len = fread(data, 1, size, file);
+  fclose(file);
+  return data;
+}
+/* Releases a blob obtained from the malloc()-based load_blob(). */
+void free_blob(void *blob, int len) {
+  (void)len; /* unused here; kept so both variants share one signature */
+  free(blob);
+}
+# endif
+#endif
+
+#define MODE_FEATURES 2
+/*#define MODE_SYNTHESIS 3*/
+#define MODE_ADDLPC 5
+#define MODE_FWGAN_SYNTHESIS 6
+#define MODE_FARGAN_SYNTHESIS 7
+
+/* Prints the supported invocations to stderr and exits with status 1.
+   Only the three modes that main() actually parses are listed. */
+void usage(void) {
+  fprintf(stderr, "usage: lpcnet_demo -features <input.pcm> <features.f32>\n");
+  fprintf(stderr, " lpcnet_demo -fargan-synthesis <features.f32> <output.pcm>\n");
+  fprintf(stderr, " lpcnet_demo -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>\n\n");
+  exit(1);
+}
+
+/* Command-line driver. argv[1] selects the mode, argv[2] is the input file and
+   argv[3] the output file:
+     -features          16-bit PCM in, one NB_TOTAL_FEATURES float vector per frame out
+     -fargan-synthesis  feature vectors in, 16-bit PCM synthesized by FARGAN out
+     -addlpc            36-float vectors in, same vectors with entries 20.. filled
+                        by lpc_from_cepstrum() out
+   Any other argument layout ends in usage(). */
+int main(int argc, char **argv) {
+  int mode=0;
+  int arch;
+  FILE *fin, *fout;
+#ifdef USE_WEIGHTS_FILE
+  int len;
+  void *data;
+  const char *filename = "weights_blob.bin";
+#endif
+  arch = opus_select_arch();
+  if (argc < 4) usage();
+  if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
+  else if (strcmp(argv[1], "-fargan-synthesis") == 0) mode=MODE_FARGAN_SYNTHESIS;
+  else if (strcmp(argv[1], "-addlpc") == 0){
+    mode=MODE_ADDLPC;
+  } else {
+    usage();
+  }
+  if (argc != 4) usage();
+  fin = fopen(argv[2], "rb");
+  if (fin == NULL) {
+    fprintf(stderr, "Can't open %s\n", argv[2]);
+    exit(1);
+  }
+
+  fout = fopen(argv[3], "wb");
+  if (fout == NULL) {
+    fprintf(stderr, "Can't open %s\n", argv[3]);
+    exit(1);
+  }
+#ifdef USE_WEIGHTS_FILE
+  /* NOTE(review): the result of load_blob() is not checked before it is handed
+     to fargan_load_model() below -- confirm how a NULL blob is handled there. */
+  data = load_blob(filename, &len);
+#endif
+  if (mode == MODE_FEATURES) {
+    LPCNetEncState *net;
+    net = lpcnet_encoder_create();
+    while (1) {
+      float features[NB_TOTAL_FEATURES];
+      opus_int16 pcm[LPCNET_FRAME_SIZE];
+      size_t ret;
+      ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
+      /* A trailing partial frame is dropped. */
+      if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
+      lpcnet_compute_single_frame_features(net, pcm, features, arch);
+      fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
+    }
+    lpcnet_encoder_destroy(net);
+  } else if (mode == MODE_FARGAN_SYNTHESIS) {
+    FARGANState fargan;
+    size_t ret, i;
+    int have_context = 1;
+    float in_features[5*NB_TOTAL_FEATURES] = {0};
+    float zeros[320] = {0};
+    fargan_init(&fargan);
+#ifdef USE_WEIGHTS_FILE
+    fargan_load_model(&fargan, data, len);
+#endif
+    /* uncomment the following to align with Python code */
+    /*ret = fread(&in_features[0], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);*/
+    /* Each read stores NB_TOTAL_FEATURES floats but advances by only
+       NB_FEATURES, so the next read overwrites the extra values and the first
+       5*NB_FEATURES floats end up holding five packed NB_FEATURES vectors. */
+    for (i=0;i<5;i++) {
+      ret = fread(&in_features[i*NB_FEATURES], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);
+      if (ret != NB_TOTAL_FEATURES) {
+        /* Input too short to provide the five context frames: produce no
+           output instead of priming the synthesizer with partial data. */
+        have_context = 0;
+        break;
+      }
+    }
+    if (have_context) {
+      fargan_cont(&fargan, zeros, in_features);
+      while (1) {
+        float features[NB_FEATURES];
+        float fpcm[LPCNET_FRAME_SIZE];
+        opus_int16 pcm[LPCNET_FRAME_SIZE];
+        ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
+        if (feof(fin) || ret != NB_TOTAL_FEATURES) break;
+        /* Only the first NB_FEATURES values of each vector are synthesized from. */
+        OPUS_COPY(features, in_features, NB_FEATURES);
+        fargan_synthesize(&fargan, fpcm, features);
+        /* Scale to 16 bits, clamp to +/-32767 and round to nearest. */
+        for (i=0;i<LPCNET_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
+        fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
+      }
+    }
+  } else if (mode == MODE_ADDLPC) {
+    float features[36];
+    size_t ret;
+
+    while (1) {
+      ret = fread(features, sizeof(features[0]), 36, fin);
+      if (ret != 36 || feof(fin)) break;
+      lpc_from_cepstrum(&features[20], &features[0]);
+      fwrite(features, sizeof(features[0]), 36, fout);
+    }
+
+  } else {
+    fprintf(stderr, "unknown action\n");
+  }
+  fclose(fin);
+  fclose(fout);
+#ifdef USE_WEIGHTS_FILE
+  free_blob(data, len);
+#endif
+  return 0;
+}
diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c
new file mode 100644
index 00000000..52930416
--- /dev/null
+++ b/dnn/lpcnet_enc.c
@@ -0,0 +1,230 @@
+/* Copyright (c) 2017-2019 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "kiss_fft.h"
+#include "common.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include <assert.h>
+#include "lpcnet_private.h"
+#include "lpcnet.h"
+#include "os_support.h"
+#include "_kiss_fft_guts.h"
+#include "celt_lpc.h"
+#include "mathops.h"
+
+
+/* Returns sizeof(LPCNetEncState): the number of bytes a caller-allocated
+   encoder state must provide. */
+int lpcnet_encoder_get_size(void) {
+ return sizeof(LPCNetEncState);
+}
+
+/* Zeroes the entire encoder state, then initializes the embedded pitch DNN.
+   Always returns 0; the result of pitchdnn_init() is not propagated. */
+int lpcnet_encoder_init(LPCNetEncState *st) {
+ memset(st, 0, sizeof(*st));
+ pitchdnn_init(&st->pitchdnn);
+ return 0;
+}
+
+/* Passes the weight blob (data, len) to pitchdnn_load_model() for the embedded
+   pitch DNN and returns that function's result. */
+int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len) {
+ return pitchdnn_load_model(&st->pitchdnn, data, len);
+}
+
+/* Allocates an encoder state with opus_alloc() and initializes it with
+   lpcnet_encoder_init(). Returns NULL when the allocation fails. The state is
+   released with lpcnet_encoder_destroy(). */
+LPCNetEncState *lpcnet_encoder_create(void) {
+  LPCNetEncState *st;
+  st = opus_alloc(lpcnet_encoder_get_size());
+  /* lpcnet_encoder_init() starts with a memset() of the state, so a failed
+     allocation must not reach it. */
+  if (st == NULL) return NULL;
+  lpcnet_encoder_init(st);
+  return st;
+}
+
+/* Frees an encoder state by passing it to opus_free(). */
+void lpcnet_encoder_destroy(LPCNetEncState *st) {
+ opus_free(st);
+}
+
+/* Assembles one analysis window from the saved overlap followed by the new
+   frame, saves the last OVERLAP_SIZE samples of the frame as the next overlap,
+   then applies the window. X is filled by forward_transform() and Ex by
+   lpcn_compute_band_energy(). */
+static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
+  float frame[WINDOW_SIZE];
+  float *overlap = st->analysis_mem;
+  OPUS_COPY(frame, overlap, OVERLAP_SIZE);
+  OPUS_COPY(frame+OVERLAP_SIZE, in, FRAME_SIZE);
+  OPUS_COPY(overlap, in+(FRAME_SIZE-OVERLAP_SIZE), OVERLAP_SIZE);
+  apply_window(frame);
+  forward_transform(X, frame);
+  lpcn_compute_band_energy(Ex, X);
+}
+
+/* Second-order recursive filter over N samples with two state values kept in
+   mem[0] and mem[1] across calls. Each input sample is read before the
+   matching output is stored, so y may be the same buffer as x. */
+static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
+  float s0 = mem[0];
+  float s1 = mem[1];
+  int n;
+  for (n=0;n<N;n++) {
+    const float in = x[n];
+    const float prev_s0 = s0;
+    const float out = in + prev_s0;
+    /* Straightforward form of the state update:
+         s0 = s1 + (b[0]*in - a[0]*out);
+         s1 = (b[1]*in - a[1]*out);
+       It is expanded below to reduce dependency chains; the +1e-30f forces
+       the ordering and has no effect on the output. */
+    s0 = (b[0]-a[0])*in + s1 - a[0]*prev_s0;
+    s1 = (b[1]-a[1])*in + 1e-30f - a[1]*prev_s0;
+    y[n] = out;
+  }
+  mem[0] = s0;
+  mem[1] = s1;
+}
+
+#define celt_log10(x) (0.3010299957f*celt_log2(x))
+
+/* Analyzes one frame of FRAME_SIZE samples from `in` and updates the encoder
+   state. On return st->features holds, in order: the DCT of the log band
+   energies (from index 0), the pitch DNN output at index NB_BANDS, the
+   correlation measure minus 0.5 at NB_BANDS+1, and the LPC_ORDER LPC
+   coefficients from NB_BANDS+2. st->if_features and st->xcorr_features, the
+   two inputs handed to compute_pitchdnn(), are refreshed as well. */
+void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
+ float aligned_in[FRAME_SIZE];
+ int i;
+ float Ly[NB_BANDS];
+ float follow, logMax;
+ kiss_fft_cpx X[FREQ_SIZE];
+ float Ex[NB_BANDS];
+ float xcorr[PITCH_MAX_PERIOD];
+ float ener0;
+ float ener;
+ float x[FRAME_SIZE+LPC_ORDER];
+ float frame_corr;
+ float xy, xx, yy;
+ int pitch;
+ float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
+ /* [b,a]=ellip(2, 2, 20, 1200/8000); */
+ static const float lp_b[2] = {-0.84946f, 1.f};
+ static const float lp_a[2] = {-1.54220f, 0.70781f};
+ /* Take the last TRAINING_OFFSET samples of the previous frame from the
+    overlap memory now: frame_analysis() overwrites that memory next. */
+ OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
+ frame_analysis(st, X, Ex, in);
+ /* Bin 0 contributes only a clamped, scaled log-energy value. */
+ st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
+ /* Every other bin below PITCH_IF_MAX_FREQ contributes three values: the two
+    components of the C_MULC product of this frame's bin and the previous
+    frame's bin scaled to unit magnitude, and the clamped log energy. */
+ for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
+ kiss_fft_cpx prod;
+ float norm_1;
+ C_MULC(prod, X[i], st->prev_if[i]);
+ norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
+ C_MULBYSCALAR(prod, norm_1);
+ st->if_features[3*i-2] = prod.r;
+ st->if_features[3*i-1] = prod.i;
+ st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
+ }
+ OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
+ /*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
+ /* Log band energies, floored at 8 below the running maximum and at 2.5 below
+    a follower that drops by 2.5 per band. */
+ logMax = -2;
+ follow = -2;
+ for (i=0;i<NB_BANDS;i++) {
+ Ly[i] = celt_log10(1e-2f+Ex[i]);
+ Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
+ logMax = MAX16(logMax, Ly[i]);
+ follow = MAX16(follow-2.5f, Ly[i]);
+ }
+ dct(st->features, Ly);
+ st->features[0] -= 4;
+ lpc_from_cepstrum(st->lpc, st->features);
+ for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
+ /* Slide both pitch buffers by one frame to make room at PITCH_MAX_PERIOD. */
+ OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
+ OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
+ /* aligned_in = saved tail of the previous frame followed by the start of
+    this one, i.e. the input delayed by TRAINING_OFFSET samples. */
+ OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
+ /* x = LPC_ORDER samples of history followed by the aligned frame. */
+ OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
+ OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
+ OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
+ /* Filter the aligned frame with the LPC coefficients into lp_buf. */
+ celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
+ /* exc_buf gets the filtered signal plus 0.7 times its previous sample. */
+ for (i=0;i<FRAME_SIZE;i++) {
+ st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
+ st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
+ /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
+ }
+ /* In-place biquad on the new part of lp_buf; only lp_mem[0..1] are used. */
+ biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
+ {
+ double ener1;
+ float *buf = st->exc_buf;
+ celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
+ ener0 = celt_inner_prod(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
+ ener1 = celt_inner_prod(&buf[0], &buf[0], FRAME_SIZE, arch);
+ /*printf("%f\n", st->frame_weight[sub]);*/
+ /* ener1 is the energy of the FRAME_SIZE window starting at buf[i]; it is
+    updated incrementally (in double) as the window slides by one sample. */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ ener = 1 + ener0 + ener1;
+ st->xcorr_features[i] = 2*xcorr[i];
+ ener_norm[i] = ener;
+ ener1 += buf[i+FRAME_SIZE]*(double)buf[i+FRAME_SIZE] - buf[i]*(double)buf[i];
+ /*printf("%f ", st->xcorr_features[i]);*/
+ }
+ /* Split in a separate loop so the compiler can vectorize it */
+ for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
+ st->xcorr_features[i] /= ener_norm[i];
+ }
+ /*printf("\n");*/
+ }
+ st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
+ /* The expression reduces to round(256 / 2^(dnn_pitch + 1.5)).
+    NOTE(review): pitch is used below as a backward offset into lp_buf with no
+    range check; presumably compute_pitchdnn() keeps it within
+    [0, PITCH_MAX_PERIOD] -- confirm. */
+ pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
+ xx = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
+ yy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
+ xy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
+ /*printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy));*/
+ /* Normalized correlation at the estimated lag, then mapped through
+    log(1+exp(5c))/log(1+exp(5)). */
+ frame_corr = xy/sqrt(1+xx*yy);
+ frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
+ st->features[NB_BANDS] = st->dnn_pitch;
+ st->features[NB_BANDS + 1] = frame_corr-.5f;
+}
+
+/* First-order pre-emphasis: y[i] = x[i] + *mem, after which *mem becomes
+   -coef*x[i], so the filter state carries over between calls. Each input is
+   read before its output slot is written, which allows y == x. */
+void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
+  int n;
+  for (n=0;n<N;n++) {
+    const float sample = x[n];
+    const float out = sample + *mem;
+    *mem = -coef*sample;
+    y[n] = out;
+  }
+}
+
+/* Shared implementation of the two public feature-extraction entry points.
+   Pre-emphasizes x in place (the caller's buffer is modified), runs
+   compute_frame_features() on it and copies NB_TOTAL_FEATURES values from
+   st->features into `features`. Always returns 0. */
+static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
+ preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
+ compute_frame_features(st, x, arch);
+ OPUS_COPY(features, &st->features[0], NB_TOTAL_FEATURES);
+ return 0;
+}
+
+/* 16-bit entry point: widens FRAME_SIZE samples to float (no rescaling) into a
+   local buffer, since the implementation modifies its input, then computes the
+   features for that frame. Always returns 0. */
+int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
+  float frame[FRAME_SIZE];
+  int n;
+  for (n=0;n<FRAME_SIZE;n++) {
+    frame[n] = pcm[n];
+  }
+  lpcnet_compute_single_frame_features_impl(st, frame, features, arch);
+  return 0;
+}
+
+/* Float entry point: copies FRAME_SIZE samples into a local buffer, since the
+   implementation modifies its input, then computes the features for that
+   frame. Always returns 0. */
+int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
+  float frame[FRAME_SIZE];
+  OPUS_COPY(frame, pcm, FRAME_SIZE);
+  lpcnet_compute_single_frame_features_impl(st, frame, features, arch);
+  return 0;
+}
diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c
new file mode 100644
index 00000000..55122779
--- /dev/null
+++ b/dnn/lpcnet_plc.c
@@ -0,0 +1,211 @@
+/* Copyright (c) 2021 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "lpcnet_private.h"
+#include "lpcnet.h"
+#include "plc_data.h"
+#include "os_support.h"
+#include "common.h"
+#include "cpu_support.h"
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+/* Comment this out to have LPCNet update its state on every good packet (slow). */
+#define PLC_SKIP_UPDATES
+
+/* Resets the running PLC state. Every byte from the LPCNET_PLC_RESET_START
+   field to the end of the struct is cleared; fields declared before it are
+   left alone by that clear. The embedded encoder is then re-initialized and
+   the position fields are set to their starting values. */
+void lpcnet_plc_reset(LPCNetPLCState *st) {
+ /* Length = struct size minus the byte offset of the first field to reset. */
+ OPUS_CLEAR((char*)&st->LPCNET_PLC_RESET_START,
+ sizeof(LPCNetPLCState)-
+ ((char*)&st->LPCNET_PLC_RESET_START - (char*)st));
+ lpcnet_encoder_init(&st->enc);
+ OPUS_CLEAR(st->pcm, PLC_BUF_SIZE);
+ st->blend = 0;
+ st->loss_count = 0;
+ st->analysis_gap = 1;
+ /* Both positions start at the end of the pcm history buffer. */
+ st->analysis_pos = PLC_BUF_SIZE;
+ st->predict_pos = PLC_BUF_SIZE;
+}
+
+/* Initializes a PLC state: selects the architecture, initializes the FARGAN
+   synthesizer and the feature encoder, loads the PLC model when it is compiled
+   in, and finishes with lpcnet_plc_reset(). Returns the model-initialization
+   result, which is always 0 when USE_WEIGHTS_FILE is defined. */
+int lpcnet_plc_init(LPCNetPLCState *st) {
+ int ret;
+ st->arch = opus_select_arch();
+ fargan_init(&st->fargan);
+ lpcnet_encoder_init(&st->enc);
+ st->loaded = 0;
+#ifndef USE_WEIGHTS_FILE
+ /* Built-in weights: the model is usable as soon as this succeeds. */
+ ret = init_plcmodel(&st->model, plcmodel_arrays);
+ if (ret == 0) st->loaded = 1;
+#else
+ /* External weights: `loaded` stays 0 until lpcnet_plc_load_model() succeeds. */
+ ret = 0;
+#endif
+ celt_assert(ret == 0);
+ lpcnet_plc_reset(st);
+ return ret;
+}
+
+/* Loads the PLC model, the encoder's pitch model and the FARGAN model from one
+   weight blob, in that order, stopping at the first failure. Returns 0 and
+   sets st->loaded on success; otherwise returns the failing step's result.
+   NOTE(review): the return value of parse_weights() is not inspected here --
+   confirm that it leaves `list` in a state init_plcmodel() and opus_free()
+   can handle when parsing fails. */
+int lpcnet_plc_load_model(LPCNetPLCState *st, const void *data, int len) {
+  WeightArray *list;
+  int ret;
+  parse_weights(&list, data, len);
+  ret = init_plcmodel(&st->model, list);
+  opus_free(list);
+  if (ret != 0) return ret;
+  ret = lpcnet_encoder_load_model(&st->enc, data, len);
+  if (ret != 0) return ret;
+  ret = fargan_load_model(&st->fargan, data, len);
+  if (ret != 0) return ret;
+  st->loaded = 1;
+  return ret;
+}
+
+/* Queues one FEC feature vector (NB_FEATURES floats) for later use by the
+   concealment code. A NULL `features` records a skipped frame instead by
+   incrementing fec_skip.
+   When the write position reaches the end of st->fec, the unread entries are
+   first moved to the front. If none had been read, the buffer is still full
+   after that; the new vector is then dropped so that nothing is written past
+   the PLC_MAX_FEC rows of st->fec. */
+void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features) {
+  if (features == NULL) {
+    st->fec_skip++;
+    return;
+  }
+  if (st->fec_fill_pos == PLC_MAX_FEC) {
+    int pending = st->fec_fill_pos-st->fec_read_pos;
+    OPUS_MOVE(&st->fec[0][0], &st->fec[st->fec_read_pos][0], pending*NB_FEATURES);
+    st->fec_fill_pos = pending;
+    st->fec_read_pos = 0;
+    /* Compaction freed no slot (read position was already 0): keep the
+       queued vectors and discard the incoming one. */
+    if (st->fec_fill_pos == PLC_MAX_FEC) return;
+  }
+  OPUS_COPY(&st->fec[st->fec_fill_pos][0], features, NB_FEATURES);
+  st->fec_fill_pos++;
+}
+
+/* Empties the FEC queue: the read position, the fill position and the skip
+   counter all return to 0. The stored vectors themselves are not erased. */
+void lpcnet_plc_fec_clear(LPCNetPLCState *st) {
+  st->fec_skip = 0;
+  st->fec_fill_pos = 0;
+  st->fec_read_pos = 0;
+}
+
+
+/* Runs the PLC prediction network once: a tanh dense input layer, two GRU
+   layers whose states live in st->plc_net (and are updated by the call), and a
+   linear dense output layer writing to `out`. Asserts that a model is loaded. */
+static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) {
+ float tmp[PLC_DENSE_IN_OUT_SIZE];
+ PLCModel *model = &st->model;
+ PLCNetState *net = &st->plc_net;
+ celt_assert(st->loaded);
+ compute_generic_dense(&model->plc_dense_in, tmp, in, ACTIVATION_TANH, st->arch);
+ compute_generic_gru(&model->plc_gru1_input, &model->plc_gru1_recurrent, net->gru1_state, tmp, st->arch);
+ /* The second GRU takes the first GRU's updated state as its input. */
+ compute_generic_gru(&model->plc_gru2_input, &model->plc_gru2_recurrent, net->gru2_state, net->gru1_state, st->arch);
+ compute_generic_dense(&model->plc_dense_out, out, net->gru2_state, ACTIVATION_LINEAR, st->arch);
+}
+
+/* Writes the next feature vector to `out`. If an unread FEC vector is queued
+   and no skip is pending, that vector is used and 1 is returned; the
+   prediction network is still run on it so its state follows the signal.
+   Otherwise the network predicts the vector from an all-zero input, one
+   pending skip (if any) is consumed, and 0 is returned. */
+static int get_fec_or_pred(LPCNetPLCState *st, float *out) {
+  float net_input[2*NB_BANDS+NB_FEATURES+1] = {0};
+  float discard[NB_FEATURES];
+  int fec_queued = (st->fec_read_pos != st->fec_fill_pos);
+  if (!fec_queued || st->fec_skip != 0) {
+    compute_plc_pred(st, out, net_input);
+    if (st->fec_skip > 0) st->fec_skip--;
+    return 0;
+  }
+  OPUS_COPY(out, &st->fec[st->fec_read_pos][0], NB_FEATURES);
+  st->fec_read_pos++;
+  /* Update PLC state using FEC, so without Burg features. */
+  OPUS_COPY(&net_input[2*NB_BANDS], out, NB_FEATURES);
+  net_input[2*NB_BANDS+NB_FEATURES] = -1;
+  compute_plc_pred(st, discard, net_input);
+  return 1;
+}
+
+/* Pushes one NB_FEATURES vector into st->cont_features, which always holds the
+   CONT_VECTORS most recent vectors: the oldest is shifted out and the new one
+   takes the last slot. */
+static void queue_features(LPCNetPLCState *st, const float *features) {
+  float *queue = st->cont_features;
+  OPUS_MOVE(queue, queue+NB_FEATURES, (CONT_VECTORS-1)*NB_FEATURES);
+  OPUS_COPY(queue+(CONT_VECTORS-1)*NB_FEATURES, features, NB_FEATURES);
+}
+
+/* In this causal version of the code, the DNN model implemented by compute_plc_pred()
+ needs to generate two feature vectors to conceal the first lost packet.*/
+
+/* Feeds one received frame of FRAME_SIZE samples into the PLC history. The
+   analysis and prediction positions move back by one frame when they can (an
+   analysis position that cannot move flags an analysis gap), the pcm history
+   is shifted by one frame, the new samples are appended scaled by 1/32768,
+   and the loss counter and blend flag are cleared. Always returns 0. */
+int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) {
+  float *newest = &st->pcm[PLC_BUF_SIZE-FRAME_SIZE];
+  int n;
+  if (st->analysis_pos - FRAME_SIZE >= 0) {
+    st->analysis_pos -= FRAME_SIZE;
+  } else {
+    st->analysis_gap = 1;
+  }
+  if (st->predict_pos - FRAME_SIZE >= 0) {
+    st->predict_pos -= FRAME_SIZE;
+  }
+  OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
+  for (n=0;n<FRAME_SIZE;n++) {
+    newest[n] = (1.f/32768.f)*pcm[n];
+  }
+  st->blend = 0;
+  st->loss_count = 0;
+  return 0;
+}
+
+/* Offsets added to features[0] during concealment, indexed by the number of
+   consecutive predicted frames (loss_count 0..9). */
+static const float att_table[10] = {0, 0, -.2, -.2, -.4, -.4, -.8, -.8, -1.6, -1.6};
+/* Produces one concealed frame of FRAME_SIZE samples in `pcm` and appends it to
+   the PLC history. On the first concealed frame after received audio (blend ==
+   0) it first catches the analysis up on the buffered signal and primes the
+   synthesizer. Asserts that a model is loaded. Always returns 0. */
+int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) {
+ int i;
+ celt_assert(st->loaded);
+ if (st->blend == 0) {
+ int count = 0;
+ /* Restart the prediction network from the older of the two saved states. */
+ st->plc_net = st->plc_bak[0];
+ /* Analyze every complete frame of buffered signal not yet analyzed. */
+ while (st->analysis_pos + FRAME_SIZE <= PLC_BUF_SIZE) {
+ float x[FRAME_SIZE];
+ float plc_features[2*NB_BANDS+NB_FEATURES+1];
+ celt_assert(st->analysis_pos >= 0);
+ /* The history is stored scaled by 1/32768; undo that for analysis. */
+ for (i=0;i<FRAME_SIZE;i++) x[i] = 32768.f*st->pcm[st->analysis_pos+i];
+ burg_cepstral_analysis(plc_features, x);
+ lpcnet_compute_single_frame_features_float(&st->enc, x, st->features, st->arch);
+ /* Skip the first analyzed frame after an analysis gap, and frames located
+    before predict_pos. */
+ if ((!st->analysis_gap || count>0) && st->analysis_pos >= st->predict_pos) {
+ queue_features(st, st->features);
+ /* Network input: the first 2*NB_BANDS values written by the Burg analysis,
+    then the frame features, then a final flag set to 1. */
+ OPUS_COPY(&plc_features[2*NB_BANDS], st->features, NB_FEATURES);
+ plc_features[2*NB_BANDS+NB_FEATURES] = 1;
+ /* Keep the two most recent network states before each network update. */
+ st->plc_bak[0] = st->plc_bak[1];
+ st->plc_bak[1] = st->plc_net;
+ compute_plc_pred(st, st->features, plc_features);
+ }
+ st->analysis_pos += FRAME_SIZE;
+ count++;
+ }
+ /* Two feature vectors (FEC or predicted) are queued before the synthesizer
+    is primed from the end of the history and the queued vectors. */
+ st->plc_bak[0] = st->plc_bak[1];
+ st->plc_bak[1] = st->plc_net;
+ get_fec_or_pred(st, st->features);
+ queue_features(st, st->features);
+ st->plc_bak[0] = st->plc_bak[1];
+ st->plc_bak[1] = st->plc_net;
+ get_fec_or_pred(st, st->features);
+ queue_features(st, st->features);
+ fargan_cont(&st->fargan, &st->pcm[PLC_BUF_SIZE-FARGAN_CONT_SAMPLES], st->cont_features);
+ st->analysis_gap = 0;
+ }
+ st->plc_bak[0] = st->plc_bak[1];
+ st->plc_bak[1] = st->plc_net;
+ /* loss_count counts consecutive frames obtained by prediction rather than FEC. */
+ if (get_fec_or_pred(st, st->features)) st->loss_count = 0;
+ else st->loss_count++;
+ /* Lower features[0] as the loss gets longer: table lookup for the first ten
+    counts, then 2 more per additional frame, never below -10. */
+ if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
+ else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
+ fargan_synthesize_int(&st->fargan, pcm, &st->features[0]);
+ queue_features(st, st->features);
+ if (st->analysis_pos - FRAME_SIZE >= 0) st->analysis_pos -= FRAME_SIZE;
+ else st->analysis_gap = 1;
+ st->predict_pos = PLC_BUF_SIZE;
+ /* Append the concealed frame to the history, scaled like received audio. */
+ OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
+ for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i];
+ st->blend = 1;
+ return 0;
+}
diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h
new file mode 100644
index 00000000..4aa376b6
--- /dev/null
+++ b/dnn/lpcnet_private.h
@@ -0,0 +1,90 @@
+#ifndef LPCNET_PRIVATE_H
+#define LPCNET_PRIVATE_H
+
+#include <stdio.h>
+#include "freq.h"
+#include "lpcnet.h"
+#include "plc_data.h"
+#include "pitchdnn.h"
+#include "fargan.h"
+
+
+#define PITCH_FRAME_SIZE 320
+#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
+
+#define PLC_MAX_FEC 100
+#define MAX_FEATURE_BUFFER_SIZE 4
+
+#define PITCH_IF_MAX_FREQ 30
+#define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2)
+
+#define CONT_VECTORS 5
+
+#define FEATURES_DELAY 1
+
+/* State of the feature extractor. lpcnet_encoder_init() zeroes all of it and
+   then initializes `pitchdnn`. */
+struct LPCNetEncState{
+ PitchDNNState pitchdnn; /* pitch estimation network */
+ float analysis_mem[OVERLAP_SIZE]; /* tail of the previous frame, used as window overlap */
+ float mem_preemph; /* pre-emphasis filter memory */
+ kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ]; /* previous frame's low spectrum bins */
+ float if_features[PITCH_IF_FEATURES]; /* first input of the pitch network */
+ float xcorr_features[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD]; /* second input of the pitch network */
+ float dnn_pitch; /* last output of the pitch network */
+ float pitch_mem[LPC_ORDER]; /* last LPC_ORDER aligned input samples (FIR history) */
+ float pitch_filt; /* previous sample used when building exc_buf */
+ float exc_buf[PITCH_BUF_SIZE]; /* signal history used for the cross-correlation features */
+ float lp_buf[PITCH_BUF_SIZE]; /* low-passed signal history used for the frame correlation */
+ float lp_mem[4]; /* biquad state; the biquad only uses the first two entries */
+ float lpc[LPC_ORDER]; /* LPC coefficients derived from the cepstrum */
+ float features[NB_TOTAL_FEATURES]; /* feature vector of the last analyzed frame */
+ float sig_mem[LPC_ORDER]; /* not referenced by the encoder code shown in this commit view */
+ float burg_cepstrum[2*NB_BANDS]; /* not referenced by the encoder code shown in this commit view */
+};
+
+/* Recurrent state of the PLC prediction network: one vector per GRU layer.
+   It is a plain value type and is copied by assignment when saved/restored. */
+typedef struct {
+ float gru1_state[PLC_GRU1_STATE_SIZE];
+ float gru2_state[PLC_GRU2_STATE_SIZE];
+} PLCNetState;
+
+#define PLC_BUF_SIZE ((CONT_VECTORS+5)*FRAME_SIZE)
+/* Packet-loss-concealment state. The fields above LPCNET_PLC_RESET_START are
+   set up by lpcnet_plc_init() and are not zeroed by the clear in
+   lpcnet_plc_reset() (which does re-initialize `enc`); everything from
+   LPCNET_PLC_RESET_START to the end is zeroed by lpcnet_plc_reset(). */
+struct LPCNetPLCState {
+ PLCModel model; /* weights of the prediction network */
+ FARGANState fargan; /* synthesizer */
+ LPCNetEncState enc; /* feature extractor */
+ int loaded; /* non-zero once a model has been loaded */
+ int arch; /* value of opus_select_arch() */
+
+/* First field covered by the clear in lpcnet_plc_reset(). */
+#define LPCNET_PLC_RESET_START fec
+ float fec[PLC_MAX_FEC][NB_FEATURES]; /* queued FEC feature vectors */
+ int analysis_gap; /* set when the analysis position could not follow the buffer */
+ int fec_read_pos; /* next FEC entry to consume */
+ int fec_fill_pos; /* next FEC entry to write */
+ int fec_skip; /* pending frames for which FEC is to be skipped */
+ int analysis_pos; /* offset in pcm of the next frame to analyze */
+ int predict_pos; /* offset in pcm from which analyzed frames feed the predictor */
+ float pcm[PLC_BUF_SIZE]; /* signal history, stored scaled by 1/32768 */
+ int blend; /* 0 after received audio, 1 after a concealed frame */
+ float features[NB_TOTAL_FEATURES]; /* current feature vector */
+ float cont_features[CONT_VECTORS*NB_FEATURES]; /* most recent vectors, used to prime the synthesizer */
+ int loss_count; /* consecutive frames obtained by prediction */
+ PLCNetState plc_net; /* current prediction network state */
+ PLCNetState plc_bak[2]; /* two most recent saved network states, oldest first */
+};
+
+void preemphasis(float *y, float *mem, const float *x, float coef, int N);
+
+void compute_frame_features(LPCNetEncState *st, const float *in, int arch);
+
+void lpcnet_reset_signal(LPCNetState *lpcnet);
+void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
+void run_frame_network_flush(LPCNetState *lpcnet);
+
+
+void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload);
+void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload);
+void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N);
+
+void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
+
+#endif
diff --git a/dnn/lpcnet_tables.c b/dnn/lpcnet_tables.c
new file mode 100644
index 00000000..467cb473
--- /dev/null
+++ b/dnn/lpcnet_tables.c
@@ -0,0 +1,307 @@
+/* The contents of this file were automatically generated by dump_lpcnet_tables.c */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "kiss_fft.h"
+
+static const arch_fft_state arch_fft = {0, NULL};
+
+static const opus_int16 fft_bitrev[320] = {
+0, 64, 128, 192, 256, 16, 80, 144, 208, 272, 32, 96, 160, 224, 288,
+48, 112, 176, 240, 304, 4, 68, 132, 196, 260, 20, 84, 148, 212, 276,
+36, 100, 164, 228, 292, 52, 116, 180, 244, 308, 8, 72, 136, 200, 264,
+24, 88, 152, 216, 280, 40, 104, 168, 232, 296, 56, 120, 184, 248, 312,
+12, 76, 140, 204, 268, 28, 92, 156, 220, 284, 44, 108, 172, 236, 300,
+60, 124, 188, 252, 316, 1, 65, 129, 193, 257, 17, 81, 145, 209, 273,
+33, 97, 161, 225, 289, 49, 113, 177, 241, 305, 5, 69, 133, 197, 261,
+21, 85, 149, 213, 277, 37, 101, 165, 229, 293, 53, 117, 181, 245, 309,
+9, 73, 137, 201, 265, 25, 89, 153, 217, 281, 41, 105, 169, 233, 297,
+57, 121, 185, 249, 313, 13, 77, 141, 205, 269, 29, 93, 157, 221, 285,
+45, 109, 173, 237, 301, 61, 125, 189, 253, 317, 2, 66, 130, 194, 258,
+18, 82, 146, 210, 274, 34, 98, 162, 226, 290, 50, 114, 178, 242, 306,
+6, 70, 134, 198, 262, 22, 86, 150, 214, 278, 38, 102, 166, 230, 294,
+54, 118, 182, 246, 310, 10, 74, 138, 202, 266, 26, 90, 154, 218, 282,
+42, 106, 170, 234, 298, 58, 122, 186, 250, 314, 14, 78, 142, 206, 270,
+30, 94, 158, 222, 286, 46, 110, 174, 238, 302, 62, 126, 190, 254, 318,
+3, 67, 131, 195, 259, 19, 83, 147, 211, 275, 35, 99, 163, 227, 291,
+51, 115, 179, 243, 307, 7, 71, 135, 199, 263, 23, 87, 151, 215, 279,
+39, 103, 167, 231, 295, 55, 119, 183, 247, 311, 11, 75, 139, 203, 267,
+27, 91, 155, 219, 283, 43, 107, 171, 235, 299, 59, 123, 187, 251, 315,
+15, 79, 143, 207, 271, 31, 95, 159, 223, 287, 47, 111, 175, 239, 303,
+63, 127, 191, 255, 319, };
+
+static const kiss_twiddle_cpx fft_twiddles[320] = {
+{1.00000000f, -0.00000000f}, {0.999807239f, -0.0196336918f},
+{0.999229014f, -0.0392598175f}, {0.998265624f, -0.0588708036f},
+{0.996917307f, -0.0784590989f}, {0.995184720f, -0.0980171412f},
+{0.993068457f, -0.117537394f}, {0.990569353f, -0.137012348f},
+{0.987688363f, -0.156434461f}, {0.984426558f, -0.175796285f},
+{0.980785251f, -0.195090324f}, {0.976765871f, -0.214309156f},
+{0.972369909f, -0.233445361f}, {0.967599094f, -0.252491564f},
+{0.962455213f, -0.271440446f}, {0.956940353f, -0.290284663f},
+{0.951056540f, -0.309017003f}, {0.944806039f, -0.327630192f},
+{0.938191354f, -0.346117049f}, {0.931214929f, -0.364470512f},
+{0.923879504f, -0.382683426f}, {0.916187942f, -0.400748819f},
+{0.908143163f, -0.418659747f}, {0.899748266f, -0.436409235f},
+{0.891006529f, -0.453990489f}, {0.881921291f, -0.471396744f},
+{0.872496009f, -0.488621235f}, {0.862734377f, -0.505657375f},
+{0.852640152f, -0.522498548f}, {0.842217207f, -0.539138317f},
+{0.831469595f, -0.555570245f}, {0.820401430f, -0.571787953f},
+{0.809017003f, -0.587785244f}, {0.797320664f, -0.603555918f},
+{0.785316944f, -0.619093955f}, {0.773010433f, -0.634393275f},
+{0.760405958f, -0.649448037f}, {0.747508347f, -0.664252460f},
+{0.734322488f, -0.678800762f}, {0.720853567f, -0.693087339f},
+{0.707106769f, -0.707106769f}, {0.693087339f, -0.720853567f},
+{0.678800762f, -0.734322488f}, {0.664252460f, -0.747508347f},
+{0.649448037f, -0.760405958f}, {0.634393275f, -0.773010433f},
+{0.619093955f, -0.785316944f}, {0.603555918f, -0.797320664f},
+{0.587785244f, -0.809017003f}, {0.571787953f, -0.820401430f},
+{0.555570245f, -0.831469595f}, {0.539138317f, -0.842217207f},
+{0.522498548f, -0.852640152f}, {0.505657375f, -0.862734377f},
+{0.488621235f, -0.872496009f}, {0.471396744f, -0.881921291f},
+{0.453990489f, -0.891006529f}, {0.436409235f, -0.899748266f},
+{0.418659747f, -0.908143163f}, {0.400748819f, -0.916187942f},
+{0.382683426f, -0.923879504f}, {0.364470512f, -0.931214929f},
+{0.346117049f, -0.938191354f}, {0.327630192f, -0.944806039f},
+{0.309017003f, -0.951056540f}, {0.290284663f, -0.956940353f},
+{0.271440446f, -0.962455213f}, {0.252491564f, -0.967599094f},
+{0.233445361f, -0.972369909f}, {0.214309156f, -0.976765871f},
+{0.195090324f, -0.980785251f}, {0.175796285f, -0.984426558f},
+{0.156434461f, -0.987688363f}, {0.137012348f, -0.990569353f},
+{0.117537394f, -0.993068457f}, {0.0980171412f, -0.995184720f},
+{0.0784590989f, -0.996917307f}, {0.0588708036f, -0.998265624f},
+{0.0392598175f, -0.999229014f}, {0.0196336918f, -0.999807239f},
+{6.12323426e-17f, -1.00000000f}, {-0.0196336918f, -0.999807239f},
+{-0.0392598175f, -0.999229014f}, {-0.0588708036f, -0.998265624f},
+{-0.0784590989f, -0.996917307f}, {-0.0980171412f, -0.995184720f},
+{-0.117537394f, -0.993068457f}, {-0.137012348f, -0.990569353f},
+{-0.156434461f, -0.987688363f}, {-0.175796285f, -0.984426558f},
+{-0.195090324f, -0.980785251f}, {-0.214309156f, -0.976765871f},
+{-0.233445361f, -0.972369909f}, {-0.252491564f, -0.967599094f},
+{-0.271440446f, -0.962455213f}, {-0.290284663f, -0.956940353f},
+{-0.309017003f, -0.951056540f}, {-0.327630192f, -0.944806039f},
+{-0.346117049f, -0.938191354f}, {-0.364470512f, -0.931214929f},
+{-0.382683426f, -0.923879504f}, {-0.400748819f, -0.916187942f},
+{-0.418659747f, -0.908143163f}, {-0.436409235f, -0.899748266f},
+{-0.453990489f, -0.891006529f}, {-0.471396744f, -0.881921291f},
+{-0.488621235f, -0.872496009f}, {-0.505657375f, -0.862734377f},
+{-0.522498548f, -0.852640152f}, {-0.539138317f, -0.842217207f},
+{-0.555570245f, -0.831469595f}, {-0.571787953f, -0.820401430f},
+{-0.587785244f, -0.809017003f}, {-0.603555918f, -0.797320664f},
+{-0.619093955f, -0.785316944f}, {-0.634393275f, -0.773010433f},
+{-0.649448037f, -0.760405958f}, {-0.664252460f, -0.747508347f},
+{-0.678800762f, -0.734322488f}, {-0.693087339f, -0.720853567f},
+{-0.707106769f, -0.707106769f}, {-0.720853567f, -0.693087339f},
+{-0.734322488f, -0.678800762f}, {-0.747508347f, -0.664252460f},
+{-0.760405958f, -0.649448037f}, {-0.773010433f, -0.634393275f},
+{-0.785316944f, -0.619093955f}, {-0.797320664f, -0.603555918f},
+{-0.809017003f, -0.587785244f}, {-0.820401430f, -0.571787953f},
+{-0.831469595f, -0.555570245f}, {-0.842217207f, -0.539138317f},
+{-0.852640152f, -0.522498548f}, {-0.862734377f, -0.505657375f},
+{-0.872496009f, -0.488621235f}, {-0.881921291f, -0.471396744f},
+{-0.891006529f, -0.453990489f}, {-0.899748266f, -0.436409235f},
+{-0.908143163f, -0.418659747f}, {-0.916187942f, -0.400748819f},
+{-0.923879504f, -0.382683426f}, {-0.931214929f, -0.364470512f},
+{-0.938191354f, -0.346117049f}, {-0.944806039f, -0.327630192f},
+{-0.951056540f, -0.309017003f}, {-0.956940353f, -0.290284663f},
+{-0.962455213f, -0.271440446f}, {-0.967599094f, -0.252491564f},
+{-0.972369909f, -0.233445361f}, {-0.976765871f, -0.214309156f},
+{-0.980785251f, -0.195090324f}, {-0.984426558f, -0.175796285f},
+{-0.987688363f, -0.156434461f}, {-0.990569353f, -0.137012348f},
+{-0.993068457f, -0.117537394f}, {-0.995184720f, -0.0980171412f},
+{-0.996917307f, -0.0784590989f}, {-0.998265624f, -0.0588708036f},
+{-0.999229014f, -0.0392598175f}, {-0.999807239f, -0.0196336918f},
+{-1.00000000f, -1.22464685e-16f}, {-0.999807239f, 0.0196336918f},
+{-0.999229014f, 0.0392598175f}, {-0.998265624f, 0.0588708036f},
+{-0.996917307f, 0.0784590989f}, {-0.995184720f, 0.0980171412f},
+{-0.993068457f, 0.117537394f}, {-0.990569353f, 0.137012348f},
+{-0.987688363f, 0.156434461f}, {-0.984426558f, 0.175796285f},
+{-0.980785251f, 0.195090324f}, {-0.976765871f, 0.214309156f},
+{-0.972369909f, 0.233445361f}, {-0.967599094f, 0.252491564f},
+{-0.962455213f, 0.271440446f}, {-0.956940353f, 0.290284663f},
+{-0.951056540f, 0.309017003f}, {-0.944806039f, 0.327630192f},
+{-0.938191354f, 0.346117049f}, {-0.931214929f, 0.364470512f},
+{-0.923879504f, 0.382683426f}, {-0.916187942f, 0.400748819f},
+{-0.908143163f, 0.418659747f}, {-0.899748266f, 0.436409235f},
+{-0.891006529f, 0.453990489f}, {-0.881921291f, 0.471396744f},
+{-0.872496009f, 0.488621235f}, {-0.862734377f, 0.505657375f},
+{-0.852640152f, 0.522498548f}, {-0.842217207f, 0.539138317f},
+{-0.831469595f, 0.555570245f}, {-0.820401430f, 0.571787953f},
+{-0.809017003f, 0.587785244f}, {-0.797320664f, 0.603555918f},
+{-0.785316944f, 0.619093955f}, {-0.773010433f, 0.634393275f},
+{-0.760405958f, 0.649448037f}, {-0.747508347f, 0.664252460f},
+{-0.734322488f, 0.678800762f}, {-0.720853567f, 0.693087339f},
+{-0.707106769f, 0.707106769f}, {-0.693087339f, 0.720853567f},
+{-0.678800762f, 0.734322488f}, {-0.664252460f, 0.747508347f},
+{-0.649448037f, 0.760405958f}, {-0.634393275f, 0.773010433f},
+{-0.619093955f, 0.785316944f}, {-0.603555918f, 0.797320664f},
+{-0.587785244f, 0.809017003f}, {-0.571787953f, 0.820401430f},
+{-0.555570245f, 0.831469595f}, {-0.539138317f, 0.842217207f},
+{-0.522498548f, 0.852640152f}, {-0.505657375f, 0.862734377f},
+{-0.488621235f, 0.872496009f}, {-0.471396744f, 0.881921291f},
+{-0.453990489f, 0.891006529f}, {-0.436409235f, 0.899748266f},
+{-0.418659747f, 0.908143163f}, {-0.400748819f, 0.916187942f},
+{-0.382683426f, 0.923879504f}, {-0.364470512f, 0.931214929f},
+{-0.346117049f, 0.938191354f}, {-0.327630192f, 0.944806039f},
+{-0.309017003f, 0.951056540f}, {-0.290284663f, 0.956940353f},
+{-0.271440446f, 0.962455213f}, {-0.252491564f, 0.967599094f},
+{-0.233445361f, 0.972369909f}, {-0.214309156f, 0.976765871f},
+{-0.195090324f, 0.980785251f}, {-0.175796285f, 0.984426558f},
+{-0.156434461f, 0.987688363f}, {-0.137012348f, 0.990569353f},
+{-0.117537394f, 0.993068457f}, {-0.0980171412f, 0.995184720f},
+{-0.0784590989f, 0.996917307f}, {-0.0588708036f, 0.998265624f},
+{-0.0392598175f, 0.999229014f}, {-0.0196336918f, 0.999807239f},
+{-1.83697015e-16f, 1.00000000f}, {0.0196336918f, 0.999807239f},
+{0.0392598175f, 0.999229014f}, {0.0588708036f, 0.998265624f},
+{0.0784590989f, 0.996917307f}, {0.0980171412f, 0.995184720f},
+{0.117537394f, 0.993068457f}, {0.137012348f, 0.990569353f},
+{0.156434461f, 0.987688363f}, {0.175796285f, 0.984426558f},
+{0.195090324f, 0.980785251f}, {0.214309156f, 0.976765871f},
+{0.233445361f, 0.972369909f}, {0.252491564f, 0.967599094f},
+{0.271440446f, 0.962455213f}, {0.290284663f, 0.956940353f},
+{0.309017003f, 0.951056540f}, {0.327630192f, 0.944806039f},
+{0.346117049f, 0.938191354f}, {0.364470512f, 0.931214929f},
+{0.382683426f, 0.923879504f}, {0.400748819f, 0.916187942f},
+{0.418659747f, 0.908143163f}, {0.436409235f, 0.899748266f},
+{0.453990489f, 0.891006529f}, {0.471396744f, 0.881921291f},
+{0.488621235f, 0.872496009f}, {0.505657375f, 0.862734377f},
+{0.522498548f, 0.852640152f}, {0.539138317f, 0.842217207f},
+{0.555570245f, 0.831469595f}, {0.571787953f, 0.820401430f},
+{0.587785244f, 0.809017003f}, {0.603555918f, 0.797320664f},
+{0.619093955f, 0.785316944f}, {0.634393275f, 0.773010433f},
+{0.649448037f, 0.760405958f}, {0.664252460f, 0.747508347f},
+{0.678800762f, 0.734322488f}, {0.693087339f, 0.720853567f},
+{0.707106769f, 0.707106769f}, {0.720853567f, 0.693087339f},
+{0.734322488f, 0.678800762f}, {0.747508347f, 0.664252460f},
+{0.760405958f, 0.649448037f}, {0.773010433f, 0.634393275f},
+{0.785316944f, 0.619093955f}, {0.797320664f, 0.603555918f},
+{0.809017003f, 0.587785244f}, {0.820401430f, 0.571787953f},
+{0.831469595f, 0.555570245f}, {0.842217207f, 0.539138317f},
+{0.852640152f, 0.522498548f}, {0.862734377f, 0.505657375f},
+{0.872496009f, 0.488621235f}, {0.881921291f, 0.471396744f},
+{0.891006529f, 0.453990489f}, {0.899748266f, 0.436409235f},
+{0.908143163f, 0.418659747f}, {0.916187942f, 0.400748819f},
+{0.923879504f, 0.382683426f}, {0.931214929f, 0.364470512f},
+{0.938191354f, 0.346117049f}, {0.944806039f, 0.327630192f},
+{0.951056540f, 0.309017003f}, {0.956940353f, 0.290284663f},
+{0.962455213f, 0.271440446f}, {0.967599094f, 0.252491564f},
+{0.972369909f, 0.233445361f}, {0.976765871f, 0.214309156f},
+{0.980785251f, 0.195090324f}, {0.984426558f, 0.175796285f},
+{0.987688363f, 0.156434461f}, {0.990569353f, 0.137012348f},
+{0.993068457f, 0.117537394f}, {0.995184720f, 0.0980171412f},
+{0.996917307f, 0.0784590989f}, {0.998265624f, 0.0588708036f},
+{0.999229014f, 0.0392598175f}, {0.999807239f, 0.0196336918f},
+};
+
+/* Precomputed configuration for a 320-point FFT, wired to the static
+ * bit-reversal and twiddle tables defined earlier in this file.
+ * scale equals 1/320. The factors array reads as (radix, remaining length)
+ * pairs - 5*64 = 320, 4*16 = 64, 4*4 = 16, 4*1 = 4 - padded with zeros;
+ * NOTE(review): confirm that interpretation against kiss_fft.h.
+ * The cast on the last member drops the const qualifier of arch_fft. */
+const kiss_fft_state kfft = {
+320, /* nfft */
+0.0031250000f, /* scale */
+-1, /* shift */
+{5, 64, 4, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev, /* bitrev*/
+fft_twiddles, /* twiddles*/
+(arch_fft_state *)&arch_fft, /* arch_fft*/
+};
+
+const float half_window[] = {
+3.78491532e-05f, 0.000340620492f, 0.000946046319f, 0.00185389258f, 0.00306380726f,
+0.00457531959f, 0.00638783723f, 0.00850064680f, 0.0109129101f, 0.0136236614f,
+0.0166318044f, 0.0199361145f, 0.0235352255f, 0.0274276342f, 0.0316116922f,
+0.0360856056f, 0.0408474281f, 0.0458950549f, 0.0512262285f, 0.0568385124f,
+0.0627293140f, 0.0688958541f, 0.0753351897f, 0.0820441842f, 0.0890194997f,
+0.0962576419f, 0.103754878f, 0.111507311f, 0.119510807f, 0.127761051f,
+0.136253506f, 0.144983411f, 0.153945804f, 0.163135484f, 0.172547072f,
+0.182174906f, 0.192013159f, 0.202055752f, 0.212296382f, 0.222728521f,
+0.233345464f, 0.244140238f, 0.255105674f, 0.266234398f, 0.277518868f,
+0.288951218f, 0.300523549f, 0.312227666f, 0.324055225f, 0.335997701f,
+0.348046392f, 0.360192508f, 0.372427016f, 0.384740859f, 0.397124738f,
+0.409569323f, 0.422065198f, 0.434602767f, 0.447172493f, 0.459764689f,
+0.472369671f, 0.484977663f, 0.497579008f, 0.510163903f, 0.522722721f,
+0.535245717f, 0.547723293f, 0.560145974f, 0.572504222f, 0.584788740f,
+0.596990347f, 0.609099925f, 0.621108532f, 0.633007407f, 0.644788086f,
+0.656442165f, 0.667961538f, 0.679338276f, 0.690564752f, 0.701633692f,
+0.712537885f, 0.723270535f, 0.733825266f, 0.744195819f, 0.754376352f,
+0.764361382f, 0.774145722f, 0.783724606f, 0.793093503f, 0.802248418f,
+0.811185598f, 0.819901764f, 0.828393936f, 0.836659551f, 0.844696403f,
+0.852502763f, 0.860077202f, 0.867418647f, 0.874526560f, 0.881400526f,
+0.888040781f, 0.894447744f, 0.900622249f, 0.906565487f, 0.912279010f,
+0.917764664f, 0.923024654f, 0.928061485f, 0.932878017f, 0.937477291f,
+0.941862822f, 0.946038187f, 0.950007319f, 0.953774393f, 0.957343817f,
+0.960720181f, 0.963908315f, 0.966913164f, 0.969739914f, 0.972393870f,
+0.974880517f, 0.977205336f, 0.979374051f, 0.981392324f, 0.983266115f,
+0.985001266f, 0.986603677f, 0.988079309f, 0.989434063f, 0.990674019f,
+0.991804957f, 0.992832899f, 0.993763626f, 0.994602919f, 0.995356441f,
+0.996029854f, 0.996628702f, 0.997158289f, 0.997623861f, 0.998030603f,
+0.998383403f, 0.998687088f, 0.998946249f, 0.999165416f, 0.999348700f,
+0.999500215f, 0.999623775f, 0.999723017f, 0.999801278f, 0.999861658f,
+0.999907196f, 0.999940455f, 0.999963880f, 0.999979615f, 0.999989510f,
+0.999995291f, 0.999998271f, 0.999999523f, 0.999999940f, 1.00000000f,
+};
+
+const float dct_table[] = {
+0.707106769f, 0.996194720f, 0.984807730f, 0.965925813f, 0.939692616f,
+0.906307817f, 0.866025388f, 0.819152057f, 0.766044438f, 0.707106769f,
+0.642787635f, 0.573576450f, 0.500000000f, 0.422618270f, 0.342020154f,
+0.258819044f, 0.173648179f, 0.0871557444f, 0.707106769f, 0.965925813f,
+0.866025388f, 0.707106769f, 0.500000000f, 0.258819044f, 6.12323426e-17f,
+-0.258819044f, -0.500000000f, -0.707106769f, -0.866025388f, -0.965925813f,
+-1.00000000f, -0.965925813f, -0.866025388f, -0.707106769f, -0.500000000f,
+-0.258819044f, 0.707106769f, 0.906307817f, 0.642787635f, 0.258819044f,
+-0.173648179f, -0.573576450f, -0.866025388f, -0.996194720f, -0.939692616f,
+-0.707106769f, -0.342020154f, 0.0871557444f, 0.500000000f, 0.819152057f,
+0.984807730f, 0.965925813f, 0.766044438f, 0.422618270f, 0.707106769f,
+0.819152057f, 0.342020154f, -0.258819044f, -0.766044438f, -0.996194720f,
+-0.866025388f, -0.422618270f, 0.173648179f, 0.707106769f, 0.984807730f,
+0.906307817f, 0.500000000f, -0.0871557444f, -0.642787635f, -0.965925813f,
+-0.939692616f, -0.573576450f, 0.707106769f, 0.707106769f, 6.12323426e-17f,
+-0.707106769f, -1.00000000f, -0.707106769f, -1.83697015e-16f, 0.707106769f,
+1.00000000f, 0.707106769f, 3.06161700e-16f, -0.707106769f, -1.00000000f,
+-0.707106769f, -4.28626385e-16f, 0.707106769f, 1.00000000f, 0.707106769f,
+0.707106769f, 0.573576450f, -0.342020154f, -0.965925813f, -0.766044438f,
+0.0871557444f, 0.866025388f, 0.906307817f, 0.173648179f, -0.707106769f,
+-0.984807730f, -0.422618270f, 0.500000000f, 0.996194720f, 0.642787635f,
+-0.258819044f, -0.939692616f, -0.819152057f, 0.707106769f, 0.422618270f,
+-0.642787635f, -0.965925813f, -0.173648179f, 0.819152057f, 0.866025388f,
+-0.0871557444f, -0.939692616f, -0.707106769f, 0.342020154f, 0.996194720f,
+0.500000000f, -0.573576450f, -0.984807730f, -0.258819044f, 0.766044438f,
+0.906307817f, 0.707106769f, 0.258819044f, -0.866025388f, -0.707106769f,
+0.500000000f, 0.965925813f, 3.06161700e-16f, -0.965925813f, -0.500000000f,
+0.707106769f, 0.866025388f, -0.258819044f, -1.00000000f, -0.258819044f,
+0.866025388f, 0.707106769f, -0.500000000f, -0.965925813f, 0.707106769f,
+0.0871557444f, -0.984807730f, -0.258819044f, 0.939692616f, 0.422618270f,
+-0.866025388f, -0.573576450f, 0.766044438f, 0.707106769f, -0.642787635f,
+-0.819152057f, 0.500000000f, 0.906307817f, -0.342020154f, -0.965925813f,
+0.173648179f, 0.996194720f, 0.707106769f, -0.0871557444f, -0.984807730f,
+0.258819044f, 0.939692616f, -0.422618270f, -0.866025388f, 0.573576450f,
+0.766044438f, -0.707106769f, -0.642787635f, 0.819152057f, 0.500000000f,
+-0.906307817f, -0.342020154f, 0.965925813f, 0.173648179f, -0.996194720f,
+0.707106769f, -0.258819044f, -0.866025388f, 0.707106769f, 0.500000000f,
+-0.965925813f, -4.28626385e-16f, 0.965925813f, -0.500000000f, -0.707106769f,
+0.866025388f, 0.258819044f, -1.00000000f, 0.258819044f, 0.866025388f,
+-0.707106769f, -0.500000000f, 0.965925813f, 0.707106769f, -0.422618270f,
+-0.642787635f, 0.965925813f, -0.173648179f, -0.819152057f, 0.866025388f,
+0.0871557444f, -0.939692616f, 0.707106769f, 0.342020154f, -0.996194720f,
+0.500000000f, 0.573576450f, -0.984807730f, 0.258819044f, 0.766044438f,
+-0.906307817f, 0.707106769f, -0.573576450f, -0.342020154f, 0.965925813f,
+-0.766044438f, -0.0871557444f, 0.866025388f, -0.906307817f, 0.173648179f,
+0.707106769f, -0.984807730f, 0.422618270f, 0.500000000f, -0.996194720f,
+0.642787635f, 0.258819044f, -0.939692616f, 0.819152057f, 0.707106769f,
+-0.707106769f, -1.83697015e-16f, 0.707106769f, -1.00000000f, 0.707106769f,
+5.51091070e-16f, -0.707106769f, 1.00000000f, -0.707106769f, -2.69484189e-15f,
+0.707106769f, -1.00000000f, 0.707106769f, -4.90477710e-16f, -0.707106769f,
+1.00000000f, -0.707106769f, 0.707106769f, -0.819152057f, 0.342020154f,
+0.258819044f, -0.766044438f, 0.996194720f, -0.866025388f, 0.422618270f,
+0.173648179f, -0.707106769f, 0.984807730f, -0.906307817f, 0.500000000f,
+0.0871557444f, -0.642787635f, 0.965925813f, -0.939692616f, 0.573576450f,
+0.707106769f, -0.906307817f, 0.642787635f, -0.258819044f, -0.173648179f,
+0.573576450f, -0.866025388f, 0.996194720f, -0.939692616f, 0.707106769f,
+-0.342020154f, -0.0871557444f, 0.500000000f, -0.819152057f, 0.984807730f,
+-0.965925813f, 0.766044438f, -0.422618270f, 0.707106769f, -0.965925813f,
+0.866025388f, -0.707106769f, 0.500000000f, -0.258819044f, 1.10280111e-15f,
+0.258819044f, -0.500000000f, 0.707106769f, -0.866025388f, 0.965925813f,
+-1.00000000f, 0.965925813f, -0.866025388f, 0.707106769f, -0.500000000f,
+0.258819044f, 0.707106769f, -0.996194720f, 0.984807730f, -0.965925813f,
+0.939692616f, -0.906307817f, 0.866025388f, -0.819152057f, 0.766044438f,
+-0.707106769f, 0.642787635f, -0.573576450f, 0.500000000f, -0.422618270f,
+0.342020154f, -0.258819044f, 0.173648179f, -0.0871557444f, };
diff --git a/dnn/meson.build b/dnn/meson.build
new file mode 100644
index 00000000..737d4a02
--- /dev/null
+++ b/dnn/meson.build
@@ -0,0 +1,64 @@
+# Build description for the DNN code. It assembles the portable source list,
+# builds one helper static library per available SIMD flavour, and finally
+# links everything into the 'opus-dnn' static library when deep PLC is enabled.
+# 'sources', the opt_* switches, opus_conf, opus_includes, libm and the have_*
+# flags are not defined here; they must be provided by the including build file.
+
+# Base source list, extended by the optional DRED and OSCE feature sets.
+dnn_sources = sources['DEEP_PLC_SOURCES']
+
+dred_sources = sources['DRED_SOURCES']
+if opt_enable_dred
+ dnn_sources += dred_sources
+endif
+
+osce_sources = sources['OSCE_SOURCES']
+if opt_enable_osce
+ dnn_sources += osce_sources
+endif
+
+# Per-instruction-set source lists. The variable names must follow the pattern
+# 'dnn_sources_<intr_name>' because the foreach loop below looks them up by name.
+dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
+dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
+dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']
+
+dnn_sources_neon_intr = sources['DNN_SOURCES_NEON']
+dnn_sources_dotprod_intr = sources['DNN_SOURCES_DOTPROD']
+
+dnn_includes = [opus_includes]
+dnn_static_libs = []
+
+# Run-time CPU detection glue is only added when OPUS_HAVE_RTCD is configured.
+if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD')
+ dnn_sources += sources['DNN_SOURCES_X86_RTCD']
+endif
+
+if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm
+ if opus_conf.has('OPUS_HAVE_RTCD')
+ dnn_sources += sources['DNN_SOURCES_ARM_RTCD']
+ endif
+endif
+
+# One static library per SIMD flavour whose 'have_<name>' flag is set, so that
+# each can be compiled with its own 'opus_<name>_args' flags (empty list when
+# that variable does not exist).
+foreach intr_name : ['sse2', 'sse4_1', 'avx2', 'neon_intr', 'dotprod_intr']
+ have_intr = get_variable('have_' + intr_name)
+ if not have_intr
+ continue
+ endif
+
+ intr_sources = get_variable('dnn_sources_' + intr_name)
+
+ intr_args = get_variable('opus_@0@_args'.format(intr_name), [])
+ dnn_static_libs += static_library('dnn_' + intr_name, intr_sources,
+ c_args: intr_args,
+ include_directories: dnn_includes,
+ install: false)
+endforeach
+
+# Windows builds additionally define DLL_EXPORT.
+dnn_c_args = []
+if host_machine.system() == 'windows'
+ dnn_c_args += ['-DDLL_EXPORT']
+endif
+
+
+# Final library; link_whole pulls in every object of the SIMD helper libraries.
+# When deep PLC is disabled dnn_lib is an empty list so users can still reference it.
+if opt_enable_deep_plc
+ dnn_lib = static_library('opus-dnn',
+ dnn_sources,
+ c_args: dnn_c_args,
+ include_directories: dnn_includes,
+ link_whole: [dnn_static_libs],
+ dependencies: libm,
+ install: false)
+else
+ dnn_lib = []
+endif
diff --git a/dnn/nndsp.c b/dnn/nndsp.c
new file mode 100644
index 00000000..caa77038
--- /dev/null
+++ b/dnn/nndsp.c
@@ -0,0 +1,416 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "nndsp.h"
+#include "arch.h"
+#include "nnet.h"
+#include "os_support.h"
+#include "pitch.h"
+
+#include <math.h>
+
+#ifndef M_PI
+#define M_PI 3.141592653589793f
+#endif
+
+/* Flat offset of tap i_kernel of the filter from input channel i_in_channels to
+ * output channel i_out_channels, for a kernel stored as
+ * [out_channels][in_channels][kernel_size].
+ * NOTE: the expansion uses the identifiers in_channels and kernel_size, which are
+ * not macro arguments and must be in scope wherever the macro is used. */
+#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
+
+/* Resets an adaptive-convolution state by clearing the whole structure
+ * (one AdaConvState element) through OPUS_CLEAR. */
+void init_adaconv_state(AdaConvState *hAdaConv)
+{
+ OPUS_CLEAR(hAdaConv, 1);
+}
+
+/* Resets an adaptive-comb state by clearing the whole structure
+ * (one AdaCombState element) through OPUS_CLEAR. */
+void init_adacomb_state(AdaCombState *hAdaComb)
+{
+ OPUS_CLEAR(hAdaComb, 1);
+}
+
+/* Resets an adaptive-shaping state by clearing the whole structure
+ * (one AdaShapeState element) through OPUS_CLEAR. */
+void init_adashape_state(AdaShapeState *hAdaShape)
+{
+ OPUS_CLEAR(hAdaShape, 1);
+}
+
+/* Fills window[0 .. overlap_size-1] with the falling half of a raised cosine,
+ * sampled at half-sample offsets:
+ *     window[n] = 0.5 + 0.5 * cos(pi * (n + 0.5) / overlap_size)
+ * The first entry is close to 1 and the last close to 0. Nothing is written
+ * when overlap_size is not positive. */
+void compute_overlap_window(float *window, int overlap_size)
+{
+    int n = 0;
+
+    while (n < overlap_size)
+    {
+        /* angle of the n-th sampling point, strictly between 0 and pi */
+        double phase = M_PI * (n + 0.5f) / overlap_size;
+
+        window[n] = 0.5f + 0.5f * cos(phase);
+        n++;
+    }
+}
+
+#ifdef DEBUG_NNDSP
+/* Debug helper: prints every element of vec as "<name>[<index>]: <value>",
+ * one element per line, to standard output.
+ *   name   - label printed in front of each index
+ *   vec    - values to print
+ *   length - number of elements to print (nothing is printed when <= 0)
+ * The loop counter is declared at the top of the block, like everywhere else
+ * in this file, so the helper also builds with C89 compilers. */
+void print_float_vector(const char* name, const float *vec, int length)
+{
+    int i;
+
+    for (i = 0; i < length; i++)
+    {
+        printf("%s[%d]: %f\n", name, i, vec[i]);
+    }
+}
+#endif
+
+/* Normalises a bank of filters in place.
+ * For every output channel the Euclidean (L2) norm is taken jointly over all
+ * input channels and taps; the channel is then multiplied by
+ * gain[channel] / (1e-6 + norm), so it ends up with norm ~ gain[channel].
+ *   kernel       - filters laid out as [out_channels][in_channels][kernel_size]
+ *   in_channels  - number of input channels
+ *   out_channels - number of output channels
+ *   kernel_size  - taps per filter
+ *   gain         - one target gain per output channel (read only) */
+static void scale_kernel(
+    float *kernel,
+    int in_channels,
+    int out_channels,
+    int kernel_size,
+    float *gain
+)
+{
+    /* all taps of one output channel are contiguous in memory */
+    const int taps_per_output = in_channels * kernel_size;
+    int ch, t;
+
+    for (ch = 0; ch < out_channels; ch++)
+    {
+        float *taps = kernel + KERNEL_INDEX(ch, 0, 0);
+        float energy = 0;
+        float scale;
+
+        for (t = 0; t < taps_per_output; t++)
+        {
+            energy += taps[t] * taps[t];
+        }
+#ifdef DEBUG_NNDSP
+        printf("kernel norm: %f, %f\n", energy, sqrt(energy));
+#endif
+        /* small offset keeps the division finite for an all-zero channel */
+        scale = 1.f / (1e-6f + sqrt(energy));
+        for (t = 0; t < taps_per_output; t++)
+        {
+            taps[t] *= scale * gain[ch];
+        }
+    }
+}
+
+/* Maps raw gain values to positive linear gains in place:
+ *     gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b)
+ *   gains         - values to transform (overwritten)
+ *   num_gains     - number of entries; nothing happens when <= 0
+ *   filter_gain_a - slope applied before the exponential
+ *   filter_gain_b - offset applied before the exponential */
+static void transform_gains(
+    float *gains,
+    int num_gains,
+    float filter_gain_a,
+    float filter_gain_b
+)
+{
+    float *g = gains;
+    int remaining;
+
+    for (remaining = num_gains; remaining > 0; remaining--, g++)
+    {
+        *g = exp(filter_gain_a * *g + filter_gain_b);
+    }
+}
+
+/* Adaptive convolution of one frame.
+ * A filter kernel and per-output-channel gains are computed from `features`,
+ * the kernel is normalised to those gains, and the input is filtered with it.
+ * Over the first overlap_size samples the output is cross-faded from the
+ * previous frame's kernel (kept in hAdaConv) to the new one.
+ *   hAdaConv       - state: input history and last kernel (read and updated)
+ *   x_out          - output, out_channels blocks of frame_size samples
+ *   x_in           - input, in_channels blocks of frame_size samples
+ *   features       - conditioning vector fed to both layers
+ *   kernel_layer   - layer producing in_channels*out_channels*kernel_size taps
+ *   gain_layer     - layer producing out_channels raw gains (tanh output)
+ *   feature_dim    - unused
+ *   left_padding   - must equal kernel_size - 1 (asserted)
+ *   filter_gain_a/b- raw gain g is mapped to exp(a * g + b)
+ *   shape_gain     - must be 1 (asserted); not used otherwise
+ *   window         - overlap_size cross-fade weights applied to the OLD kernel
+ *   arch           - passed through to the dense layers and celt_pitch_xcorr
+ * All scratch buffers live on the stack and are sized by the ADACONV_MAX_*
+ * constants; apart from the asserts below no argument is checked against them. */
+void adaconv_process_frame(
+ AdaConvState* hAdaConv,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *kernel_layer,
+ const LinearLayer *gain_layer,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float shape_gain,
+ float *window,
+ int arch
+)
+{
+ float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
+ float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
+ float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
+ float kernel0[ADACONV_MAX_KERNEL_SIZE];
+ float kernel1[ADACONV_MAX_KERNEL_SIZE];
+ float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
+ float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
+ float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
+ float *p_input;
+ int i_in_channels, i_out_channels, i_sample;
+
+ (void) feature_dim; /* ToDo: figure out whether we might need this information */
+
+ celt_assert(shape_gain == 1);
+ celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
+ celt_assert(kernel_size < frame_size);
+
+ /* output_buffer is accumulated into below, so it has to start at zero */
+ OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
+ OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
+ OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("x_in", x_in, in_channels * frame_size);
+#endif
+
+ /* prepare input */
+ /* per channel: [kernel_size history samples | frame_size new samples], channel stride kernel_size + frame_size */
+ for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+ {
+ OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
+ OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
+ }
+ /* first new sample of channel 0 */
+ p_input = input_buffer + kernel_size;
+
+
+ /* calculate new kernel and new gain */
+ compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
+ compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
+#ifdef DEBUG_NNDSP
+ print_float_vector("features", features, feature_dim);
+ print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
+ print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
+#endif
+ /* gains become exp(a * g + b); each output channel of the kernel is then normalised to its gain */
+ transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
+ scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
+ print_float_vector("adaconv_gain", gain_buffer, out_channels);
+#endif
+
+ /* calculate overlapping part using kernel from last frame */
+
+ for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
+ {
+ for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
+ {
+ /* kernel0 = previous frame's taps, kernel1 = new taps; both are zero-padded to
+ ADACONV_MAX_KERNEL_SIZE because celt_pitch_xcorr is called with that fixed length */
+ OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
+ OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
+
+ OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
+ OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
+ /* NOTE(review): celt_pitch_xcorr presumably yields out[n] = sum_j kernel[j] * in[n + j];
+ the old kernel is only evaluated for overlap_size samples, the new one for the whole frame */
+ celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
+ celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
+ /* cross-fade: window weights the old kernel's output, (1 - window) the new one's;
+ contributions of all input channels are summed into the same output channel */
+ for (i_sample = 0; i_sample < overlap_size; i_sample++)
+ {
+ output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
+ output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
+ }
+ for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+ {
+ output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
+ }
+ }
+ }
+
+ OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
+
+#ifdef DEBUG_NNDSP
+ print_float_vector("x_out", x_out, out_channels * frame_size);
+#endif
+
+ /* buffer update */
+ /* keep the last kernel_size input samples of every channel and the kernel just used */
+ for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
+ {
+ OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
+ }
+ OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
+}
+
+/* Adaptive comb filtering of one single-channel frame.
+ * A short kernel and two gains are computed from `features`. The kernel is
+ * applied to the signal delayed by pitch_lag and the result is added to the
+ * undelayed input, all scaled by a global gain. Over the first overlap_size
+ * samples the output is cross-faded from the previous frame's kernel, lag and
+ * global gain (kept in hAdaComb) to the new ones.
+ *   hAdaComb          - state: input history, last kernel, last lag, last global gain
+ *   x_out             - output, frame_size samples
+ *   x_in              - input, frame_size samples
+ *   features          - conditioning vector fed to all three layers
+ *   kernel_layer      - layer producing kernel_size taps (linear output)
+ *   gain_layer        - layer producing one raw comb gain (ReLU output)
+ *   global_gain_layer - layer producing one raw global gain (tanh output)
+ *   pitch_lag         - delay, in samples, of the comb path for this frame
+ *   feature_dim       - unused
+ *   left_padding      - extra look-back added to the lag when addressing the input
+ *   filter_gain_a/b   - global gain is exp(a * g + b)
+ *   log_gain_limit    - comb gain is exp(log_gain_limit - g)
+ *   window            - overlap_size cross-fade weights applied to the OLD settings
+ *   arch              - passed through to the dense layers and celt_pitch_xcorr
+ * Scratch buffers live on the stack and are sized by the ADACOMB_MAX_* constants. */
+void adacomb_process_frame(
+    AdaCombState* hAdaComb,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *kernel_layer,
+    const LinearLayer *gain_layer,
+    const LinearLayer *global_gain_layer,
+    int pitch_lag,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float log_gain_limit,
+    float *window,
+    int arch
+)
+{
+    float output_buffer[ADACOMB_MAX_FRAME_SIZE];
+    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
+    float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
+    float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
+    float gain, global_gain;
+    float *p_input;
+    int i_sample;
+    /* Sized by the same constant that is used to clear and correlate them below;
+       a literal here would silently overflow if the constant were ever raised. */
+    float kernel[ADACOMB_MAX_KERNEL_SIZE];
+    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
+
+    (void) feature_dim; /* ToDo: figure out whether we might need this information */
+
+    /* the fixed-size scratch buffers must be able to hold the requested sizes */
+    celt_assert(kernel_size <= ADACOMB_MAX_KERNEL_SIZE);
+    celt_assert(frame_size <= ADACOMB_MAX_FRAME_SIZE);
+
+    OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
+    OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);
+
+    /* input_buffer = [kernel_size + ADACOMB_MAX_LAG history samples | frame_size new samples] */
+    OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
+    OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
+    /* first new sample */
+    p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;
+
+    /* calculate new kernel and new gain */
+    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
+    compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
+    compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
+#ifdef DEBUG_NNDSP
+    print_float_vector("features", features, feature_dim);
+    print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
+    print_float_vector("adacomb_gain_raw", &gain, 1);
+    print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
+#endif
+    gain = exp(log_gain_limit - gain);
+    global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
+    /* normalise the kernel so that its norm equals the comb gain */
+    scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
+
+#ifdef DEBUG_NNDSP
+    print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
+    print_float_vector("adacomb_gain", &gain, 1);
+#endif
+
+    /* zero-padded copies: celt_pitch_xcorr is always run over ADACOMB_MAX_KERNEL_SIZE taps */
+    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_COPY(kernel, kernel_buffer, kernel_size);
+    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
+
+    /* old kernel at the old lag, needed for the overlap region only */
+    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
+
+    /* new kernel at the new lag, for the whole frame */
+    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
+    /* overlap region, comb path: cross-fade old and new output, each with its own global gain */
+    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+    {
+        output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
+    }
+
+    /* overlap region, direct path: input weighted by the cross-faded global gain */
+    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+    {
+        output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
+    }
+
+    /* rest of the frame: new settings only */
+    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+    {
+        output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
+    }
+    OPUS_COPY(x_out, output_buffer, frame_size);
+
+#ifdef DEBUG_NNDSP
+    print_float_vector("x_out", x_out, frame_size);
+#endif
+
+    /* buffer update */
+    OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
+    OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
+    hAdaComb->last_pitch_lag = pitch_lag;
+    hAdaComb->last_global_gain = global_gain;
+}
+
+
+/* Adaptive temporal shaping of one frame.
+ * A log-domain temporal envelope of the input is computed by average pooling,
+ * combined with `features` through two first-stage layers, passed through a
+ * leaky ReLU and a second layer, and the exponential of that result is applied
+ * to the input as a per-sample gain.
+ *   hAdaShape   - state holding the memories of the three conv layers
+ *   x_out       - output, frame_size samples
+ *   x_in        - input, frame_size samples
+ *   features    - conditioning vector of feature_dim values
+ *   alpha1f     - first-stage layer applied to the features
+ *   alpha1t     - first-stage layer applied to the envelope (tenv_size + 1 inputs)
+ *   alpha2      - second-stage layer applied to the frame_size activations
+ *   frame_size  - must be a multiple of avg_pool_k (asserted)
+ *   avg_pool_k  - number of samples averaged per envelope value
+ *   arch        - passed through to compute_generic_conv1d
+ * NOTE(review): the code relies on each layer producing frame_size outputs; that
+ * depends on the layer definitions, which are not visible here. */
+void adashape_process_frame(
+ AdaShapeState *hAdaShape,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *alpha1f,
+ const LinearLayer *alpha1t,
+ const LinearLayer *alpha2,
+ int feature_dim,
+ int frame_size,
+ int avg_pool_k,
+ int arch
+)
+{
+ float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
+ float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
+ float tmp_buffer[ADASHAPE_MAX_FRAME_SIZE];
+ int i, k;
+ int tenv_size;
+ float mean;
+ float *tenv;
+
+ celt_assert(frame_size % avg_pool_k == 0);
+ celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);
+
+ /* in_buffer layout: [features (feature_dim) | envelope (tenv_size) | envelope mean (1)] */
+ tenv_size = frame_size / avg_pool_k;
+ tenv = in_buffer + feature_dim;
+ OPUS_CLEAR(tenv, tenv_size + 1);
+
+ OPUS_COPY(in_buffer, features, feature_dim);
+
+ /* calculate temporal envelope */
+ /* tenv[i] = log(mean |x| over avg_pool_k samples + 2^-16); the offset keeps the log finite for silence */
+ mean = 0;
+ for (i = 0; i < tenv_size; i++)
+ {
+ for (k = 0; k < avg_pool_k; k++)
+ {
+ tenv[i] += fabs(x_in[i * avg_pool_k + k]);
+ }
+ tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
+ mean += tenv[i];
+ }
+ /* remove the mean from the envelope and hand it over as a separate last element */
+ mean /= tenv_size;
+ for (i = 0; i < tenv_size; i++)
+ {
+ tenv[i] -= mean;
+ }
+ tenv[tenv_size] = mean;
+#ifdef DEBUG_NNDSP
+ print_float_vector("tenv", tenv, tenv_size + 1);
+#endif
+
+ /* calculate temporal weights */
+#ifdef DEBUG_NNDSP
+ print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
+#endif
+ /* features and envelope go through separate layers; their outputs are summed below */
+ compute_generic_conv1d(alpha1f, out_buffer, hAdaShape->conv_alpha1f_state, in_buffer, feature_dim, ACTIVATION_LINEAR, arch);
+ compute_generic_conv1d(alpha1t, tmp_buffer, hAdaShape->conv_alpha1t_state, tenv, tenv_size + 1, ACTIVATION_LINEAR, arch);
+#ifdef DEBUG_NNDSP
+ print_float_vector("alpha1_out", out_buffer, frame_size);
+#endif
+ /* compute leaky ReLU by hand. ToDo: try tanh activation */
+ /* negative slope 0.2; in_buffer is reused from index 0 for the activations, overwriting the features */
+ for (i = 0; i < frame_size; i ++)
+ {
+ float tmp = out_buffer[i] + tmp_buffer[i];
+ in_buffer[i] = tmp >= 0 ? tmp : 0.2 * tmp;
+ }
+#ifdef DEBUG_NNDSP
+ print_float_vector("post_alpha1", in_buffer, frame_size);
+#endif
+ compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);
+
+ /* shape signal */
+ /* out_buffer holds log-gains: each sample is multiplied by exp(out_buffer[i]) */
+ for (i = 0; i < frame_size; i ++)
+ {
+ x_out[i] = exp(out_buffer[i]) * x_in[i];
+ }
+
+}
diff --git a/dnn/nndsp.h b/dnn/nndsp.h
new file mode 100644
index 00000000..6021250f
--- /dev/null
+++ b/dnn/nndsp.h
@@ -0,0 +1,143 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef NNDSP_H
+#define NNDSP_H
+
+#include "opus_types.h"
+#include "nnet.h"
+#include <string.h>
+
+
+/* Compile-time limits for the adaptive convolution module. The kernel-size and
+   channel limits size the history / last_kernel arrays of AdaConvState. */
+#define ADACONV_MAX_KERNEL_SIZE 16
+#define ADACONV_MAX_INPUT_CHANNELS 2
+#define ADACONV_MAX_OUTPUT_CHANNELS 2
+#define ADACONV_MAX_FRAME_SIZE 80
+#define ADACONV_MAX_OVERLAP_SIZE 40
+
+/* Compile-time limits for the adaptive comb filter. The lag and kernel-size
+   limits size the history / last_kernel arrays of AdaCombState. */
+#define ADACOMB_MAX_LAG 300
+#define ADACOMB_MAX_KERNEL_SIZE 16
+#define ADACOMB_MAX_FRAME_SIZE 80
+#define ADACOMB_MAX_OVERLAP_SIZE 40
+
+/* Compile-time limits for the adaptive shaping module; they size the
+   convolution state arrays of AdaShapeState. */
+#define ADASHAPE_MAX_INPUT_DIM 512
+#define ADASHAPE_MAX_FRAME_SIZE 160
+
+/*#define DEBUG_NNDSP*/
+#ifdef DEBUG_NNDSP
+#include <stdio.h>
+#endif
+
+
+/* Debug helper taking a label, a float vector and its length.
+   NOTE(review): the prototype is unconditional while <stdio.h> above is only
+   included under DEBUG_NNDSP -- confirm the definition is built in both cases. */
+void print_float_vector(const char* name, const float *vec, int length);
+
+/* Persistent state of one adaptive convolution instance; initialise with
+   init_adaconv_state(). Arrays are sized for the ADACONV_MAX_* limits.
+   NOTE(review): field roles below are inferred from their names -- confirm
+   against adaconv_process_frame(). */
+typedef struct {
+ /* presumably the input tail carried over from the previous frame */
+ float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS];
+ /* presumably the kernel used for the previous frame */
+ float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
+ float last_gain;
+} AdaConvState;
+
+
+/* Persistent state of one adaptive comb filter instance; initialise with
+   init_adacomb_state(). history holds ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG
+   samples.
+   NOTE(review): the last_* fields presumably cache the previous frame's
+   parameters -- confirm against adacomb_process_frame(). */
+typedef struct {
+ float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG];
+ float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
+ float last_global_gain;
+ int last_pitch_lag;
+} AdaCombState;
+
+
+/* Persistent state of one adaptive shaping instance; initialise with
+   init_adashape_state(). Each array is the memory buffer handed to
+   compute_generic_conv1d() for the alpha1f, alpha1t and alpha2 layers
+   respectively in adashape_process_frame(). */
+typedef struct {
+ float conv_alpha1f_state[ADASHAPE_MAX_INPUT_DIM];
+ float conv_alpha1t_state[ADASHAPE_MAX_INPUT_DIM];
+ float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE];
+} AdaShapeState;
+
+/* Initialise an AdaConvState before its first use. */
+void init_adaconv_state(AdaConvState *hAdaConv);
+
+/* Initialise an AdaCombState before its first use. */
+void init_adacomb_state(AdaCombState *hAdaComb);
+
+/* Initialise an AdaShapeState before its first use. */
+void init_adashape_state(AdaShapeState *hAdaShape);
+
+/* Fill window with overlap_size values.
+   NOTE(review): the window shape is defined in nndsp.c -- not visible here. */
+void compute_overlap_window(float *window, int overlap_size);
+
+/* Process one frame of frame_size samples from x_in into x_out with the
+   adaptive convolution module, updating hAdaConv.
+   kernel_layer and gain_layer are applied to the features vector.
+   NOTE(review): the parameters are presumably bounded by the ADACONV_MAX_*
+   limits, and window presumably comes from compute_overlap_window() with the
+   same overlap_size -- confirm against the implementation in nndsp.c. */
+void adaconv_process_frame(
+ AdaConvState* hAdaConv,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *kernel_layer,
+ const LinearLayer *gain_layer,
+ int feature_dim, /* not strictly necessary */
+ int frame_size,
+ int overlap_size,
+ int in_channels,
+ int out_channels,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float shape_gain,
+ float *window,
+ int arch
+);
+
+/* Process one frame of frame_size samples from x_in into x_out with the
+   adaptive comb filter, updating hAdaComb. pitch_lag is the lag for this
+   frame. The LACE caller in osce.c passes the same buffer as x_out and x_in.
+   NOTE(review): the parameters are presumably bounded by the ADACOMB_MAX_*
+   limits -- confirm against the implementation in nndsp.c. */
+void adacomb_process_frame(
+ AdaCombState* hAdaComb,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *kernel_layer,
+ const LinearLayer *gain_layer,
+ const LinearLayer *global_gain_layer,
+ int pitch_lag,
+ int feature_dim,
+ int frame_size,
+ int overlap_size,
+ int kernel_size,
+ int left_padding,
+ float filter_gain_a,
+ float filter_gain_b,
+ float log_gain_limit,
+ float *window,
+ int arch
+);
+
+/* Adaptive temporal shaping of one frame.
+   The implementation asserts that frame_size is a multiple of avg_pool_k and
+   that feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM.
+   alpha1f is applied to features, alpha1t to the mean-removed log temporal
+   envelope of x_in (average of |x_in| over avg_pool_k samples, with the mean
+   appended as the last element), and alpha2 to the leaky ReLU (slope 0.2) of
+   their sum. Each output sample is x_out[i] = exp(alpha2_out[i]) * x_in[i].
+   hAdaShape supplies the convolution memories of the three layers. */
+void adashape_process_frame(
+ AdaShapeState *hAdaShape,
+ float *x_out,
+ const float *x_in,
+ const float *features,
+ const LinearLayer *alpha1f,
+ const LinearLayer *alpha1t,
+ const LinearLayer *alpha2,
+ int feature_dim,
+ int frame_size,
+ int avg_pool_k,
+ int arch
+);
+
+#endif
diff --git a/dnn/nnet.c b/dnn/nnet.c
new file mode 100644
index 00000000..5e87f207
--- /dev/null
+++ b/dnn/nnet.c
@@ -0,0 +1,149 @@
+/* Copyright (c) 2018 Mozilla
+ 2008-2011 Octasic Inc.
+ 2012-2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <math.h>
+#include "opus_types.h"
+#include "arch.h"
+#include "nnet.h"
+#include "dred_rdovae_constants.h"
+#include "plc_data.h"
+#include "fargan.h"
+#include "os_support.h"
+#include "vec.h"
+
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
+#ifdef NO_OPTIMIZATIONS
+#if defined(_MSC_VER)
+#pragma message ("Compiling without any vectorization. This code will be very slow")
+#else
+#warning Compiling without any vectorization. This code will be very slow
+#endif
+#endif
+
+
+#define SOFTMAX_HACK
+
+
+/* Dense layer: evaluates `layer` on `input` into `output`, then applies
+   `activation` in place over the layer->nb_outputs results. `arch` is handed
+   to the compute_linear()/compute_activation() dispatch macros. */
+void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
+{
+   const int nb_out = layer->nb_outputs;
+
+   compute_linear(layer, output, input, arch);
+   compute_activation(output, output, nb_out, activation, arch);
+}
+
+#ifdef ENABLE_OSCE
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
+#else
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS)
+#endif
+
+/* One GRU step, updating `state` in place.
+   With N = recurrent_weights->nb_inputs, both layers must produce 3*N outputs,
+   laid out as three N-sized slices z, r, h:
+     z, r = sigmoid(input_weights(in) + recurrent_weights(state))   (first 2*N)
+     h    = tanh(input_weights(in)_h + r * recurrent_weights(state)_h)
+     state = z * state + (1 - z) * h
+   `in` must not alias `state`, and 3*N must not exceed 3*MAX_RNN_NEURONS_ALL. */
+void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
+{
+   int i;
+   int N;
+   float zrh[3*MAX_RNN_NEURONS_ALL];
+   float recur[3*MAX_RNN_NEURONS_ALL];
+   float *z;
+   float *r;
+   float *h;
+   celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
+   celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
+   /* Validate the scratch buffer size before deriving any pointer into it. */
+   celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
+   celt_assert(in != state);
+   N = recurrent_weights->nb_inputs;
+   z = zrh;
+   r = &zrh[N];
+   h = &zrh[2*N];
+   compute_linear(input_weights, zrh, in, arch);
+   compute_linear(recurrent_weights, recur, state, arch);
+   /* Update and reset gates share one sigmoid pass over the first 2*N values. */
+   for (i=0;i<2*N;i++)
+      zrh[i] += recur[i];
+   compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID, arch);
+   /* The recurrent part of the candidate is gated by r before the tanh. */
+   for (i=0;i<N;i++)
+      h[i] += recur[2*N+i]*r[i];
+   compute_activation(h, h, N, ACTIVATION_TANH, arch);
+   for (i=0;i<N;i++)
+      h[i] = z[i]*state[i] + (1-z[i])*h[i];
+   for (i=0;i<N;i++)
+      state[i] = h[i];
+}
+
+/* Gated linear unit: output[i] = input[i] * sigmoid(layer(input))[i].
+   The layer must be square (nb_inputs == nb_outputs) and no larger than
+   MAX_INPUTS, the size of the local gate buffer. `output` may be the same
+   buffer as `input`. */
+void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
+{
+   int i;
+   float act2[MAX_INPUTS];
+   celt_assert(layer->nb_inputs == layer->nb_outputs);
+   /* Guard the stack buffer: compute_linear() writes nb_outputs values into act2. */
+   celt_assert(layer->nb_outputs <= MAX_INPUTS);
+   compute_linear(layer, act2, input, arch);
+   compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID, arch);
+   if (input == output) {
+      /* Give a vectorization hint to the compiler for the in-place case. */
+      for (i=0;i<layer->nb_outputs;i++) output[i] = output[i]*act2[i];
+   } else {
+      for (i=0;i<layer->nb_outputs;i++) output[i] = input[i]*act2[i];
+   }
+}
+
+#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS
+
+/* Causal 1-D convolution step expressed as a linear layer.
+   The layer sees layer->nb_inputs values: the nb_inputs - input_size most
+   recent values kept in `mem` followed by the input_size new values in
+   `input`. After evaluation, `activation` is applied to the nb_outputs results
+   and `mem` is advanced by input_size values.
+   `mem` is not accessed when nb_inputs == input_size, so it may be NULL in
+   that case. `input` must not alias `output`. */
+void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
+{
+   float tmp[MAX_CONV_INPUTS_ALL];
+   int hist_size;
+   celt_assert(input != output);
+   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
+   /* A larger input_size would yield a negative history length below. */
+   celt_assert(input_size <= layer->nb_inputs);
+   hist_size = layer->nb_inputs-input_size;
+   if (hist_size!=0) OPUS_COPY(tmp, mem, hist_size);
+   OPUS_COPY(&tmp[hist_size], input, input_size);
+   compute_linear(layer, output, tmp, arch);
+   compute_activation(output, output, layer->nb_outputs, activation, arch);
+   if (hist_size!=0) OPUS_COPY(mem, &tmp[input_size], hist_size);
+}
+
+/* Dilated variant of compute_generic_conv1d().
+   ksize = layer->nb_inputs / input_size taps are used. With dilation == 1 the
+   behaviour matches the non-dilated function (mem holds nb_inputs - input_size
+   values). Otherwise mem holds input_size*dilation*(ksize-1) values, the taps
+   are gathered from every dilation-th input_size slot, and mem is shifted by
+   one slot with the new input appended at the end.
+   `input` must not alias `output`. */
+void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
+{
+   float tmp[MAX_CONV_INPUTS_ALL];
+   int ksize = layer->nb_inputs/input_size;
+   int i;
+   celt_assert(input != output);
+   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
+   if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
+   else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
+   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
+   compute_linear(layer, output, tmp, arch);
+   compute_activation(output, output, layer->nb_outputs, activation, arch);
+   if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
+   else {
+      /* Source and destination lie in the same buffer and overlap as soon as
+         more than two slots are stored, so a memmove-style copy is required. */
+      OPUS_MOVE(mem, &mem[input_size], input_size*dilation*(ksize-1)-input_size);
+      OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
+   }
+}
diff --git a/dnn/nnet.h b/dnn/nnet.h
new file mode 100644
index 00000000..7bc61337
--- /dev/null
+++ b/dnn/nnet.h
@@ -0,0 +1,163 @@
+/* Copyright (c) 2018 Mozilla
+ Copyright (c) 2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef NNET_H_
+#define NNET_H_
+
+#include <stddef.h>
+#include "opus_types.h"
+
+#define ACTIVATION_LINEAR 0
+#define ACTIVATION_SIGMOID 1
+#define ACTIVATION_TANH 2
+#define ACTIVATION_RELU 3
+#define ACTIVATION_SOFTMAX 4
+#define ACTIVATION_SWISH 5
+
+#define WEIGHT_BLOB_VERSION 0
+#define WEIGHT_BLOCK_SIZE 64
+/* Named blob of model weights; the *_arrays[] tables declared below are arrays
+   of these.
+   NOTE(review): type is presumably one of the WEIGHT_TYPE_* values and size
+   the byte length of data -- confirm against parse_weights(). */
+typedef struct {
+ const char *name;
+ int type;
+ int size;
+ const void *data;
+} WeightArray;
+
+#define WEIGHT_TYPE_float 0
+#define WEIGHT_TYPE_int 1
+#define WEIGHT_TYPE_qweight 2
+#define WEIGHT_TYPE_int8 3
+
+/* Fixed-size record header.
+   NOTE(review): looks like the per-array header of the serialized weight blob
+   read by parse_weights(), with version / block_size matching
+   WEIGHT_BLOB_VERSION / WEIGHT_BLOCK_SIZE -- confirm against the parser. */
+typedef struct {
+ char head[4];
+ int version;
+ int type;
+ int size;
+ int block_size;
+ char name[44];
+} WeightHead;
+
+/* Generic (optionally sparse) affine transformation, evaluated by
+   compute_linear(). Unused pointers are NULL:
+   - float_weights, when set, takes precedence over the int8 weights;
+   - weights (int8) is used otherwise, together with scale;
+   - weights_idx, when set, selects the sparse kernels;
+   - bias is added to every output; for int8 weights it is replaced by subias
+     when USE_SU_BIAS is defined;
+   - diag is an extra diagonal term that requires nb_outputs == 3*nb_inputs
+     (GRU recurrent weights).
+   If neither weight pointer is set the product is treated as zero. */
+typedef struct {
+ const float *bias;
+ const float *subias;
+ const opus_int8 *weights;
+ const float *float_weights;
+ const int *weights_idx;
+ const float *diag;
+ const float *scale;
+ int nb_inputs;
+ int nb_outputs;
+} LinearLayer;
+
+/* 2-D convolution layer with a dense float kernel, evaluated by
+   compute_conv2d(). float_weights is indexed as
+   [out_channels][in_channels][ktime][kheight]; bias, when non-NULL, holds one
+   value per output channel. */
+typedef struct {
+ const float *bias;
+ const float *float_weights;
+ int in_channels;
+ int out_channels;
+ int ktime;
+ int kheight;
+} Conv2dLayer;
+
+
+/* Layer-level entry points. `arch` selects the run-time CPU variant used by the
+   compute_linear()/compute_activation() dispatch macros. */
+/* output = activation(layer(input)). */
+void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
+/* One GRU step; state is updated in place and must not alias in. */
+void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch);
+/* Causal conv step: mem keeps the previous nb_inputs - input_size values. */
+void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch);
+/* Same as compute_generic_conv1d() with taps spaced `dilation` slots apart. */
+void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch);
+/* output[i] = input[i] * sigmoid(layer(input))[i]; in-place use is allowed. */
+void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch);
+/* NOTE(review): definition not visible in nnet.c as shown -- confirm semantics. */
+void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
+
+
+int parse_weights(WeightArray **list, const void *data, int len);
+
+
+extern const WeightArray lpcnet_arrays[];
+extern const WeightArray plcmodel_arrays[];
+extern const WeightArray rdovaeenc_arrays[];
+extern const WeightArray rdovaedec_arrays[];
+extern const WeightArray fwgan_arrays[];
+extern const WeightArray fargan_arrays[];
+extern const WeightArray pitchdnn_arrays[];
+extern const WeightArray lossgen_arrays[];
+
+int linear_init(LinearLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *subias,
+ const char *weights,
+ const char *float_weights,
+ const char *weights_idx,
+ const char *diag,
+ const char *scale,
+ int nb_inputs,
+ int nb_outputs);
+
+int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *float_weights,
+ int in_channels,
+ int out_channels,
+ int ktime,
+ int kheight);
+
+
+/* Plain-C variants of the kernels; these are what the compute_*() dispatch
+   macros below expand to when no architecture override is defined. They are
+   generated from nnet_arch.h with RTCD_ARCH set to c (see nnet_default.c). */
+void compute_linear_c(const LinearLayer *linear, float *out, const float *in);
+void compute_activation_c(float *output, const float *input, int N, int activation);
+void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+
+
+#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#include "arm/dnn_arm.h"
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2)
+#include "x86/dnn_x86.h"
+#endif
+
+/* Default dispatch: unless an architecture header included above defines the
+   matching OVERRIDE_* macro, each compute_*() call ignores `arch` (evaluated
+   and discarded) and forwards to the plain-C implementation. */
+#ifndef OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
+#endif
+
+#ifndef OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
+#endif
+
+#ifndef OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_c(conv, out, mem, in, height, hstride, activation))
+#endif
+
+#if defined(__x86_64__) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
+#if defined(_MSC_VER)
+#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
+#else
+#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
+#endif
+#endif
+
+
+
+#endif /* NNET_H_ */
diff --git a/dnn/nnet_arch.h b/dnn/nnet_arch.h
new file mode 100644
index 00000000..694a3608
--- /dev/null
+++ b/dnn/nnet_arch.h
@@ -0,0 +1,247 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef NNET_ARCH_H
+#define NNET_ARCH_H
+
+#include "nnet.h"
+#include "arch.h"
+#include "os_support.h"
+#include "vec.h"
+
+/* Token pasting helpers. The two-level CAT_SUFFIX/CAT_SUFFIX2 pair makes sure
+   RTCD_ARCH is macro-expanded before being pasted. */
+#define CAT_SUFFIX2(a,b) a ## b
+#define CAT_SUFFIX(a,b) CAT_SUFFIX2(a, b)
+
+/* Appends the RTCD_ARCH suffix chosen by the including .c file to `name`,
+   e.g. compute_linear_ becomes compute_linear_c when RTCD_ARCH is c. */
+#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
+
+/* Force vectorization on for DNN code because some of the loops rely on
+ compiler vectorization rather than explicitly using intrinsics. */
+#if OPUS_GNUC_PREREQ(5,1)
+#define GCC_POP_OPTIONS
+#pragma GCC push_options
+#pragma GCC optimize("tree-vectorize")
+#endif
+
+
+#define MAX_ACTIVATIONS (4096)
+
+/* Swish activation: y[k] = x[k] * vec_sigmoid(x)[k] for k in [0, N).
+   The sigmoid values go through a local buffer of MAX_ACTIVATIONS floats, so
+   N must not exceed that limit. */
+static OPUS_INLINE void vec_swish(float *y, const float *x, int N)
+{
+   float gate[MAX_ACTIVATIONS];
+   int k;
+   celt_assert(N <= MAX_ACTIVATIONS);
+   vec_sigmoid(gate, x, N);
+   for (k=0;k<N;k++)
+   {
+      y[k] = x[k]*gate[k];
+   }
+}
+
+/* Rectified linear unit: returns 0 for negative x and x itself otherwise. */
+static OPUS_INLINE float relu(float x)
+{
+   if (x < 0)
+   {
+      return 0;
+   }
+   return x;
+}
+
+/*#define HIGH_ACCURACY */
+
+/* Applies `activation` (one of the ACTIVATION_* constants) to the N values of
+   `input`, writing the results to `output`. Any value other than the handled
+   constants must be ACTIVATION_LINEAR, which copies the input (skipped when
+   the call is in place).
+   With HIGH_ACCURACY defined, sigmoid and tanh use the libm functions instead
+   of the vec_sigmoid()/vec_tanh() approximations.
+   NOTE(review): SOFTMAX_HACK (softmax becomes a plain copy) is defined in
+   nnet.c, but this header is compiled through nnet_default.c, which does not
+   define it in the lines visible here -- confirm which branch is intended. */
+void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
+{
+   int i;
+   if (activation == ACTIVATION_SIGMOID) {
+#ifdef HIGH_ACCURACY
+      for (i=0;i<N;i++)
+      {
+         output[i] = 1.f / (1 + exp(-input[i]));
+      }
+#else
+      vec_sigmoid(output, input, N);
+#endif
+   } else if (activation == ACTIVATION_TANH) {
+#ifdef HIGH_ACCURACY
+      for (i=0;i<N;i++)
+      {
+         output[i] = tanh(input[i]);
+      }
+#else
+      vec_tanh(output, input, N);
+#endif
+   } else if (activation == ACTIVATION_SWISH) {
+      vec_swish(output, input, N);
+   } else if (activation == ACTIVATION_RELU) {
+      for (i=0;i<N;i++)
+         output[i] = relu(input[i]);
+   } else if (activation == ACTIVATION_SOFTMAX) {
+#ifdef SOFTMAX_HACK
+      OPUS_COPY(output, input, N);
+#else
+      float sum = 0;
+      softmax(output, input, N);
+      for (i=0;i<N;i++) {
+         sum += output[i];
+      }
+      /* The tiny offset keeps the normalisation finite if every term is 0. */
+      sum = 1.f/(sum+1e-30);
+      for (i=0;i<N;i++)
+         output[i] = sum*output[i];
+#endif
+   } else {
+      celt_assert(activation == ACTIVATION_LINEAR);
+      if (input != output) {
+         for (i=0;i<N;i++)
+            output[i] = input[i];
+      }
+   }
+}
+
+
+/* Evaluates the affine map described by `linear`: out = W*in (+ bias) (+ diag).
+   Float weights are preferred over int8 weights; weights_idx selects the
+   sparse kernels; with no weights at all the product is zero. `in` and `out`
+   must be distinct buffers. */
+void RTCD_SUF(compute_linear_) (const LinearLayer *linear, float *out, const float *in)
+{
+   const float *bias = linear->bias;
+   const int n_in = linear->nb_inputs;
+   const int n_out = linear->nb_outputs;
+   const int is_sparse = (linear->weights_idx != NULL);
+   int k;
+   celt_assert(in != out);
+   if (linear->float_weights != NULL) {
+      if (is_sparse) {
+         sparse_sgemv8x4(out, linear->float_weights, linear->weights_idx, n_out, in);
+      } else {
+         sgemv(out, linear->float_weights, n_out, n_in, n_out, in);
+      }
+   } else if (linear->weights != NULL) {
+      if (is_sparse) {
+         sparse_cgemv8x4(out, linear->weights, linear->weights_idx, linear->scale, n_out, n_in, in);
+      } else {
+         cgemv8x4(out, linear->weights, linear->scale, n_out, n_in, in);
+      }
+      /* SU biases are only used for integer matrices, on SU archs. */
+#ifdef USE_SU_BIAS
+      bias = linear->subias;
+#endif
+   } else {
+      OPUS_CLEAR(out, n_out);
+   }
+   if (bias != NULL) {
+      for (k=0;k<n_out;k++) {
+         out[k] += bias[k];
+      }
+   }
+   if (linear->diag) {
+      const float *diag = linear->diag;
+      /* Diag is only used for GRU recurrent weights. */
+      celt_assert(3*n_in == n_out);
+      for (k=0;k<n_in;k++) {
+         out[k] += diag[k]*in[k];
+         out[k+n_in] += diag[k+n_in]*in[k];
+         out[k+2*n_in] += diag[k+2*n_in]*in[k];
+      }
+   }
+}
+
+/* Non-padded 2-D convolution for a single output frame.
+   Input layout:  [ ktime x in_channels x (height+kheight-1) ]
+   Kernel layout: [ out_channels x in_channels x ktime x kheight ]
+   Output layout: [ out_channels x height ], with rows hstride floats apart.
+   The output extent along the ktime axis is assumed to be 1, i.e. one frame
+   is processed per call. */
+static void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int height, int hstride)
+{
+   const int in_stride = height+kheight-1;
+   const int taps = ktime*kheight;
+   int oc;
+   for (oc=0;oc<out_channels;oc++) {
+      float *dst = &out[oc*hstride];
+      int ic;
+      OPUS_CLEAR(dst, height);
+      for (ic=0;ic<in_channels;ic++) {
+         int t;
+         for (t=0;t<ktime;t++) {
+            const float *w_row = &weights[oc*in_channels*taps + ic*taps + t*kheight];
+            const float *src = &in[t*in_channels*in_stride + ic*in_stride];
+            int h;
+            for (h=0;h<kheight;h++) {
+               const float w = w_row[h];
+               int j;
+               for (j=0;j<height;j++) {
+                  dst[j] += w * src[j + h];
+               }
+            }
+         }
+      }
+   }
+}
+
+/* Specialisation of conv2d_float() for ktime == kheight == 3, with the same
+   input, kernel and output layouts. The nine kernel taps are written out as a
+   single expression per output sample.
+   There are no intrinsics in this function (or the one above) because the gcc (and hopefully other compilers') auto-vectorizer is smart enough to
+ produce the right code by itself based on the compile flags. */
+static void conv2d_3x3_float(float *out, const float *weights, int in_channels, int out_channels, const float *in, int height, int hstride)
+{
+ int i;
+ int in_stride;
+ int kheight, ktime;
+ kheight = ktime = 3;
+ in_stride = height+kheight-1;
+ for (i=0;i<out_channels;i++) {
+ int m;
+ OPUS_CLEAR(&out[i*hstride], height);
+ for (m=0;m<in_channels;m++) {
+ int j;
+ for (j=0;j<height;j++) {
+ /* Unrolled version of previous function -- compiler will figure out the indexing simplifications. */
+ out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 0]*in[0*in_channels*in_stride + m*in_stride + j + 0]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 1]*in[0*in_channels*in_stride + m*in_stride + j + 1]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 2]*in[0*in_channels*in_stride + m*in_stride + j + 2]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 0]*in[1*in_channels*in_stride + m*in_stride + j + 0]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 1]*in[1*in_channels*in_stride + m*in_stride + j + 1]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 2]*in[1*in_channels*in_stride + m*in_stride + j + 2]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 0]*in[2*in_channels*in_stride + m*in_stride + j + 0]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 1]*in[2*in_channels*in_stride + m*in_stride + j + 1]
+ + weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 2]*in[2*in_channels*in_stride + m*in_stride + j + 2];
+ }
+ }
+ }
+}
+
+#define MAX_CONV2D_INPUTS 8192
+
+/* Streaming 2-D convolution for one input frame.
+   `mem` holds the previous ktime-1 frames, each of
+   in_channels*(height+kheight-1) floats; it is concatenated with `in` into a
+   local buffer and then advanced by one frame. The 3x3 case is routed to the
+   unrolled kernel. The bias (if any) is added per output channel and
+   `activation` is applied to each output row of `height` values, rows being
+   hstride floats apart. `in` must not alias `out`, and ktime frames must fit
+   in MAX_CONV2D_INPUTS floats. */
+void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation)
+{
+   float in_buf[MAX_CONV2D_INPUTS];
+   const float *bias;
+   int time_stride;
+   int hist_len;
+   int ch;
+   celt_assert(in != out);
+   time_stride = conv->in_channels*(height+conv->kheight-1);
+   celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
+   hist_len = (conv->ktime-1)*time_stride;
+   /* in_buf = [ history | new frame ]; the history then drops its oldest frame. */
+   OPUS_COPY(in_buf, mem, hist_len);
+   OPUS_COPY(&in_buf[hist_len], in, time_stride);
+   OPUS_COPY(mem, &in_buf[time_stride], hist_len);
+   bias = conv->bias;
+   if (conv->kheight == 3 && conv->ktime == 3) {
+      conv2d_3x3_float(out, conv->float_weights, conv->in_channels, conv->out_channels, in_buf, height, hstride);
+   } else {
+      conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, height, hstride);
+   }
+   if (bias != NULL) {
+      for (ch=0;ch<conv->out_channels;ch++) {
+         float *row = &out[ch*hstride];
+         int j;
+         for (j=0;j<height;j++) row[j] += bias[ch];
+      }
+   }
+   for (ch=0;ch<conv->out_channels;ch++) {
+      RTCD_SUF(compute_activation_)(&out[ch*hstride], &out[ch*hstride], height, activation);
+   }
+}
+
+#ifdef GCC_POP_OPTIONS
+#pragma GCC pop_options
+#endif
+
+#endif
diff --git a/dnn/nnet_default.c b/dnn/nnet_default.c
new file mode 100644
index 00000000..4316f0fb
--- /dev/null
+++ b/dnn/nnet_default.c
@@ -0,0 +1,35 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#define RTCD_ARCH c
+
+#include "nnet_arch.h"
diff --git a/dnn/osce.c b/dnn/osce.c
new file mode 100644
index 00000000..c412d5a1
--- /dev/null
+++ b/dnn/osce.c
@@ -0,0 +1,1419 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <math.h>
+#include "osce.h"
+#include "osce_features.h"
+#include "os_support.h"
+#include "nndsp.h"
+#include "float_cast.h"
+#include "arch.h"
+
+#ifdef OSCE_DEBUG
+#include <stdio.h>
+/*#define WRITE_FEATURES*/
+/*#define DEBUG_LACE*/
+/*#define DEBUG_NOLACE*/
+#define FINIT(fid, name, mode) do{if (fid == NULL) {fid = fopen(name, mode);}} while(0)
+#endif
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
+
+/* Clamps a to the range [min, max]. The lower bound is tested first, then the
+   upper bound. Arguments may be evaluated more than once, so they must be
+   free of side effects. */
+#define CLIP(a, min, max) ((a) < (min) ? (min) : ((a) > (max) ? (max) : (a)))
+
+extern const WeightArray lacelayers_arrays[];
+extern const WeightArray nolacelayers_arrays[];
+
+/* LACE */
+
+#ifndef DISABLE_LACE
+
+/* Writes an 8-value sinusoidal embedding of `numbits` to emb[0..7].
+   When `logscale` is non-zero the natural log of numbits is embedded instead
+   of numbits itself. The value is limited with CLIP(.., min_val, max_val),
+   centred on the midpoint of that range, and each output is
+   sin(x * LACE_NUMBITS_SCALE_k - 0.5). `dim` is unused. */
+static void compute_lace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
+{
+    float centered;
+    (void) dim;
+
+    if (logscale)
+    {
+        numbits = log(numbits);
+    }
+    centered = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
+
+    emb[0] = sin(centered * LACE_NUMBITS_SCALE_0 - 0.5f);
+    emb[1] = sin(centered * LACE_NUMBITS_SCALE_1 - 0.5f);
+    emb[2] = sin(centered * LACE_NUMBITS_SCALE_2 - 0.5f);
+    emb[3] = sin(centered * LACE_NUMBITS_SCALE_3 - 0.5f);
+    emb[4] = sin(centered * LACE_NUMBITS_SCALE_4 - 0.5f);
+    emb[5] = sin(centered * LACE_NUMBITS_SCALE_5 - 0.5f);
+    emb[6] = sin(centered * LACE_NUMBITS_SCALE_6 - 0.5f);
+    emb[7] = sin(centered * LACE_NUMBITS_SCALE_7 - 0.5f);
+}
+
+
+/* Zeroes *hLACE, loads its layers from `weights` (must not be NULL) and
+   fills the overlap window of LACE_OVERLAP_SIZE values. Returns the result
+   of init_lacelayers(). */
+static int init_lace(LACE *hLACE, const WeightArray *weights)
+{
+    int status;
+
+    OPUS_CLEAR(hLACE, 1);
+    celt_assert(weights != NULL);
+
+    status = init_lacelayers(&hLACE->layers, weights);
+    compute_overlap_window(hLACE->window, LACE_OVERLAP_SIZE);
+    return status;
+}
+
+/* Zeroes the whole LACE state, then runs the dedicated initialisers for the
+   two comb filter states and the adaptive convolution state. */
+static void reset_lace_state(LACEState *state)
+{
+    AdaCombState *comb1 = &state->cf1_state;
+    AdaCombState *comb2 = &state->cf2_state;
+    AdaConvState *conv1 = &state->af1_state;
+
+    OPUS_CLEAR(state, 1);
+    init_adacomb_state(comb1);
+    init_adacomb_state(comb2);
+    init_adaconv_state(conv1);
+}
+
+/* LACE feature network for one frame of four subframes.
+   Stages, in order:
+     1. per subframe: concatenate LACE_NUM_FEATURES features, the pitch
+        embedding row selected by periods[subframe] and the two numbits
+        embeddings, then apply lace_fnet_conv1 (tanh, no memory);
+     2. lace_fnet_conv2 over the four concatenated results (tanh, memory in
+        state->feature_net_conv2_state);
+     3. lace_fnet_tconv dense layer (tanh);
+     4. four GRU steps on consecutive LACE_COND_DIM slices; the GRU state
+        after each step is copied to `output`, which receives
+        4 * LACE_COND_DIM values.
+   numbits must hold two values; both are embedded on a log scale. */
+static void lace_feature_net(
+    LACE *hLACE,
+    LACEState *state,
+    float *output,
+    const float *features,
+    const float *numbits,
+    const int *periods,
+    int arch
+)
+{
+    float input_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)];
+    float output_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)];
+    float numbits_embedded[2 * LACE_NUMBITS_EMBEDDING_DIM];
+    int sf;
+    int k;
+
+    for (k = 0; k < 2; k++)
+    {
+        compute_lace_numbits_embedding(numbits_embedded + k * LACE_NUMBITS_EMBEDDING_DIM, numbits[k], LACE_NUMBITS_EMBEDDING_DIM,
+            log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1);
+    }
+
+    /* scaling and dimensionality reduction */
+    for (sf = 0; sf < 4; sf++)
+    {
+        const float *pitch_row = hLACE->layers.lace_pitch_embedding.float_weights + periods[sf] * LACE_PITCH_EMBEDDING_DIM;
+        float *dst = input_buffer;
+
+        OPUS_COPY(dst, features + sf * LACE_NUM_FEATURES, LACE_NUM_FEATURES);
+        dst += LACE_NUM_FEATURES;
+        OPUS_COPY(dst, pitch_row, LACE_PITCH_EMBEDDING_DIM);
+        dst += LACE_PITCH_EMBEDDING_DIM;
+        OPUS_COPY(dst, numbits_embedded, 2 * LACE_NUMBITS_EMBEDDING_DIM);
+
+        /* kernel covers exactly one input vector, hence no memory buffer */
+        compute_generic_conv1d(
+            &hLACE->layers.lace_fnet_conv1,
+            output_buffer + sf * LACE_HIDDEN_FEATURE_DIM,
+            NULL,
+            input_buffer,
+            LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM + 2 * LACE_NUMBITS_EMBEDDING_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+
+    /* subframe accumulation */
+    OPUS_COPY(input_buffer, output_buffer, 4 * LACE_HIDDEN_FEATURE_DIM);
+    compute_generic_conv1d(
+        &hLACE->layers.lace_fnet_conv2,
+        output_buffer,
+        state->feature_net_conv2_state,
+        input_buffer,
+        4 * LACE_HIDDEN_FEATURE_DIM,
+        ACTIVATION_TANH,
+        arch);
+
+    /* tconv upsampling */
+    OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM);
+    compute_generic_dense(
+        &hLACE->layers.lace_fnet_tconv,
+        output_buffer,
+        input_buffer,
+        ACTIVATION_TANH,
+        arch);
+
+    /* GRU */
+    OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM);
+    for (sf = 0; sf < 4; sf++)
+    {
+        compute_generic_gru(
+            &hLACE->layers.lace_fnet_gru_input,
+            &hLACE->layers.lace_fnet_gru_recurrent,
+            state->feature_net_gru_state,
+            input_buffer + sf * LACE_COND_DIM,
+            arch);
+        OPUS_COPY(output + sf * LACE_COND_DIM, state->feature_net_gru_state, LACE_COND_DIM);
+    }
+}
+
+
+/*
+ * Enhances one 20 ms frame (four subframes of LACE_FRAME_SIZE samples) with LACE.
+ *
+ * Processing order: pre-emphasis, feature network, two adaptive comb filter
+ * stages, one adaptive convolution stage, de-emphasis. All filter stages work
+ * in place on the same signal buffer, one subframe at a time, each conditioned
+ * on the matching LACE_COND_DIM slice produced by lace_feature_net().
+ *
+ *   hLACE     model (layers and overlap window)
+ *   state     persistent filter/network state, updated by this call
+ *   x_out     output signal, 4 * LACE_FRAME_SIZE samples
+ *   x_in      input signal, 4 * LACE_FRAME_SIZE samples
+ *   features  4 * LACE_NUM_FEATURES input features
+ *   numbits   two bit-count values forwarded to the feature network
+ *   periods   one pitch period per subframe
+ *   arch      run-time architecture selector
+ */
+static void lace_process_20ms_frame(
+    LACE* hLACE,
+    LACEState *state,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const float *numbits,
+    const int *periods,
+    int arch
+)
+{
+    float cond[4 * LACE_COND_DIM];
+    float signal[4 * LACE_FRAME_SIZE];
+    int sf, n;
+
+#ifdef DEBUG_LACE
+    static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;
+    static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods;
+
+    FINIT(f_features, "debug/c_features.f32", "wb");
+    FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");
+    FINIT(f_xin, "debug/c_x_in.f32", "wb");
+    FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");
+    FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");
+    FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");
+    FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");
+    FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");
+    FINIT(f_numbits, "debug/c_numbits.f32", "wb");
+    FINIT(f_periods, "debug/c_periods.s32", "wb");
+
+    fwrite(x_in, sizeof(*x_in), 4 * LACE_FRAME_SIZE, f_xin);
+    fwrite(numbits, sizeof(*numbits), 2, f_numbits);
+    fwrite(periods, sizeof(*periods), 4, f_periods);
+#endif
+
+    /* pre-emphasis: subtract the scaled previous input sample */
+    for (n = 0; n < 4 * LACE_FRAME_SIZE; n++)
+    {
+        signal[n] = x_in[n] - LACE_PREEMPH * state->preemph_mem;
+        state->preemph_mem = x_in[n];
+    }
+
+    /* feature encoder: fills cond with one LACE_COND_DIM vector per subframe */
+    lace_feature_net(hLACE, state, cond, features, numbits, periods, arch);
+#ifdef DEBUG_LACE
+    fwrite(features, sizeof(*features), 4 * LACE_NUM_FEATURES, f_features);
+    fwrite(cond, sizeof(*cond), 4 * LACE_COND_DIM, f_encfeatures);
+    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_xpreemph);
+#endif
+
+    /* comb filter stage 1 (in place) */
+    for (sf = 0; sf < 4; sf++)
+    {
+        float *x_sf = signal + sf * LACE_FRAME_SIZE;
+        float *cond_sf = cond + sf * LACE_COND_DIM;
+
+        adacomb_process_frame(
+            &state->cf1_state,
+            x_sf,
+            x_sf,
+            cond_sf,
+            &hLACE->layers.lace_cf1_kernel,
+            &hLACE->layers.lace_cf1_gain,
+            &hLACE->layers.lace_cf1_global_gain,
+            periods[sf],
+            LACE_COND_DIM,
+            LACE_FRAME_SIZE,
+            LACE_OVERLAP_SIZE,
+            LACE_CF1_KERNEL_SIZE,
+            LACE_CF1_LEFT_PADDING,
+            LACE_CF1_FILTER_GAIN_A,
+            LACE_CF1_FILTER_GAIN_B,
+            LACE_CF1_LOG_GAIN_LIMIT,
+            hLACE->window,
+            arch);
+    }
+
+#ifdef DEBUG_LACE
+    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf1);
+#endif
+
+    /* comb filter stage 2 (in place) */
+    for (sf = 0; sf < 4; sf++)
+    {
+        float *x_sf = signal + sf * LACE_FRAME_SIZE;
+        float *cond_sf = cond + sf * LACE_COND_DIM;
+
+        adacomb_process_frame(
+            &state->cf2_state,
+            x_sf,
+            x_sf,
+            cond_sf,
+            &hLACE->layers.lace_cf2_kernel,
+            &hLACE->layers.lace_cf2_gain,
+            &hLACE->layers.lace_cf2_global_gain,
+            periods[sf],
+            LACE_COND_DIM,
+            LACE_FRAME_SIZE,
+            LACE_OVERLAP_SIZE,
+            LACE_CF2_KERNEL_SIZE,
+            LACE_CF2_LEFT_PADDING,
+            LACE_CF2_FILTER_GAIN_A,
+            LACE_CF2_FILTER_GAIN_B,
+            LACE_CF2_LOG_GAIN_LIMIT,
+            hLACE->window,
+            arch);
+    }
+#ifdef DEBUG_LACE
+    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf2);
+#endif
+
+    /* adaptive convolution stage (in place) */
+    for (sf = 0; sf < 4; sf++)
+    {
+        float *x_sf = signal + sf * LACE_FRAME_SIZE;
+        float *cond_sf = cond + sf * LACE_COND_DIM;
+
+        adaconv_process_frame(
+            &state->af1_state,
+            x_sf,
+            x_sf,
+            cond_sf,
+            &hLACE->layers.lace_af1_kernel,
+            &hLACE->layers.lace_af1_gain,
+            LACE_COND_DIM,
+            LACE_FRAME_SIZE,
+            LACE_OVERLAP_SIZE,
+            LACE_AF1_IN_CHANNELS,
+            LACE_AF1_OUT_CHANNELS,
+            LACE_AF1_KERNEL_SIZE,
+            LACE_AF1_LEFT_PADDING,
+            LACE_AF1_FILTER_GAIN_A,
+            LACE_AF1_FILTER_GAIN_B,
+            LACE_AF1_SHAPE_GAIN,
+            hLACE->window,
+            arch);
+    }
+#ifdef DEBUG_LACE
+    fwrite(signal, sizeof(float), 4 * LACE_FRAME_SIZE, f_postaf1);
+#endif
+
+    /* de-emphasis: add back the scaled previous output sample */
+    for (n = 0; n < 4 * LACE_FRAME_SIZE; n++)
+    {
+        x_out[n] = signal[n] + LACE_PREEMPH * state->deemph_mem;
+        state->deemph_mem = x_out[n];
+    }
+#ifdef DEBUG_LACE
+    fwrite(x_out, sizeof(float), 4 * LACE_FRAME_SIZE, f_xdeemph);
+#endif
+}
+
+#endif /* #ifndef DISABLE_LACE */
+
+
+/* NoLACE */
+#ifndef DISABLE_NOLACE
+
+static void compute_nolace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
+{
+ float x;
+ (void) dim;
+
+ numbits = logscale ? log(numbits) : numbits;
+ x = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
+
+ emb[0] = sin(x * NOLACE_NUMBITS_SCALE_0 - 0.5f);
+ emb[1] = sin(x * NOLACE_NUMBITS_SCALE_1 - 0.5f);
+ emb[2] = sin(x * NOLACE_NUMBITS_SCALE_2 - 0.5f);
+ emb[3] = sin(x * NOLACE_NUMBITS_SCALE_3 - 0.5f);
+ emb[4] = sin(x * NOLACE_NUMBITS_SCALE_4 - 0.5f);
+ emb[5] = sin(x * NOLACE_NUMBITS_SCALE_5 - 0.5f);
+ emb[6] = sin(x * NOLACE_NUMBITS_SCALE_6 - 0.5f);
+ emb[7] = sin(x * NOLACE_NUMBITS_SCALE_7 - 0.5f);
+}
+
+/*
+ * Initializes a NoLACE model from a weight array list.
+ *
+ * The struct is zeroed first, then the layers are set up from `weights`
+ * (must not be NULL) and the overlap window is computed. Returns whatever
+ * init_nolacelayers() returns; the window is computed regardless of that
+ * result.
+ */
+static int init_nolace(NoLACE *hNoLACE, const WeightArray *weights)
+{
+    int status;
+
+    OPUS_CLEAR(hNoLACE, 1);
+    celt_assert(weights != NULL);
+
+    status = init_nolacelayers(&hNoLACE->layers, weights);
+    compute_overlap_window(hNoLACE->window, NOLACE_OVERLAP_SIZE);
+
+    return status;
+}
+
+/* Resets a NoLACE state: zeroes the whole struct, then runs the dedicated
+ * init routine of every adaptive comb (cf1-cf2), adaptive convolution
+ * (af1-af4) and adaptive shaping (tdshape1-tdshape3) sub-state. */
+static void reset_nolace_state(NoLACEState *state)
+{
+ OPUS_CLEAR(state, 1);
+
+ init_adacomb_state(&state->cf1_state);
+ init_adacomb_state(&state->cf2_state);
+ init_adaconv_state(&state->af1_state);
+ init_adaconv_state(&state->af2_state);
+ init_adaconv_state(&state->af3_state);
+ init_adaconv_state(&state->af4_state);
+ init_adashape_state(&state->tdshape1_state);
+ init_adashape_state(&state->tdshape2_state);
+ init_adashape_state(&state->tdshape3_state);
+}
+
+/*
+ * NoLACE feature network: turns the per-subframe input features, pitch
+ * periods and bit counts of one frame into four NOLACE_COND_DIM conditioning
+ * vectors.
+ *
+ *   hNoLACE   model (layers)
+ *   state     persistent conv2 and GRU state, updated by this call
+ *   output    4 * NOLACE_COND_DIM floats; slice k is the GRU state after step k
+ *   features  4 * NOLACE_NUM_FEATURES input features
+ *   numbits   two bit-count values, each embedded on a log scale
+ *   periods   one pitch period per subframe, used as a row index into the
+ *             pitch embedding table
+ *   arch      run-time architecture selector
+ */
+static void nolace_feature_net(
+    NoLACE *hNoLACE,
+    NoLACEState *state,
+    float *output,
+    const float *features,
+    const float *numbits,
+    const int *periods,
+    int arch
+)
+{
+    float in_buf[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)];
+    float out_buf[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)];
+    float numbits_emb[2 * NOLACE_NUMBITS_EMBEDDING_DIM];
+    int k;
+
+    /* embed both bit counts back to back */
+    for (k = 0; k < 2; k++)
+    {
+        compute_nolace_numbits_embedding(
+            numbits_emb + k * NOLACE_NUMBITS_EMBEDDING_DIM,
+            numbits[k],
+            NOLACE_NUMBITS_EMBEDDING_DIM,
+            log(NOLACE_NUMBITS_RANGE_LOW),
+            log(NOLACE_NUMBITS_RANGE_HIGH),
+            1);
+    }
+
+    /* scaling and dimensionality reduction, one subframe at a time */
+    for (k = 0; k < 4; k++)
+    {
+        /* input vector = [features | pitch embedding row | numbits embedding] */
+        float *dst = in_buf;
+
+        OPUS_COPY(dst, features + k * NOLACE_NUM_FEATURES, NOLACE_NUM_FEATURES);
+        dst += NOLACE_NUM_FEATURES;
+        OPUS_COPY(dst, hNoLACE->layers.nolace_pitch_embedding.float_weights + periods[k] * NOLACE_PITCH_EMBEDDING_DIM, NOLACE_PITCH_EMBEDDING_DIM);
+        dst += NOLACE_PITCH_EMBEDDING_DIM;
+        OPUS_COPY(dst, numbits_emb, 2 * NOLACE_NUMBITS_EMBEDDING_DIM);
+
+        /* called with a NULL state pointer */
+        compute_generic_conv1d(
+            &hNoLACE->layers.nolace_fnet_conv1,
+            out_buf + k * NOLACE_HIDDEN_FEATURE_DIM,
+            NULL,
+            in_buf,
+            NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM + 2 * NOLACE_NUMBITS_EMBEDDING_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+
+    /* subframe accumulation: all four hidden vectors go through conv2 at once */
+    OPUS_COPY(in_buf, out_buf, 4 * NOLACE_HIDDEN_FEATURE_DIM);
+    compute_generic_conv1d(
+        &hNoLACE->layers.nolace_fnet_conv2,
+        out_buf,
+        state->feature_net_conv2_state,
+        in_buf,
+        4 * NOLACE_HIDDEN_FEATURE_DIM,
+        ACTIVATION_TANH,
+        arch
+    );
+
+    /* tconv upsampling */
+    OPUS_COPY(in_buf, out_buf, 4 * NOLACE_COND_DIM);
+    compute_generic_dense(
+        &hNoLACE->layers.nolace_fnet_tconv,
+        out_buf,
+        in_buf,
+        ACTIVATION_TANH,
+        arch
+    );
+
+    /* GRU: one step per subframe, the state after each step is the output */
+    OPUS_COPY(in_buf, out_buf, 4 * NOLACE_COND_DIM);
+    for (k = 0; k < 4; k++)
+    {
+        compute_generic_gru(
+            &hNoLACE->layers.nolace_fnet_gru_input,
+            &hNoLACE->layers.nolace_fnet_gru_recurrent,
+            state->feature_net_gru_state,
+            in_buf + k * NOLACE_COND_DIM,
+            arch
+        );
+        OPUS_COPY(output + k * NOLACE_COND_DIM, state->feature_net_gru_state, NOLACE_COND_DIM);
+    }
+}
+
+
+/*
+ * Enhances one 20 ms frame (four subframes of NOLACE_FRAME_SIZE samples)
+ * with NoLACE.
+ *
+ * Processing order: pre-emphasis, feature network, two adaptive comb filter
+ * stages (in place on x_buffer1), adaptive convolution af1 (x_buffer1 ->
+ * x_buffer2), three "shape-mix" rounds that each shape the second channel in
+ * place and then mix with af2/af3/af4 while ping-ponging between the two
+ * signal buffers, and finally de-emphasis. After every stage except the last
+ * the conditioning features are passed through a per-stage conv1d and the
+ * result replaces feature_buffer for the next stage.
+ *
+ * Multi-channel intermediate signals are stored per subframe with the
+ * channels back to back: the second channel of subframe k starts at
+ * k * 2 * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE. The signal buffers hold
+ * 8 * NOLACE_FRAME_SIZE floats, i.e. four subframes of two channels.
+ *
+ *   hNoLACE   model (layers and overlap window)
+ *   state     persistent filter/network state, updated by this call
+ *   x_out     output signal, 4 * NOLACE_FRAME_SIZE samples
+ *   x_in      input signal, 4 * NOLACE_FRAME_SIZE samples
+ *   features  4 * NOLACE_NUM_FEATURES input features
+ *   numbits   two bit-count values forwarded to the feature network
+ *   periods   one pitch period per subframe
+ *   arch      run-time architecture selector
+ */
+static void nolace_process_20ms_frame(
+    NoLACE* hNoLACE,
+    NoLACEState *state,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const float *numbits,
+    const int *periods,
+    int arch
+)
+{
+    float feature_buffer[4 * NOLACE_COND_DIM];
+    float feature_transform_buffer[4 * NOLACE_COND_DIM];
+    float x_buffer1[8 * NOLACE_FRAME_SIZE];
+    float x_buffer2[8 * NOLACE_FRAME_SIZE];
+    int i_subframe, i_sample;
+    NOLACELayers *layers = &hNoLACE->layers;
+
+#ifdef DEBUG_NOLACE
+    static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;
+    static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_postaf2=NULL, *f_xdeemph=NULL, *f_numbits=NULL, *f_periods=NULL;
+
+    FINIT(f_features, "debug/c_features.f32", "wb");
+    FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");
+    FINIT(f_xin, "debug/c_x_in.f32", "wb");
+    FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");
+    FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");
+    FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");
+    FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");
+    FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");
+    /* this stream is written after the first shape-mix round below */
+    FINIT(f_postaf2, "debug/c_post_af2.f32", "wb");
+    FINIT(f_numbits, "debug/c_numbits.f32", "wb");
+    FINIT(f_periods, "debug/c_periods.s32", "wb");
+
+    fwrite(x_in, sizeof(*x_in), 4 * NOLACE_FRAME_SIZE, f_xin);
+    fwrite(numbits, sizeof(*numbits), 2, f_numbits);
+    fwrite(periods, sizeof(*periods), 4, f_periods);
+#endif
+
+    /* pre-emphasis */
+    for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)
+    {
+        x_buffer1[i_sample] = x_in[i_sample] - NOLACE_PREEMPH * state->preemph_mem;
+        state->preemph_mem = x_in[i_sample];
+    }
+
+    /* run feature encoder */
+    nolace_feature_net(hNoLACE, state, feature_buffer, features, numbits, periods, arch);
+#ifdef DEBUG_NOLACE
+    fwrite(features, sizeof(*features), 4 * NOLACE_NUM_FEATURES, f_features);
+    fwrite(feature_buffer, sizeof(*feature_buffer), 4 * NOLACE_COND_DIM, f_encfeatures);
+    /* the pre-emphasized signal lives in x_buffer1 in this function */
+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xpreemph);
+#endif
+
+    /* 1st comb filtering stage */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        /* modifies signal in place */
+        adacomb_process_frame(
+            &state->cf1_state,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &hNoLACE->layers.nolace_cf1_kernel,
+            &hNoLACE->layers.nolace_cf1_gain,
+            &hNoLACE->layers.nolace_cf1_global_gain,
+            periods[i_subframe],
+            NOLACE_COND_DIM,
+            NOLACE_FRAME_SIZE,
+            NOLACE_OVERLAP_SIZE,
+            NOLACE_CF1_KERNEL_SIZE,
+            NOLACE_CF1_LEFT_PADDING,
+            NOLACE_CF1_FILTER_GAIN_A,
+            NOLACE_CF1_FILTER_GAIN_B,
+            NOLACE_CF1_LOG_GAIN_LIMIT,
+            hNoLACE->window,
+            arch);
+
+        /* feature transform for the next stage */
+        compute_generic_conv1d(
+            &layers->nolace_post_cf1,
+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+            state->post_cf1_state,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            NOLACE_COND_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+
+    /* update feature buffer */
+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf1);
+#endif
+
+    /* 2nd comb filtering stage */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        /* modifies signal in place */
+        adacomb_process_frame(
+            &state->cf2_state,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &hNoLACE->layers.nolace_cf2_kernel,
+            &hNoLACE->layers.nolace_cf2_gain,
+            &hNoLACE->layers.nolace_cf2_global_gain,
+            periods[i_subframe],
+            NOLACE_COND_DIM,
+            NOLACE_FRAME_SIZE,
+            NOLACE_OVERLAP_SIZE,
+            NOLACE_CF2_KERNEL_SIZE,
+            NOLACE_CF2_LEFT_PADDING,
+            NOLACE_CF2_FILTER_GAIN_A,
+            NOLACE_CF2_FILTER_GAIN_B,
+            NOLACE_CF2_LOG_GAIN_LIMIT,
+            hNoLACE->window,
+            arch);
+
+        compute_generic_conv1d(
+            &layers->nolace_post_cf2,
+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+            state->post_cf2_state,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            NOLACE_COND_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+
+    /* update feature buffer */
+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf2);
+#endif
+
+    /* final adaptive filtering stage */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        /* x_buffer1 -> x_buffer2, output has NOLACE_AF1_OUT_CHANNELS channels */
+        adaconv_process_frame(
+            &state->af1_state,
+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &hNoLACE->layers.nolace_af1_kernel,
+            &hNoLACE->layers.nolace_af1_gain,
+            NOLACE_COND_DIM,
+            NOLACE_FRAME_SIZE,
+            NOLACE_OVERLAP_SIZE,
+            NOLACE_AF1_IN_CHANNELS,
+            NOLACE_AF1_OUT_CHANNELS,
+            NOLACE_AF1_KERNEL_SIZE,
+            NOLACE_AF1_LEFT_PADDING,
+            NOLACE_AF1_FILTER_GAIN_A,
+            NOLACE_AF1_FILTER_GAIN_B,
+            NOLACE_AF1_SHAPE_GAIN,
+            hNoLACE->window,
+            arch);
+
+        compute_generic_conv1d(
+            &layers->nolace_post_af1,
+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+            state->post_af1_state,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            NOLACE_COND_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+
+    /* update feature buffer */
+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+    fwrite(x_buffer2, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, f_postaf1);
+#endif
+
+    /* first shape-mix round */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        celt_assert(NOLACE_AF1_OUT_CHANNELS == 2);
+        /* modifies second channel in place */
+        adashape_process_frame(
+            &state->tdshape1_state,
+            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &layers->nolace_tdshape1_alpha1_f,
+            &layers->nolace_tdshape1_alpha1_t,
+            &layers->nolace_tdshape1_alpha2,
+            NOLACE_TDSHAPE1_FEATURE_DIM,
+            NOLACE_TDSHAPE1_FRAME_SIZE,
+            NOLACE_TDSHAPE1_AVG_POOL_K,
+            arch
+        );
+
+        /* x_buffer2 -> x_buffer1 */
+        adaconv_process_frame(
+            &state->af2_state,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS,
+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_IN_CHANNELS,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &hNoLACE->layers.nolace_af2_kernel,
+            &hNoLACE->layers.nolace_af2_gain,
+            NOLACE_COND_DIM,
+            NOLACE_FRAME_SIZE,
+            NOLACE_OVERLAP_SIZE,
+            NOLACE_AF2_IN_CHANNELS,
+            NOLACE_AF2_OUT_CHANNELS,
+            NOLACE_AF2_KERNEL_SIZE,
+            NOLACE_AF2_LEFT_PADDING,
+            NOLACE_AF2_FILTER_GAIN_A,
+            NOLACE_AF2_FILTER_GAIN_B,
+            NOLACE_AF2_SHAPE_GAIN,
+            hNoLACE->window,
+            arch);
+
+        compute_generic_conv1d(
+            &layers->nolace_post_af2,
+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+            state->post_af2_state,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            NOLACE_COND_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+
+    /* update feature buffer */
+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+#ifdef DEBUG_NOLACE
+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, f_postaf2);
+#endif
+
+    /* second shape-mix round */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        celt_assert(NOLACE_AF2_OUT_CHANNELS == 2);
+        /* modifies second channel in place */
+        adashape_process_frame(
+            &state->tdshape2_state,
+            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &layers->nolace_tdshape2_alpha1_f,
+            &layers->nolace_tdshape2_alpha1_t,
+            &layers->nolace_tdshape2_alpha2,
+            NOLACE_TDSHAPE2_FEATURE_DIM,
+            NOLACE_TDSHAPE2_FRAME_SIZE,
+            NOLACE_TDSHAPE2_AVG_POOL_K,
+            arch
+        );
+
+        /* x_buffer1 -> x_buffer2 */
+        adaconv_process_frame(
+            &state->af3_state,
+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_OUT_CHANNELS,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_IN_CHANNELS,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &hNoLACE->layers.nolace_af3_kernel,
+            &hNoLACE->layers.nolace_af3_gain,
+            NOLACE_COND_DIM,
+            NOLACE_FRAME_SIZE,
+            NOLACE_OVERLAP_SIZE,
+            NOLACE_AF3_IN_CHANNELS,
+            NOLACE_AF3_OUT_CHANNELS,
+            NOLACE_AF3_KERNEL_SIZE,
+            NOLACE_AF3_LEFT_PADDING,
+            NOLACE_AF3_FILTER_GAIN_A,
+            NOLACE_AF3_FILTER_GAIN_B,
+            NOLACE_AF3_SHAPE_GAIN,
+            hNoLACE->window,
+            arch);
+
+        compute_generic_conv1d(
+            &layers->nolace_post_af3,
+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,
+            state->post_af3_state,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            NOLACE_COND_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+
+    /* update feature buffer */
+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);
+
+    /* third shape-mix round */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        celt_assert(NOLACE_AF3_OUT_CHANNELS == 2);
+        /* modifies second channel in place */
+        adashape_process_frame(
+            &state->tdshape3_state,
+            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &layers->nolace_tdshape3_alpha1_f,
+            &layers->nolace_tdshape3_alpha1_t,
+            &layers->nolace_tdshape3_alpha2,
+            NOLACE_TDSHAPE3_FEATURE_DIM,
+            NOLACE_TDSHAPE3_FRAME_SIZE,
+            NOLACE_TDSHAPE3_AVG_POOL_K,
+            arch
+        );
+
+        /* x_buffer2 -> x_buffer1; no feature transform follows the last stage */
+        adaconv_process_frame(
+            &state->af4_state,
+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_OUT_CHANNELS,
+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_IN_CHANNELS,
+            feature_buffer + i_subframe * NOLACE_COND_DIM,
+            &hNoLACE->layers.nolace_af4_kernel,
+            &hNoLACE->layers.nolace_af4_gain,
+            NOLACE_COND_DIM,
+            NOLACE_FRAME_SIZE,
+            NOLACE_OVERLAP_SIZE,
+            NOLACE_AF4_IN_CHANNELS,
+            NOLACE_AF4_OUT_CHANNELS,
+            NOLACE_AF4_KERNEL_SIZE,
+            NOLACE_AF4_LEFT_PADDING,
+            NOLACE_AF4_FILTER_GAIN_A,
+            NOLACE_AF4_FILTER_GAIN_B,
+            NOLACE_AF4_SHAPE_GAIN,
+            hNoLACE->window,
+            arch);
+
+    }
+
+
+    /* de-emphasis */
+    /* NOTE(review): reads the first 4 * NOLACE_FRAME_SIZE samples of x_buffer1
+       contiguously, which presumably relies on NOLACE_AF4_OUT_CHANNELS == 1 */
+    for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)
+    {
+        x_out[i_sample] = x_buffer1[i_sample] + NOLACE_PREEMPH * state->deemph_mem;
+        state->deemph_mem = x_out[i_sample];
+    }
+#ifdef DEBUG_NOLACE
+    fwrite(x_out, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xdeemph);
+#endif
+}
+
+#endif /* #ifndef DISABLE_NOLACE */
+
+/* API */
+
+/*
+ * Resets the OSCE part of a decoder for the given enhancement method.
+ *
+ * Clears the feature state, resets the state of the selected method (nothing
+ * to reset for OSCE_METHOD_NONE), records the method and sets features.reset
+ * to 2. An unknown method only trips the assertion; the method is still
+ * recorded afterwards.
+ */
+void osce_reset(silk_OSCE_struct *hOSCE, int method)
+{
+    OPUS_CLEAR(&hOSCE->features, 1);
+
+    switch(method)
+    {
+#ifndef DISABLE_LACE
+        case OSCE_METHOD_LACE:
+            reset_lace_state(&hOSCE->state.lace);
+            break;
+#endif
+#ifndef DISABLE_NOLACE
+        case OSCE_METHOD_NOLACE:
+            reset_nolace_state(&hOSCE->state.nolace);
+            break;
+#endif
+        case OSCE_METHOD_NONE:
+            break;
+        default:
+            celt_assert(0 && "method not defined"); /* Question: return error code? */
+    }
+
+    hOSCE->method = method;
+    hOSCE->features.reset = 2;
+}
+
+
+#if 0
+#include <stdio.h>
+/* Debug helpers, compiled out by the surrounding #if 0. Each prints one
+ * "name[i]: value" line per element to fid. */
+
+/* Prints n floats from array. */
+static void print_float_array(FILE *fid, const char *name, const float *array, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ fprintf(fid, "%s[%d]: %f\n", name, i, array[i]);
+ }
+}
+
+/* Prints n ints from array. */
+static void print_int_array(FILE *fid, const char *name, const int *array, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ fprintf(fid, "%s[%d]: %d\n", name, i, array[i]);
+ }
+}
+
+/* Prints n opus_int8 values from array. */
+static void print_int8_array(FILE *fid, const char *name, const opus_int8 *array, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ fprintf(fid, "%s[%d]: %d\n", name, i, array[i]);
+ }
+}
+
+/* Dumps a LinearLayer: its input/output counts, then every non-NULL member
+ * array (bias, subias, weights, float_weights, diag, scale). Weight arrays are
+ * printed with nb_inputs * nb_outputs elements. */
+static void print_linear_layer(FILE *fid, const char *name, LinearLayer *layer)
+{
+ int i, n_in, n_out, n_total;
+ char tmp[256];
+
+ n_in = layer->nb_inputs;
+ n_out = layer->nb_outputs;
+ n_total = n_in * n_out;
+
+ fprintf(fid, "\nprinting layer %s...\n", name);
+ fprintf(fid, "%s.nb_inputs: %d\n%s.nb_outputs: %d\n", name, n_in, name, n_out);
+
+ /* NOTE(review): the four checks below have empty bodies and do nothing */
+ if (layer->bias !=NULL){}
+ if (layer->subias !=NULL){}
+ if (layer->weights !=NULL){}
+ if (layer->float_weights !=NULL){}
+
+ /* NOTE(review): sprintf into tmp[256] is unbounded in the length of name */
+ if (layer->bias != NULL) {sprintf(tmp, "%s.bias", name); print_float_array(fid, tmp, layer->bias, n_out);}
+ if (layer->subias != NULL) {sprintf(tmp, "%s.subias", name); print_float_array(fid, tmp, layer->subias, n_out);}
+ if (layer->weights != NULL) {sprintf(tmp, "%s.weights", name); print_int8_array(fid, tmp, layer->weights, n_total);}
+ if (layer->float_weights != NULL) {sprintf(tmp, "%s.float_weights", name); print_float_array(fid, tmp, layer->float_weights, n_total);}
+ //if (layer->weights_idx != NULL) {sprintf(tmp, "%s.weights_idx", name); print_float_array(fid, tmp, layer->weights_idx, n_total);}
+ if (layer->diag != NULL) {sprintf(tmp, "%s.diag", name); print_float_array(fid, tmp, layer->diag, n_in);}
+ if (layer->scale != NULL) {sprintf(tmp, "%s.scale", name); print_float_array(fid, tmp, layer->scale, n_out);}
+
+}
+#endif
+
+/*
+ * Initializes the OSCE models, either from a serialized weight blob or from
+ * the weights compiled into the library.
+ *
+ *   model  model struct to initialize
+ *   data   weight blob, or NULL to use the built-in weights
+ *   len    size of the blob; 0 also selects the built-in weights
+ *
+ * Returns 0 on success and -1 on failure. Failure cases: the blob could not
+ * be parsed into a weight list, a model init routine reported an error, or
+ * no blob was given in a build with USE_WEIGHTS_FILE (no built-in weights).
+ */
+int osce_load_models(OSCEModel *model, const void *data, int len)
+{
+    int ret = 0;
+    WeightArray *list = NULL;
+
+    if (data != NULL && len)
+    {
+        /* init from buffer */
+        parse_weights(&list, data, len);
+
+        /* The init routines require a non-NULL weight list (they only assert
+           on it), so a parse that produced no list is reported as an error
+           instead of being passed on. */
+        if (list == NULL)
+        {
+            return -1;
+        }
+
+#ifndef DISABLE_LACE
+        if (ret == 0) {ret = init_lace(&model->lace, list);}
+#endif
+
+#ifndef DISABLE_NOLACE
+        if (ret == 0) {ret = init_nolace(&model->nolace, list);}
+#endif
+
+        free(list);
+    } else
+    {
+#ifdef USE_WEIGHTS_FILE
+        return -1;
+#else
+#ifndef DISABLE_LACE
+        if (ret == 0) {ret = init_lace(&model->lace, lacelayers_arrays);}
+#endif
+
+#ifndef DISABLE_NOLACE
+        if (ret == 0) {ret = init_nolace(&model->nolace, nolacelayers_arrays);}
+#endif
+
+#endif /* USE_WEIGHTS_FILE */
+    }
+
+    /* normalize any non-zero init status to -1 */
+    ret = ret ? -1 : 0;
+    return ret;
+}
+
+/*
+ * Applies the configured OSCE enhancement method to one decoded SILK frame,
+ * in place.
+ *
+ * Only 20 ms frames at 16 kHz (fs_kHz == 16, nb_subfr == 4, i.e. 320 samples)
+ * are enhanced; for any other configuration the OSCE state is reset and xq is
+ * left untouched. When the model is not loaded the signal is passed through.
+ * While features.reset is above 1 the enhanced signal is replaced by the
+ * input; on the last reset frame the output goes through
+ * osce_cross_fade_10ms() together with the input.
+ */
+void osce_enhance_frame(
+    OSCEModel *model, /* I OSCE model struct */
+    silk_decoder_state *psDec, /* I/O Decoder state */
+    silk_decoder_control *psDecCtrl, /* I Decoder control */
+    opus_int16 xq[], /* I/O Decoded speech */
+    opus_int32 num_bits, /* I Size of SILK payload in bits */
+    int arch /* I Run-time architecture */
+)
+{
+    float in_buffer[320];
+    float out_buffer[320];
+    float features[4 * OSCE_FEATURE_DIM];
+    float numbits[2];
+    int periods[4];
+    int i;
+    int method;
+
+    /* enhancement only implemented for 20 ms frame at 16kHz */
+    if (psDec->fs_kHz != 16 || psDec->nb_subfr != 4)
+    {
+        osce_reset(&psDec->osce, psDec->osce.method);
+        return;
+    }
+
+    osce_calculate_features(psDec, psDecCtrl, features, numbits, periods, xq, num_bits);
+
+    /* scale input */
+    for (i = 0; i < 320; i++)
+    {
+        in_buffer[i] = ((float) xq[i]) * (1.f/32768.f);
+    }
+
+    if (model->loaded)
+        method = psDec->osce.method;
+    else
+        method = OSCE_METHOD_NONE;
+    switch(method)
+    {
+        case OSCE_METHOD_NONE:
+            OPUS_COPY(out_buffer, in_buffer, 320);
+            break;
+#ifndef DISABLE_LACE
+        case OSCE_METHOD_LACE:
+            lace_process_20ms_frame(&model->lace, &psDec->osce.state.lace, out_buffer, in_buffer, features, numbits, periods, arch);
+            break;
+#endif
+#ifndef DISABLE_NOLACE
+        case OSCE_METHOD_NOLACE:
+            nolace_process_20ms_frame(&model->nolace, &psDec->osce.state.nolace, out_buffer, in_buffer, features, numbits, periods, arch);
+            break;
+#endif
+        default:
+            celt_assert(0 && "method not defined");
+            /* If the assertion is compiled out, fall back to pass-through so
+               that out_buffer is never read uninitialized below. */
+            OPUS_COPY(out_buffer, in_buffer, 320);
+            break;
+    }
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+    /* Dumps decoder parameters and the un-enhanced signal for training.
+       Kept in its own block so the declarations stay at the top of a scope. */
+    {
+        int k;
+
+        static FILE *flpc = NULL;
+        static FILE *fgain = NULL;
+        static FILE *fltp = NULL;
+        static FILE *fperiod = NULL;
+        static FILE *fnoisy16k = NULL;
+        static FILE* f_numbits = NULL;
+        static FILE* f_numbits_smooth = NULL;
+
+        if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");}
+        if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");}
+        if (fltp == NULL) {fltp = fopen("features_ltp.f32", "wb");}
+        if (fperiod == NULL) {fperiod = fopen("features_period.s16", "wb");}
+        if (fnoisy16k == NULL) {fnoisy16k = fopen("noisy_16k.s16", "wb");}
+        if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");}
+        if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");}
+
+        fwrite(&num_bits, sizeof(num_bits), 1, f_numbits);
+        fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth);
+
+        for (k = 0; k < psDec->nb_subfr; k++)
+        {
+            float tmp;
+            int16_t itmp;
+            float lpc_buffer[16] = {0};
+            opus_int16 *A_Q12, *B_Q14;
+
+            (void) num_bits;
+            (void) arch;
+
+            /* gain */
+            tmp = (float) psDecCtrl->Gains_Q16[k] / (1UL << 16);
+            fwrite(&tmp, sizeof(tmp), 1, fgain);
+
+            /* LPC: always 16 values per subframe, zero-padded above LPC_order */
+            A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ];
+            for (i = 0; i < psDec->LPC_order; i++)
+            {
+                lpc_buffer[i] = (float) A_Q12[i] / (1U << 12);
+            }
+            fwrite(lpc_buffer, sizeof(lpc_buffer[0]), 16, flpc);
+
+            /* LTP */
+            B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
+            for (i = 0; i < 5; i++)
+            {
+                tmp = (float) B_Q14[i] / (1U << 14);
+                fwrite(&tmp, sizeof(tmp), 1, fltp);
+            }
+
+            /* periods: 0 for frames that are not voiced */
+            itmp = psDec->indices.signalType == TYPE_VOICED ? psDecCtrl->pitchL[ k ] : 0;
+            fwrite(&itmp, sizeof(itmp), 1, fperiod);
+        }
+
+        fwrite(xq, psDec->nb_subfr * psDec->subfr_length, sizeof(xq[0]), fnoisy16k);
+    }
+#endif
+
+    if (psDec->osce.features.reset > 1)
+    {
+        /* still resetting: discard the enhanced signal */
+        OPUS_COPY(out_buffer, in_buffer, 320);
+        psDec->osce.features.reset --;
+    }
+    else if (psDec->osce.features.reset)
+    {
+        osce_cross_fade_10ms(out_buffer, in_buffer, 320);
+        psDec->osce.features.reset = 0;
+    }
+
+    /* scale output, saturating symmetrically at +/-32767 */
+    for (i = 0; i < 320; i++)
+    {
+        float tmp = 32768.f * out_buffer[i];
+        if (tmp > 32767.f) tmp = 32767.f;
+        if (tmp < -32767.f) tmp = -32767.f;
+        xq[i] = float2int(tmp);
+    }
+
+}
+
+
+#if 0
+
+#include <stdio.h>
+
+/* Disabled test harness (inside #if 0): reads num_frames frames of reference
+ * data from "<prefix>_in_features.f32", "<prefix>_out_features.f32",
+ * "<prefix>_periods.s32" and "<prefix>_numbits.f32", runs lace_feature_net()
+ * on each frame and prints the RMSE against the reference output features.
+ * Exits the process on any file error.
+ *
+ * NOTE(review): stale code. init_lace() and lace_feature_net() are called
+ * here with fewer arguments than the live code in this file passes (no weight
+ * list, no state, no arch), so this would not compile if enabled. */
+void lace_feature_net_compare(
+ const char * prefix,
+ int num_frames,
+ LACE* hLACE
+)
+{
+ char in_feature_file[256];
+ char out_feature_file[256];
+ char numbits_file[256];
+ char periods_file[256];
+ char message[512];
+ int i_frame, i_feature;
+ float mse;
+ float in_features[4 * LACE_NUM_FEATURES];
+ float out_features[4 * LACE_COND_DIM];
+ float out_features2[4 * LACE_COND_DIM];
+ float numbits[2];
+ int periods[4];
+
+ init_lace(hLACE);
+
+ FILE *f_in_features, *f_out_features, *f_numbits, *f_periods;
+
+ /* NOTE(review): strcpy/strcat into 256-byte buffers are unbounded in prefix */
+ strcpy(in_feature_file, prefix);
+ strcat(in_feature_file, "_in_features.f32");
+ f_in_features = fopen(in_feature_file, "rb");
+ if (f_in_features == NULL)
+ {
+ sprintf(message, "could not open file %s", in_feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(out_feature_file, prefix);
+ strcat(out_feature_file, "_out_features.f32");
+ f_out_features = fopen(out_feature_file, "rb");
+ if (f_out_features == NULL)
+ {
+ sprintf(message, "could not open file %s", out_feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(periods_file, prefix);
+ strcat(periods_file, "_periods.s32");
+ f_periods = fopen(periods_file, "rb");
+ if (f_periods == NULL)
+ {
+ sprintf(message, "could not open file %s", periods_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(numbits_file, prefix);
+ strcat(numbits_file, "_numbits.f32");
+ f_numbits = fopen(numbits_file, "rb");
+ if (f_numbits == NULL)
+ {
+ sprintf(message, "could not open file %s", numbits_file);
+ perror(message);
+ exit(1);
+ }
+
+ /* one iteration per frame: read all four inputs, run the net, compare */
+ for (i_frame = 0; i_frame < num_frames; i_frame ++)
+ {
+ if(fread(in_features, sizeof(float), 4 * LACE_NUM_FEATURES, f_in_features) != 4 * LACE_NUM_FEATURES)
+ {
+ fprintf(stderr, "could not read frame %d from in_features\n", i_frame);
+ exit(1);
+ }
+ if(fread(out_features, sizeof(float), 4 * LACE_COND_DIM, f_out_features) != 4 * LACE_COND_DIM)
+ {
+ fprintf(stderr, "could not read frame %d from out_features\n", i_frame);
+ exit(1);
+ }
+ if(fread(periods, sizeof(int), 4, f_periods) != 4)
+ {
+ fprintf(stderr, "could not read frame %d from periods\n", i_frame);
+ exit(1);
+ }
+ if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+ {
+ fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
+ exit(1);
+ }
+
+
+ lace_feature_net(hLACE, out_features2, in_features, numbits, periods);
+
+ /* NOTE(review): this mse shadows the function-level mse declared above */
+ float mse = 0;
+ for (int i = 0; i < 4 * LACE_COND_DIM; i ++)
+ {
+ mse += pow(out_features[i] - out_features2[i], 2);
+ }
+ mse /= (4 * LACE_COND_DIM);
+ printf("rmse: %f\n", sqrt(mse));
+
+ }
+
+ fclose(f_in_features);
+ fclose(f_out_features);
+ fclose(f_numbits);
+ fclose(f_periods);
+}
+
+
+/* Disabled demo (inside #if 0): streams frames from "<prefix>_x_in.f32" with
+ * matching "<prefix>_features.f32", "<prefix>_periods.s32" and
+ * "<prefix>_numbits.f32" through lace_process_20ms_frame() and writes the
+ * result as 16-bit samples to the file named by output. Stops at the first
+ * incomplete input frame; exits the process on any other file error.
+ *
+ * NOTE(review): stale code. init_lace() and lace_process_20ms_frame() are
+ * called without the weight list, state and arch arguments the live
+ * definitions in this file take, so this would not compile if enabled. */
+void lace_demo(
+ char *prefix,
+ char *output
+)
+{
+ char feature_file[256];
+ char numbits_file[256];
+ char periods_file[256];
+ char x_in_file[256];
+ char message[512];
+ int i_frame;
+ float mse;
+ float features[4 * LACE_NUM_FEATURES];
+ float numbits[2];
+ int periods[4];
+ float x_in[4 * LACE_FRAME_SIZE];
+ int16_t x_out[4 * LACE_FRAME_SIZE];
+ float buffer[4 * LACE_FRAME_SIZE];
+ LACE hLACE;
+ int frame_counter = 0;
+ FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
+
+ init_lace(&hLACE);
+
+ strcpy(feature_file, prefix);
+ strcat(feature_file, "_features.f32");
+ f_features = fopen(feature_file, "rb");
+ if (f_features == NULL)
+ {
+ sprintf(message, "could not open file %s", feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_in_file, prefix);
+ strcat(x_in_file, "_x_in.f32");
+ f_x_in = fopen(x_in_file, "rb");
+ if (f_x_in == NULL)
+ {
+ sprintf(message, "could not open file %s", x_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ f_x_out = fopen(output, "wb");
+ if (f_x_out == NULL)
+ {
+ sprintf(message, "could not open file %s", output);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(periods_file, prefix);
+ strcat(periods_file, "_periods.s32");
+ f_periods = fopen(periods_file, "rb");
+ if (f_periods == NULL)
+ {
+ sprintf(message, "could not open file %s", periods_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(numbits_file, prefix);
+ strcat(numbits_file, "_numbits.f32");
+ f_numbits = fopen(numbits_file, "rb");
+ if (f_numbits == NULL)
+ {
+ sprintf(message, "could not open file %s", numbits_file);
+ perror(message);
+ exit(1);
+ }
+
+ printf("processing %s\n", prefix);
+
+ /* the signal file drives the loop; the other streams must keep up with it */
+ while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE)
+ {
+ printf("\rframe: %d", frame_counter++);
+ /* NOTE(review): i_frame is never assigned in this function, so the
+ messages below print an indeterminate value; frame_counter holds the index */
+ if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES)
+ {
+ fprintf(stderr, "could not read frame %d from features\n", i_frame);
+ exit(1);
+ }
+ if(fread(periods, sizeof(int), 4, f_periods) != 4)
+ {
+ fprintf(stderr, "could not read frame %d from periods\n", i_frame);
+ exit(1);
+ }
+ if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+ {
+ fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
+ exit(1);
+ }
+
+ lace_process_20ms_frame(
+ &hLACE,
+ buffer,
+ x_in,
+ features,
+ numbits,
+ periods
+ );
+
+ /* scale by 2^15, clip to the int16 range and round */
+ for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++)
+ {
+ float tmp = (1UL<<15) * buffer[n];
+ tmp = CLIP(tmp, -32768, 32767);
+ x_out[n] = (int16_t) round(tmp);
+ }
+
+ fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out);
+ }
+ printf("\ndone!\n");
+
+ fclose(f_features);
+ fclose(f_numbits);
+ fclose(f_periods);
+ fclose(f_x_in);
+ fclose(f_x_out);
+}
+
+/* Disabled demo (inside #if 0): same flow as lace_demo() but running
+ * nolace_process_20ms_frame(). Reads "<prefix>_x_in.f32",
+ * "<prefix>_features.f32", "<prefix>_periods.s32" and "<prefix>_numbits.f32"
+ * and writes 16-bit samples to the file named by output.
+ *
+ * NOTE(review): stale code. init_nolace() and nolace_process_20ms_frame() are
+ * called without the weight list, state and arch arguments the live
+ * definitions in this file take. Buffers are sized with the LACE_* constants
+ * rather than NOLACE_* ones. */
+void nolace_demo(
+ char *prefix,
+ char *output
+)
+{
+ char feature_file[256];
+ char numbits_file[256];
+ char periods_file[256];
+ char x_in_file[256];
+ char message[512];
+ int i_frame;
+ float mse;
+ float features[4 * LACE_NUM_FEATURES];
+ float numbits[2];
+ int periods[4];
+ float x_in[4 * LACE_FRAME_SIZE];
+ int16_t x_out[4 * LACE_FRAME_SIZE];
+ float buffer[4 * LACE_FRAME_SIZE];
+ NoLACE hNoLACE;
+ int frame_counter = 0;
+ FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
+
+ init_nolace(&hNoLACE);
+
+ strcpy(feature_file, prefix);
+ strcat(feature_file, "_features.f32");
+ f_features = fopen(feature_file, "rb");
+ if (f_features == NULL)
+ {
+ sprintf(message, "could not open file %s", feature_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(x_in_file, prefix);
+ strcat(x_in_file, "_x_in.f32");
+ f_x_in = fopen(x_in_file, "rb");
+ if (f_x_in == NULL)
+ {
+ sprintf(message, "could not open file %s", x_in_file);
+ perror(message);
+ exit(1);
+ }
+
+ f_x_out = fopen(output, "wb");
+ if (f_x_out == NULL)
+ {
+ sprintf(message, "could not open file %s", output);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(periods_file, prefix);
+ strcat(periods_file, "_periods.s32");
+ f_periods = fopen(periods_file, "rb");
+ if (f_periods == NULL)
+ {
+ sprintf(message, "could not open file %s", periods_file);
+ perror(message);
+ exit(1);
+ }
+
+ strcpy(numbits_file, prefix);
+ strcat(numbits_file, "_numbits.f32");
+ f_numbits = fopen(numbits_file, "rb");
+ if (f_numbits == NULL)
+ {
+ sprintf(message, "could not open file %s", numbits_file);
+ perror(message);
+ exit(1);
+ }
+
+ printf("processing %s\n", prefix);
+
+ /* the signal file drives the loop; the other streams must keep up with it */
+ while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE)
+ {
+ printf("\rframe: %d", frame_counter++);
+ /* NOTE(review): i_frame is never assigned in this function, so the
+ messages below print an indeterminate value; frame_counter holds the index */
+ if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES)
+ {
+ fprintf(stderr, "could not read frame %d from features\n", i_frame);
+ exit(1);
+ }
+ if(fread(periods, sizeof(int), 4, f_periods) != 4)
+ {
+ fprintf(stderr, "could not read frame %d from periods\n", i_frame);
+ exit(1);
+ }
+ if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+ {
+ fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
+ exit(1);
+ }
+
+ nolace_process_20ms_frame(
+ &hNoLACE,
+ buffer,
+ x_in,
+ features,
+ numbits,
+ periods
+ );
+
+ /* scale by 2^15, clip to the int16 range and round */
+ for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++)
+ {
+ float tmp = (1UL<<15) * buffer[n];
+ tmp = CLIP(tmp, -32768, 32767);
+ x_out[n] = (int16_t) round(tmp);
+ }
+
+ fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out);
+ }
+ printf("\ndone!\n");
+
+ fclose(f_features);
+ fclose(f_numbits);
+ fclose(f_periods);
+ fclose(f_x_in);
+ fclose(f_x_out);
+}
+
+
+/* Entry point of the disabled test program (the whole region is inside
+ * #if 0): runs the NoLACE demo on "testdata/test9". The LACE comparison and
+ * demos are disabled a second time by the nested #if 0. */
+int main()
+{
+#if 0
+ LACE hLACE;
+
+ lace_feature_net_compare("testvec2/lace", 5, &hLACE);
+
+ lace_demo("testdata/test9", "out_lace_c_9kbps.pcm");
+ lace_demo("testdata/test6", "out_lace_c_6kbps.pcm");
+#endif
+ nolace_demo("testdata/test9", "out_nolace_c_9kbps.pcm");
+
+}
+#endif
+
+/*gcc -I ../include -I . -I ../silk -I ../celt osce.c nndsp.c lace_data.c nolace_data.c nnet.c parse_lpcnet_weights.c -lm -o lacetest*/
diff --git a/dnn/osce.h b/dnn/osce.h
new file mode 100644
index 00000000..1aadfad4
--- /dev/null
+++ b/dnn/osce.h
@@ -0,0 +1,84 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_H
+#define OSCE_H
+
+
+#include "opus_types.h"
+/*#include "osce_config.h"*/
+#ifndef DISABLE_LACE
+#include "lace_data.h"
+#endif
+#ifndef DISABLE_NOLACE
+#include "nolace_data.h"
+#endif
+#include "nndsp.h"
+#include "nnet.h"
+#include "osce_structs.h"
+#include "structs.h"
+
+/* Enhancement method identifiers. The identifier of a method is only defined
+   when that method has not been compiled out (DISABLE_LACE / DISABLE_NOLACE). */
+#define OSCE_METHOD_NONE 0
+#ifndef DISABLE_LACE
+#define OSCE_METHOD_LACE 1
+#endif
+#ifndef DISABLE_NOLACE
+#define OSCE_METHOD_NOLACE 2
+#endif
+
+/* Default method selection: NoLACE when it is compiled in, otherwise LACE,
+   otherwise none. OSCE_MAX_RNN_NEURONS follows the feature-net GRU state size
+   of the selected default (0 when no method is available).
+   NOTE(review): with both methods enabled only the NoLACE size is used;
+   presumably it is at least as large as the LACE size - confirm. */
+#if !defined(DISABLE_NOLACE)
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE
+#define OSCE_MAX_RNN_NEURONS NOLACE_FNET_GRU_STATE_SIZE
+#elif !defined(DISABLE_LACE)
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE
+#define OSCE_MAX_RNN_NEURONS LACE_FNET_GRU_STATE_SIZE
+#else
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE
+#define OSCE_MAX_RNN_NEURONS 0
+#endif
+
+
+
+
+/* API */
+
+
+/* Enhance one decoded SILK frame. xq is both input and output, so the
+   decoded speech is modified in place. */
+void osce_enhance_frame(
+ OSCEModel *model, /* I OSCE model struct */
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I Decoder control */
+ opus_int16 xq[], /* I/O Decoded speech */
+ opus_int32 num_bits, /* I Size of SILK payload in bits */
+ int arch /* I Run-time architecture */
+);
+
+
+/* NOTE(review): data/len presumably describe a binary weight blob and the
+   return value presumably signals success or failure - confirm in osce.c. */
+int osce_load_models(OSCEModel *hModel, const void *data, int len);
+/* NOTE(review): method is presumably one of the OSCE_METHOD_* values - confirm. */
+void osce_reset(silk_OSCE_struct *hOSCE, int method);
+
+
+#endif
diff --git a/dnn/osce_config.h b/dnn/osce_config.h
new file mode 100644
index 00000000..bc187bb3
--- /dev/null
+++ b/dnn/osce_config.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_CONFIG
+#define OSCE_CONFIG
+
+/* Number of past signal samples kept in front of the current frame, length of
+   one per-subframe feature vector, and maximum number of subframes per frame. */
+#define OSCE_FEATURES_MAX_HISTORY 350
+#define OSCE_FEATURE_DIM 93
+#define OSCE_MAX_FEATURE_FRAMES 4
+
+/* Number of bands of the two filterbanks. */
+#define OSCE_CLEAN_SPEC_NUM_BANDS 64
+#define OSCE_NOISY_SPEC_NUM_BANDS 18
+
+/* Pitch lag reported for frames that are not voiced. */
+#define OSCE_NO_PITCH_VALUE 7
+
+#define OSCE_PREEMPH 0.85f
+
+/* Length of the pitch hangover; 0 means no hangover. */
+#define OSCE_PITCH_HANGOVER 0
+
+/* Layout of one feature vector as (start index, length) pairs. The sections
+   are contiguous and add up to OSCE_FEATURE_DIM: 64 + 18 + 5 + 5 + 1 = 93. */
+#define OSCE_CLEAN_SPEC_START 0
+#define OSCE_CLEAN_SPEC_LENGTH 64
+
+#define OSCE_NOISY_CEPSTRUM_START 64
+#define OSCE_NOISY_CEPSTRUM_LENGTH 18
+
+#define OSCE_ACORR_START 82
+#define OSCE_ACORR_LENGTH 5
+
+#define OSCE_LTP_START 87
+#define OSCE_LTP_LENGTH 5
+
+#define OSCE_LOG_GAIN_START 92
+#define OSCE_LOG_GAIN_LENGTH 1
+
+
+#endif
diff --git a/dnn/osce_features.c b/dnn/osce_features.c
new file mode 100644
index 00000000..bcd48016
--- /dev/null
+++ b/dnn/osce_features.c
@@ -0,0 +1,454 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define OSCE_SPEC_WINDOW_SIZE 320
+#define OSCE_SPEC_NUM_FREQS 161
+
+
+/*DEBUG*/
+/*#define WRITE_FEATURES*/
+/*#define DEBUG_PRINT*/
+/*******/
+
+#include "stack_alloc.h"
+#include "osce_features.h"
+#include "kiss_fft.h"
+#include "os_support.h"
+#include "osce.h"
+#include "freq.h"
+
+
+/* stdio/stdlib are only needed for the optional debug output. The guard has
+   to test DEBUG_PRINT, the macro checked by the printf sections in this file. */
+#if defined(WRITE_FEATURES) || defined(DEBUG_PRINT)
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+/* Center bin of each of the 64 bands applied to the LPC-derived spectrum.
+   Indices refer to the one-sided spectrum, whose last bin is 160. */
+static const int center_bins_clean[64] = {
+ 0, 2, 5, 8, 10, 12, 15, 18,
+ 20, 22, 25, 28, 30, 33, 35, 38,
+ 40, 42, 45, 48, 50, 52, 55, 58,
+ 60, 62, 65, 68, 70, 73, 75, 78,
+ 80, 82, 85, 88, 90, 92, 95, 98,
+ 100, 102, 105, 108, 110, 112, 115, 118,
+ 120, 122, 125, 128, 130, 132, 135, 138,
+ 140, 142, 145, 148, 150, 152, 155, 160
+};
+
+/* Center bin of each of the 18 bands applied to the signal spectrum before
+   the cepstrum is computed. */
+static const int center_bins_noisy[18] = {
+ 0, 4, 8, 12, 16, 20, 24, 28,
+ 32, 40, 48, 56, 64, 80, 96, 112,
+ 136, 160
+};
+
+/* Per-band scale factors for the 64-band filterbank.
+   NOTE(review): the values look like the reciprocal of each band's summed
+   interpolation weights (e.g. band 0: 1 / (1 + 0.5)) - confirm. */
+static const float band_weights_clean[64] = {
+ 0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
+ 0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f
+};
+
+/* Per-band scale factors for the 18-band filterbank. */
+static const float band_weights_noisy[18] = {
+ 0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
+ 0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
+ 0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f,
+ 0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f,
+ 0.041666666667f, 0.080000000000f
+};
+
+/* Symmetric analysis window of OSCE_SPEC_WINDOW_SIZE samples. It is used to
+   window the signal before the cepstrum is computed, and its rising half
+   provides the cross-fade gains. The table is only ever read, hence const.
+   NOTE(review): the values appear to follow sin(pi * (n + 0.5) / 320) - confirm. */
+static const float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
+    0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f,
+    0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f,
+    0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f,
+    0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f,
+    0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f,
+    0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f,
+    0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f,
+    0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f,
+    0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f,
+    0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f,
+    0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f,
+    0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f,
+    0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f,
+    0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f,
+    0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f,
+    0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f,
+    0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f,
+    0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f,
+    0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f,
+    0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f,
+    0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f,
+    0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f,
+    0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f,
+    0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f,
+    0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f,
+    0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f,
+    0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f,
+    0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f,
+    0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f,
+    0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f,
+    0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f,
+    0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f,
+    0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f,
+    0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f,
+    0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f,
+    0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f,
+    0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f,
+    0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f,
+    0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f,
+    0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f,
+    0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f,
+    0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f,
+    0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f,
+    0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f,
+    0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f,
+    0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f,
+    0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f,
+    0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f,
+    0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f,
+    0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f,
+    0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f,
+    0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f,
+    0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 0.518306898929f,
+    0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f,
+    0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f,
+    0.423112513073f, 0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f,
+    0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f,
+    0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f,
+    0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f,
+    0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f,
+    0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f,
+    0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f,
+    0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f,
+    0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f
+};
+
+/* Accumulate the spectrum x_in into num_bands overlapping bands.
+ *
+ * Every input bin between two neighbouring band centers is split linearly
+ * between those two bands, and each share is scaled by the weight of the band
+ * that receives it. The bin at the last center goes entirely to the last band.
+ * x_in and x_out must be different buffers.
+ */
+static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands)
+{
+    int band, bin;
+    int lower, upper, last;
+    float share;
+
+    celt_assert(x_in != x_out)
+
+    last = num_bands - 1;
+
+    x_out[0] = 0;
+    for (band = 0; band < last; band++)
+    {
+        lower = center_bins[band];
+        upper = center_bins[band+1];
+
+        /* the upper band starts accumulating with this segment */
+        x_out[band+1] = 0;
+        for (bin = lower; bin < upper; bin++)
+        {
+            /* share of this bin that belongs to the lower band */
+            share = (float) (upper - bin) / (upper - lower);
+            x_out[band] += band_weights[band] * share * x_in[bin];
+            x_out[band+1] += band_weights[band+1] * (1 - share) * x_in[bin];
+        }
+    }
+    x_out[last] += band_weights[last] * x_in[center_bins[last]];
+#ifdef DEBUG_PRINT
+    for (band = 0; band < num_bands; band++)
+    {
+        printf("band[%d]: %f\n", band, x_out[band]);
+    }
+#endif
+}
+
+
+/* One-sided magnitude spectrum of a real frame of OSCE_SPEC_WINDOW_SIZE samples.
+ *
+ * in  : OSCE_SPEC_WINDOW_SIZE input samples
+ * out : OSCE_SPEC_NUM_FREQS magnitudes; out may be the same buffer as in,
+ *       because the transform result is held in a local buffer until
+ *       forward_transform has returned.
+ */
+static void mag_spec_320_onesided(float *out, float *in)
+{
+    /* declarations come first: the assertion below is a statement and the
+       code base is built as C89 */
+    kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
+    int k;
+
+    celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
+
+    forward_transform(buffer, in);
+
+    for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
+    {
+        /* NOTE(review): the factor presumably undoes a 1/N scaling applied by
+           forward_transform - confirm in freq.c */
+        out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
+#ifdef DEBUG_PRINT
+        printf("magspec[%d]: %f\n", k, out[k]);
+#endif
+    }
+}
+
+
+/* Scaled log spectrum derived from a set of Q12 LPC coefficients.
+ *
+ * The sequence 1, -a[0], -a[1], ... is zero padded to the transform size, its
+ * magnitude spectrum is inverted bin by bin, and the result is passed through
+ * the 64-band filterbank and converted to 0.3 * log().
+ *
+ * spec      : receives OSCE_CLEAN_SPEC_NUM_BANDS values
+ * a_q12     : lpc_order coefficients in Q12
+ * lpc_order : number of coefficients
+ */
+static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
+{
+    float work[OSCE_SPEC_WINDOW_SIZE] = {0};
+    int idx;
+
+    /* unit tap followed by the negated coefficients; the rest stays zero */
+    work[0] = 1;
+    for (idx = 1; idx <= lpc_order; idx++)
+    {
+        work[idx] = - (float)a_q12[idx-1] / (1U << 12);
+    }
+
+    /* magnitude spectrum, computed in place */
+    mag_spec_320_onesided(work, work);
+
+    /* inversion, with a small offset against division by zero */
+    for (idx = 0; idx < OSCE_SPEC_NUM_FREQS; idx++)
+    {
+        work[idx] = 1.f / (work[idx] + 1e-9f);
+    }
+
+    /* band accumulation */
+    apply_filterbank(spec, work, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);
+
+    /* log domain and scaling */
+    for (idx = 0; idx < OSCE_CLEAN_SPEC_NUM_BANDS; idx++)
+    {
+        spec[idx] = 0.3f * log(spec[idx] + 1e-9f);
+    }
+}
+
+/* Cepstrum of one windowed signal segment.
+ *
+ * The segment of OSCE_SPEC_WINDOW_SIZE samples is windowed, transformed to a
+ * magnitude spectrum, reduced to OSCE_NOISY_SPEC_NUM_BANDS bands, converted to
+ * the log domain and finally passed through dct().
+ *
+ * cepstrum : output; must not be the same buffer as signal
+ * signal   : OSCE_SPEC_WINDOW_SIZE input samples
+ */
+static void calculate_cepstrum(float *cepstrum, float *signal)
+{
+    float work[OSCE_SPEC_WINDOW_SIZE];
+    /* the band values live in the part of work that lies behind the
+       one-sided spectrum */
+    float *bands = work + OSCE_SPEC_NUM_FREQS + 3;
+    int idx;
+
+    celt_assert(cepstrum != signal)
+
+    /* windowing */
+    for (idx = 0; idx < OSCE_SPEC_WINDOW_SIZE; idx++)
+    {
+        work[idx] = osce_window[idx] * signal[idx];
+    }
+
+    /* magnitude spectrum, computed in place */
+    mag_spec_320_onesided(work, work);
+
+    /* band accumulation */
+    apply_filterbank(bands, work, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
+
+    /* log domain */
+    for (idx = 0; idx < OSCE_NOISY_SPEC_NUM_BANDS; idx++)
+    {
+        bands[idx] = log(bands[idx] + 1e-9f);
+#ifdef DEBUG_PRINT
+        printf("logspec[%d]: %f\n", idx, bands[idx]);
+#endif
+    }
+
+    /* DCT-II (orthonormal) */
+    celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
+    dct(cepstrum, bands);
+}
+
+/* Normalized correlation of one 80-sample subframe with its past, evaluated
+ * at the five delays lag-2 ... lag+2.
+ *
+ * acorr  : receives 5 values; acorr[k+2] belongs to the offset k in -2..2
+ * signal : start of the subframe; the samples signal[-lag-2 .. 79] are read,
+ *          so enough history has to be available in front of it
+ * lag    : pitch lag in samples
+ */
+static void calculate_acorr(float *acorr, float *signal, int lag)
+{
+    int n, k;
+    int shift;
+    float xx, xy, yy;
+
+    celt_assert(acorr != signal);
+
+    /* the energy of the subframe does not depend on the offset, so it is
+       computed once instead of once per offset */
+    xx = 0;
+    for (n = 0; n < 80; n++)
+    {
+        xx += signal[n] * signal[n];
+    }
+
+    for (k = -2; k <= 2; k++)
+    {
+        shift = k - lag;
+        xy = 0;
+        yy = 0;
+        for (n = 0; n < 80; n++)
+        {
+            yy += signal[n + shift] * signal[n + shift];
+            xy += signal[n] * signal[n + shift];
+        }
+        /* the small offset keeps the division defined for silent input */
+        acorr[k+2] = xy / sqrt(xx * yy + 1e-9f);
+    }
+}
+
+/* Map the decoder pitch lag to the lag that is passed on as a feature.
+ *
+ * Voiced frames keep their lag, which is also remembered in the state. Frames
+ * that are not voiced get OSCE_NO_PITCH_VALUE, unless the hangover logic is
+ * compiled in (OSCE_HANGOVER_BUGFIX) and repeats the last voiced lag.
+ * The state fields last_lag, last_type and pitch_hangover_count are updated.
+ */
+static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
+{
+    int result;
+    int wrap;
+
+#ifdef OSCE_HANGOVER_BUGFIX
+#define TESTBIT 1
+#else
+#define TESTBIT 0
+#endif
+
+    /* modulus of the hangover counter, never zero */
+    wrap = OSCE_PITCH_HANGOVER;
+    if (wrap == 0) wrap ++;
+
+    /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
+    if (type == TYPE_VOICED)
+    {
+        /* voiced frame: pass the lag through and remember it */
+        result = lag;
+        psFeatures->last_lag = lag;
+        psFeatures->pitch_hangover_count = 0;
+    }
+    else if (TESTBIT && psFeatures->last_type == TYPE_VOICED)
+    {
+        /* first frame after a voiced one: enter hangover */
+        if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
+        {
+            result = psFeatures->last_lag;
+            psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % wrap;
+        }
+        else
+        {
+            result = OSCE_NO_PITCH_VALUE;
+        }
+    }
+    else if (TESTBIT && psFeatures->pitch_hangover_count)
+    {
+        /* continue hangover */
+        result = psFeatures->last_lag;
+        psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % wrap;
+    }
+    else
+    {
+        /* frame without pitch and without hangover */
+        result = OSCE_NO_PITCH_VALUE;
+        psFeatures->pitch_hangover_count = 0;
+    }
+
+    /* remember the frame type for the next call */
+    psFeatures->last_type = type;
+
+    /* with the current setup this should never happen (but who knows...) */
+    celt_assert(result)
+
+    return result;
+}
+
+/* Compute the enhancement input features for one decoded SILK frame.
+ *
+ * One feature vector of OSCE_FEATURE_DIM values is written per 80-sample
+ * subframe: clean log spectrum from the LPC coefficients, cepstrum of the
+ * decoded signal, correlation around the pitch lag, LTP coefficients and log
+ * gain. Spectrum and cepstrum are computed for even subframes and copied from
+ * the preceding subframe for odd ones.
+ *
+ * The work buffer holds OSCE_FEATURES_MAX_HISTORY past samples followed by the
+ * samples of the current frame; its tail is stored back into the feature
+ * state as history for the next call.
+ */
+void osce_calculate_features(
+    silk_decoder_state *psDec, /* I/O Decoder state */
+    silk_decoder_control *psDecCtrl, /* I Decoder control */
+    float *features, /* O input features */
+    float *numbits, /* O numbits and smoothed numbits */
+    int *periods, /* O pitch lags on subframe basis */
+    const opus_int16 xq[], /* I Decoded speech */
+    opus_int32 num_bits /* I Size of SILK payload in bits */
+)
+{
+    int num_subframes, num_samples;
+    float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
+    float *frame, *pfeatures;
+    OSCEFeatureState *psFeatures;
+    int i, n, k;
+#ifdef WRITE_FEATURES
+    static FILE *f_feat = NULL;
+    if (f_feat == NULL)
+    {
+        f_feat = fopen("assembled_features.f32", "wb");
+    }
+#endif
+
+    /*OPUS_CLEAR(buffer, 1);*/
+    memset(buffer, 0, sizeof(buffer));
+
+    num_subframes = psDec->nb_subfr;
+    num_samples = num_subframes * 80;
+    psFeatures = &psDec->osce.features;
+
+    /* smooth bit count */
+    psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
+    numbits[0] = num_bits;
+    numbits[1] = psFeatures->numbits_smooth;
+
+    /* new samples are scaled from Q15 and placed behind the history */
+    for (n = 0; n < num_samples; n++)
+    {
+        buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
+    }
+    OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);
+
+    for (k = 0; k < num_subframes; k++)
+    {
+        pfeatures = features + k * OSCE_FEATURE_DIM;
+        frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
+        /* precaution; the size is given in bytes, so the element count has
+           to be scaled by the element size to clear the whole vector */
+        memset(pfeatures, 0, OSCE_FEATURE_DIM * sizeof(*pfeatures));
+
+        /* clean spectrum from lpcs (update every other frame) */
+        if (k % 2 == 0)
+        {
+            calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
+        }
+        else
+        {
+            OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
+        }
+
+        /* noisy cepstrum from signal (update every other frame);
+           the analysis window starts 160 samples before the subframe */
+        if (k % 2 == 0)
+        {
+            calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
+        }
+        else
+        {
+            OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
+        }
+
+        /* pitch hangover and zero value replacement */
+        periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);
+
+        /* auto-correlation around pitch lag */
+        calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);
+
+        /* ltp */
+        celt_assert(OSCE_LTP_LENGTH == LTP_ORDER)
+        for (i = 0; i < OSCE_LTP_LENGTH; i++)
+        {
+            pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
+        }
+
+        /* frame gain */
+        pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);
+
+#ifdef WRITE_FEATURES
+        fwrite(pfeatures, sizeof(*pfeatures), OSCE_FEATURE_DIM, f_feat);
+#endif
+    }
+
+    /* buffer update: the last OSCE_FEATURES_MAX_HISTORY samples become the new history */
+    OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
+}
+
+
+/* Cross-fade from x_in to x_enhanced over the first 160 samples.
+ *
+ * The rising half of osce_window is the gain of the enhanced signal, and its
+ * complement the gain of x_in. The result is written to x_enhanced; samples
+ * from index 160 on are left untouched. length must be at least 160.
+ */
+void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
+{
+    int pos;
+    float fade_in;
+
+    celt_assert(length >= 160);
+
+    pos = 0;
+    while (pos < 160)
+    {
+        fade_in = osce_window[pos];
+        x_enhanced[pos] = fade_in * x_enhanced[pos] + (1.f - fade_in) * x_in[pos];
+        pos++;
+    }
+}
diff --git a/dnn/osce_features.h b/dnn/osce_features.h
new file mode 100644
index 00000000..1eeb5677
--- /dev/null
+++ b/dnn/osce_features.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_FEATURES_H
+#define OSCE_FEATURES_H
+
+
+#include "structs.h"
+#include "opus_types.h"
+
+#define OSCE_NUMBITS_BUGFIX
+
+/* Compute the enhancement input features for one decoded frame: one feature
+   vector and one pitch lag per subframe, plus the raw and the smoothed bit
+   count in numbits[0] and numbits[1]. */
+void osce_calculate_features(
+ silk_decoder_state *psDec, /* I/O Decoder state */
+ silk_decoder_control *psDecCtrl, /* I Decoder control */
+ float *features, /* O input features */
+ float *numbits, /* O numbits and smoothed numbits */
+ int *periods, /* O pitch lags on subframe basis */
+ const opus_int16 xq[], /* I Decoded speech */
+ opus_int32 num_bits /* I Size of SILK payload in bits */
+);
+
+
+/* Cross-fade from x_in to x_enhanced over the first 160 samples; the result
+   is written to x_enhanced. length must be at least 160. */
+void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);
+
+#endif
diff --git a/dnn/osce_structs.h b/dnn/osce_structs.h
new file mode 100644
index 00000000..6358681f
--- /dev/null
+++ b/dnn/osce_structs.h
@@ -0,0 +1,125 @@
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OSCE_STRUCTS_H
+#define OSCE_STRUCTS_H
+
+#include "opus_types.h"
+#include "osce_config.h"
+#ifndef DISABLE_LACE
+#include "lace_data.h"
+#endif
+#ifndef DISABLE_NOLACE
+#include "nolace_data.h"
+#endif
+#include "nndsp.h"
+#include "nnet.h"
+
+/* feature calculation */
+
+/* State carried from frame to frame by the feature calculation. */
+typedef struct {
+ float numbits_smooth; /* recursively smoothed payload size in bits */
+ int pitch_hangover_count; /* position inside the pitch hangover */
+ int last_lag; /* pitch lag of the most recent voiced subframe */
+ int last_type; /* signal type seen by the previous pitch post-processing call */
+ float signal_history[OSCE_FEATURES_MAX_HISTORY]; /* most recent decoded samples, scaled to float */
+ int reset; /* NOTE(review): not used by the feature code; presumably a reset request flag - confirm */
+} OSCEFeatureState;
+
+
+#ifndef DISABLE_LACE
+/* LACE */
+/* Per-stream LACE processing state: feature-net states, the states of the
+   adaptive comb and convolution modules, and the pre-/de-emphasis memories. */
+typedef struct {
+ float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE];
+ float feature_net_gru_state[LACE_COND_DIM];
+ AdaCombState cf1_state;
+ AdaCombState cf2_state;
+ AdaConvState af1_state;
+ float preemph_mem;
+ float deemph_mem;
+} LACEState;
+
+/* LACE model: the network layers plus an overlap window. */
+typedef struct
+{
+ LACELayers layers;
+ float window[LACE_OVERLAP_SIZE];
+} LACE;
+
+#endif /* #ifndef DISABLE_LACE */
+
+
+#ifndef DISABLE_NOLACE
+/* NoLACE */
+/* Per-stream NoLACE processing state: feature-net states, the states behind
+   the individual modules, the states of the adaptive comb, convolution and
+   shaping modules, and the pre-/de-emphasis memories. */
+typedef struct {
+ float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE];
+ float feature_net_gru_state[NOLACE_COND_DIM];
+ float post_cf1_state[NOLACE_COND_DIM];
+ float post_cf2_state[NOLACE_COND_DIM];
+ float post_af1_state[NOLACE_COND_DIM];
+ float post_af2_state[NOLACE_COND_DIM];
+ float post_af3_state[NOLACE_COND_DIM];
+ AdaCombState cf1_state;
+ AdaCombState cf2_state;
+ AdaConvState af1_state;
+ AdaConvState af2_state;
+ AdaConvState af3_state;
+ AdaConvState af4_state;
+ AdaShapeState tdshape1_state;
+ AdaShapeState tdshape2_state;
+ AdaShapeState tdshape3_state;
+ float preemph_mem;
+ float deemph_mem;
+} NoLACEState;
+
+/* NoLACE model: the network layers plus an overlap window.
+   NOTE(review): the window is sized with LACE_OVERLAP_SIZE although this
+   struct is only guarded by DISABLE_NOLACE; confirm that the macro is still
+   defined when LACE is disabled and NoLACE is not. */
+typedef struct {
+ NOLACELayers layers;
+ float window[LACE_OVERLAP_SIZE];
+} NoLACE;
+
+#endif /* #ifndef DISABLE_NOLACE */
+
+/* OSCEModel */
+/* Container for the models of all methods that are compiled in.
+   NOTE(review): loaded is presumably set once the weights have been
+   loaded successfully - confirm in osce.c. */
+typedef struct {
+ int loaded;
+#ifndef DISABLE_LACE
+ LACE lace;
+#endif
+#ifndef DISABLE_NOLACE
+ NoLACE nolace;
+#endif
+} OSCEModel;
+
+/* Processing state of the methods that are compiled in; the members share
+   storage.
+   NOTE(review): with both DISABLE_LACE and DISABLE_NOLACE defined this union
+   has no members, which standard C does not allow - confirm that this
+   configuration never includes this header. */
+typedef union {
+#ifndef DISABLE_LACE
+ LACEState lace;
+#endif
+#ifndef DISABLE_NOLACE
+ NoLACEState nolace;
+#endif
+} OSCEState;
+
+#endif
diff --git a/dnn/parse_lpcnet_weights.c b/dnn/parse_lpcnet_weights.c
new file mode 100644
index 00000000..01ab7f8e
--- /dev/null
+++ b/dnn/parse_lpcnet_weights.c
@@ -0,0 +1,238 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include "nnet.h"
+#include "os_support.h"
+
+#define SPARSE_BLOCK_SIZE 32
+
+/* Parses the weight record located at *data.
+   On success, fills `array` with the record's name, type, size and payload
+   pointer (the payload starts WEIGHT_BLOCK_SIZE bytes after the header),
+   advances *data and *len past the whole block and returns the payload size.
+   Returns -1, leaving *data and *len untouched, when the header does not fit
+   in *len bytes or when its fields are inconsistent. */
+int parse_record(const void **data, int *len, WeightArray *array) {
+  WeightHead *head = (WeightHead *)*data;
+  int header_ok;
+  /* No header field may be read unless the whole header is present. */
+  if (*len < WEIGHT_BLOCK_SIZE) return -1;
+  header_ok = head->size >= 0
+           && head->block_size >= head->size
+           && head->block_size <= *len-WEIGHT_BLOCK_SIZE
+           && head->name[sizeof(head->name)-1] == 0;
+  if (!header_ok) return -1;
+
+  array->name = head->name;
+  array->type = head->type;
+  array->size = head->size;
+  array->data = (void*)((unsigned char*)(*data)+WEIGHT_BLOCK_SIZE);
+
+  /* Step over the header and the (possibly padded) payload block. */
+  *len -= head->block_size+WEIGHT_BLOCK_SIZE;
+  *data = (void*)((unsigned char*)*data + head->block_size+WEIGHT_BLOCK_SIZE);
+  return array->size;
+}
+
+/* Builds a NULL-name-terminated list of the weight arrays found in a blob.
+   list: receives a newly allocated array that the caller releases with
+         opus_free(); set to NULL on failure.
+   data/len: the blob and its length in bytes. The entries point into `data`
+         (names and payloads are not copied), so the blob must outlive the list.
+   Returns the number of arrays parsed (the terminator is not counted), or -1
+   if a record is malformed or empty, or if memory cannot be allocated. */
+int parse_weights(WeightArray **list, const void *data, int len)
+{
+  int nb_arrays=0;
+  int capacity=20;
+  WeightArray terminator = {NULL, 0, 0, 0};
+  *list = opus_alloc(capacity*sizeof(WeightArray));
+  if (*list == NULL) return -1;
+  while (len > 0) {
+    int ret;
+    WeightArray array = {NULL, 0, 0, 0};
+    ret = parse_record(&data, &len, &array);
+    if (ret <= 0) goto fail;
+    if (nb_arrays+1 >= capacity) {
+      /* Make sure there's room for the ending NULL element too. */
+      WeightArray *grown;
+      capacity = capacity*3/2;
+      /* Keep the old pointer until the reallocation is known to have worked,
+         otherwise the list would leak on failure. */
+      grown = opus_realloc(*list, capacity*sizeof(WeightArray));
+      if (grown == NULL) goto fail;
+      *list = grown;
+    }
+    (*list)[nb_arrays++] = array;
+  }
+  /* Fully initialise the terminator so lookups that land on it never read
+     indeterminate size/data fields. */
+  (*list)[nb_arrays] = terminator;
+  return nb_arrays;
+fail:
+  opus_free(*list);
+  *list = NULL;
+  return -1;
+}
+
+/* Linear search for the array called `name`. Never returns NULL: when no
+   entry matches, the result is the list terminator (the entry whose name is
+   NULL). */
+static const void *find_array_entry(const WeightArray *arrays, const char *name) {
+  const WeightArray *entry;
+  for (entry = arrays; entry->name != NULL; entry++) {
+    if (strcmp(entry->name, name) == 0) break;
+  }
+  return entry;
+}
+
+/* Returns the payload of the array called `name` if it exists and is exactly
+   `size` bytes long, NULL otherwise. */
+static const void *find_array_check(const WeightArray *arrays, const char *name, int size) {
+  const WeightArray *entry = find_array_entry(arrays, name);
+  if (entry->name == NULL) return NULL;
+  if (entry->size != size) return NULL;
+  return entry->data;
+}
+
+/* Lookup for an optional array. A missing array is not an error (returns NULL
+   with *error = 0); an array that exists with the wrong size returns NULL with
+   *error = 1; otherwise the payload is returned and *error = 0. */
+static const void *opt_array_check(const WeightArray *arrays, const char *name, int size, int *error) {
+  const WeightArray *entry = find_array_entry(arrays, name);
+  if (entry->name == NULL) {
+    *error = 0;
+    return NULL;
+  }
+  *error = (entry->size != size);
+  if (*error) return NULL;
+  return entry->data;
+}
+
+/* Looks up and validates a sparse index array.
+   The array is a sequence of groups, each made of a block count followed by
+   that many input positions; every group accounts for 8 outputs. A position
+   must be a non-negative multiple of 4 with pos+3 < nb_in, and the groups
+   must cover exactly nb_out outputs.
+   On success returns the index payload and stores the total number of blocks
+   in *total_blocks; returns NULL if the array is missing or invalid. */
+static const void *find_idx_check(const WeightArray *arrays, const char *name, int nb_in, int nb_out, int *total_blocks) {
+  int remain;
+  const int *idx;
+  const WeightArray *a = find_array_entry(arrays, name);
+  *total_blocks = 0;
+  /* find_array_entry() reports "not found" through the terminator entry
+     (name == NULL), not through a NULL pointer. */
+  if (a == NULL || a->name == NULL) return NULL;
+  idx = a->data;
+  remain = a->size/sizeof(int);
+  while (remain > 0) {
+    int nb_blocks;
+    int i;
+    nb_blocks = *idx++;
+    /* A negative count would make `remain` grow (or stall) and walk idx out
+       of the array; a count larger than what is left would overrun it. */
+    if (nb_blocks < 0 || nb_blocks > remain-1) return NULL;
+    for (i=0;i<nb_blocks;i++) {
+      int pos = *idx++;
+      /* pos > nb_in-4 is pos+3 >= nb_in without the risk of overflow. */
+      if (pos < 0 || pos > nb_in-4 || (pos&0x3)) return NULL;
+    }
+    nb_out -= 8;
+    remain -= nb_blocks+1;
+    *total_blocks += nb_blocks;
+  }
+  if (nb_out != 0) return NULL;
+  return a->data;
+}
+
+/* Binds the arrays of a linear layer to entries of `arrays`.
+   Each of bias, subias, weights, float_weights, weights_idx, diag and scale
+   is the name of an array to look up, or NULL when the layer has no such
+   array. Sizes are checked against nb_inputs/nb_outputs (or, for sparse
+   layers, against the block count found in weights_idx).
+   float_weights is optional: it may be absent from `arrays`, but if present
+   it must have the expected size. When `weights` is given, `scale` is
+   required as well.
+   Returns 0 on success, 1 on any missing or mis-sized array; on failure the
+   layer is left partially initialised and must not be used. */
+int linear_init(LinearLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *subias,
+  const char *weights,
+  const char *float_weights,
+  const char *weights_idx,
+  const char *diag,
+  const char *scale,
+  int nb_inputs,
+  int nb_outputs)
+{
+  int err;
+  int nb_weights;
+  layer->bias = NULL;
+  layer->subias = NULL;
+  layer->weights = NULL;
+  layer->float_weights = NULL;
+  layer->weights_idx = NULL;
+  layer->diag = NULL;
+  layer->scale = NULL;
+  if (bias != NULL) {
+    if ((layer->bias = find_array_check(arrays, bias, nb_outputs*sizeof(layer->bias[0]))) == NULL) return 1;
+  }
+  if (subias != NULL) {
+    if ((layer->subias = find_array_check(arrays, subias, nb_outputs*sizeof(layer->subias[0]))) == NULL) return 1;
+  }
+  /* Sparse layers store SPARSE_BLOCK_SIZE weights per block listed in the
+     index; dense layers store the full nb_inputs x nb_outputs matrix. */
+  if (weights_idx != NULL) {
+    int total_blocks;
+    if ((layer->weights_idx = find_idx_check(arrays, weights_idx, nb_inputs, nb_outputs, &total_blocks)) == NULL) return 1;
+    nb_weights = SPARSE_BLOCK_SIZE*total_blocks;
+  } else {
+    nb_weights = nb_inputs*nb_outputs;
+  }
+  if (weights != NULL) {
+    if ((layer->weights = find_array_check(arrays, weights, nb_weights*sizeof(layer->weights[0]))) == NULL) return 1;
+  }
+  if (float_weights != NULL) {
+    layer->float_weights = opt_array_check(arrays, float_weights, nb_weights*sizeof(layer->float_weights[0]), &err);
+    if (err) return 1;
+  }
+  if (diag != NULL) {
+    if ((layer->diag = find_array_check(arrays, diag, nb_outputs*sizeof(layer->diag[0]))) == NULL) return 1;
+  }
+  if (weights != NULL) {
+    /* The scale lookup is tied to `weights`; refuse a NULL name instead of
+       handing it to strcmp(). */
+    if (scale == NULL) return 1;
+    if ((layer->scale = find_array_check(arrays, scale, nb_outputs*sizeof(layer->scale[0]))) == NULL) return 1;
+  }
+  layer->nb_inputs = nb_inputs;
+  layer->nb_outputs = nb_outputs;
+  return 0;
+}
+
+/* Binds the arrays of a 2-D convolution layer to entries of `arrays`.
+   bias (if named) must hold out_channels values. float_weights (if named) is
+   optional in `arrays`, but when present it must hold
+   in_channels*out_channels*ktime*kheight values.
+   Returns 0 on success and 1 on a missing bias or a mis-sized array; the
+   dimension fields are only written on success. */
+int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *float_weights,
+  int in_channels,
+  int out_channels,
+  int ktime,
+  int kheight)
+{
+  int size_mismatch;
+  layer->bias = NULL;
+  layer->float_weights = NULL;
+  if (bias != NULL) {
+    layer->bias = find_array_check(arrays, bias, out_channels*sizeof(layer->bias[0]));
+    if (layer->bias == NULL) return 1;
+  }
+  if (float_weights != NULL) {
+    layer->float_weights = opt_array_check(arrays, float_weights,
+        in_channels*out_channels*ktime*kheight*sizeof(layer->float_weights[0]), &size_mismatch);
+    if (size_mismatch) return 1;
+  }
+  layer->in_channels = in_channels;
+  layer->out_channels = out_channels;
+  layer->ktime = ktime;
+  layer->kheight = kheight;
+  return 0;
+}
+
+
+
+#if 0
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <stdio.h>
+
+int main() /* Disabled (#if 0) debugging driver: maps "weights_blob.bin" and lists the arrays parse_weights() finds in it */
+{
+ int fd;
+ void *data;
+ int len;
+ int nb_arrays;
+ int i;
+ WeightArray *list;
+ struct stat st;
+ const char *filename = "weights_blob.bin";
+ stat(filename, &st); /* NOTE(review): the results of stat(), open() and mmap() are not checked */
+ len = st.st_size;
+ fd = open(filename, O_RDONLY);
+ data = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+ printf("size is %d\n", len);
+ nb_arrays = parse_weights(&list, data, len); /* NOTE(review): returns -1 with list == NULL on a malformed blob, which the code below does not handle */
+ for (i=0;i<nb_arrays;i++) {
+ printf("found %s: size %d\n", list[i].name, list[i].size);
+ }
+ printf("%p\n", list[i].name); /* i == nb_arrays here, so this prints the terminator's name pointer */
+ opus_free(list);
+ munmap(data, len);
+ close(fd);
+ return 0;
+}
+#endif
diff --git a/dnn/pitchdnn.c b/dnn/pitchdnn.c
new file mode 100644
index 00000000..5cf96e7f
--- /dev/null
+++ b/dnn/pitchdnn.c
@@ -0,0 +1,79 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include "pitchdnn.h"
+#include "os_support.h"
+#include "nnet.h"
+#include "lpcnet_private.h"
+
+
+float compute_pitchdnn( /* Runs the pitch network on one set of IF and xcorr features, updating the conv and GRU state in `st`, and returns (1/60)*(weighted mean of the output index around the strongest output) - 1.5 */
+ PitchDNNState *st,
+ const float *if_features,
+ const float *xcorr_features,
+ int arch
+ )
+{
+ float if1_out[DENSE_IF_UPSAMPLER_1_OUT_SIZE];
+ float downsampler_in[NB_XCORR_FEATURES + DENSE_IF_UPSAMPLER_2_OUT_SIZE]; /* xcorr branch output first, IF branch output after it */
+ float downsampler_out[DENSE_DOWNSAMPLER_OUT_SIZE];
+ float conv1_tmp1[(NB_XCORR_FEATURES + 2)*8] = {0}; /* zero-filled, so elements the code below does not write stay 0 */
+ float conv1_tmp2[(NB_XCORR_FEATURES + 2)*8] = {0};
+ float output[DENSE_FINAL_UPSAMPLER_OUT_SIZE];
+ int i;
+ int pos=0;
+ float maxval=-1; /* only outputs greater than -1 can become the maximum; if none is, pos stays 0 */
+ float sum=0;
+ float count=0;
+ PitchDNN *model = &st->model;
+ /* IF branch: two dense layers with tanh, written after the first NB_XCORR_FEATURES entries of downsampler_in */
+ compute_generic_dense(&model->dense_if_upsampler_1, if1_out, if_features, ACTIVATION_TANH, arch);
+ compute_generic_dense(&model->dense_if_upsampler_2, &downsampler_in[NB_XCORR_FEATURES], if1_out, ACTIVATION_TANH, arch);
+ /* xcorr branch: features copied at offset 1, then two conv2d layers with tanh using xcorr_mem1/xcorr_mem2 from the state; the result goes to the start of downsampler_in */
+ OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
+ compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH, arch);
+ compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH, arch);
+
+ compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH, arch);
+ compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out, arch);
+ compute_generic_dense(&model->dense_final_upsampler, output, st->gru_state, ACTIVATION_LINEAR, arch);
+ for (i=0;i<180;i++) { /* argmax over the first 180 outputs. NOTE(review): 180 is hard-coded -- confirm DENSE_FINAL_UPSAMPLER_OUT_SIZE >= 180 */
+ if (output[i] > maxval) {
+ pos = i;
+ maxval = output[i];
+ }
+ }
+ for (i=IMAX(0, pos-2); i<=IMIN(179, pos+2); i++) { /* exp-weighted mean index over pos-2..pos+2, clamped to 0..179 */
+ float p = exp(output[i]);
+ sum += p*i;
+ count += p;
+ }
+ /*printf("%d %f\n", pos, sum/count);*/
+ return (1.f/60.f)*(sum/count) - 1.5;
+ /*return 256.f/pow(2.f, (1.f/60.f)*i);*/
+}
+
+
+/* Clears the whole state and, unless USE_WEIGHTS_FILE is defined, initialises
+   the model from the built-in pitchdnn_arrays table. With USE_WEIGHTS_FILE
+   the model is left cleared for pitchdnn_load_model() to fill in. */
+void pitchdnn_init(PitchDNNState *st)
+{
+  int ret = 0;
+  OPUS_CLEAR(st, 1);
+#ifndef USE_WEIGHTS_FILE
+  ret = init_pitchdnn(&st->model, pitchdnn_arrays);
+#endif
+  celt_assert(ret == 0);
+}
+
+/* Initialises st->model from a weight blob of `len` bytes at `data`.
+   The model keeps pointers into `data`, so the blob must stay valid for as
+   long as the model is used.
+   Returns 0 on success, -1 if the blob cannot be parsed or does not contain
+   the arrays the model needs. */
+int pitchdnn_load_model(PitchDNNState *st, const void *data, int len) {
+  WeightArray *list;
+  int ret;
+  /* On a parse failure there is no list to search: bail out instead of
+     handing a NULL list to init_pitchdnn(). */
+  if (parse_weights(&list, data, len) < 0 || list == NULL) return -1;
+  ret = init_pitchdnn(&st->model, list);
+  opus_free(list);
+  return (ret == 0) ? 0 : -1;
+}
diff --git a/dnn/pitchdnn.h b/dnn/pitchdnn.h
new file mode 100644
index 00000000..25fa3a4b
--- /dev/null
+++ b/dnn/pitchdnn.h
@@ -0,0 +1,34 @@
+#ifndef PITCHDNN_H
+#define PITCHDNN_H
+
+
+typedef struct PitchDNN PitchDNN;
+
+#include "pitchdnn_data.h"
+
+#define PITCH_MIN_PERIOD 32
+#define PITCH_MAX_PERIOD 256
+
+#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
+
+
+typedef struct { /* Pitch network model plus the recurrent/convolution memories that compute_pitchdnn() updates from call to call */
+ PitchDNN model;
+ float gru_state[GRU_1_STATE_SIZE]; /* updated by the GRU and read by the final dense layer in compute_pitchdnn() */
+ float xcorr_mem1[(NB_XCORR_FEATURES + 2)*2]; /* memory passed to the conv2d_1 layer */
+ float xcorr_mem2[(NB_XCORR_FEATURES + 2)*2*8]; /* memory passed to the conv2d_2 layer */
+ float xcorr_mem3[(NB_XCORR_FEATURES + 2)*2*8]; /* NOTE(review): not referenced by pitchdnn.c as added in this change -- confirm whether it is still needed */
+} PitchDNNState;
+
+
+void pitchdnn_init(PitchDNNState *st);
+int pitchdnn_load_model(PitchDNNState *st, const void *data, int len);
+
+float compute_pitchdnn(
+ PitchDNNState *st,
+ const float *if_features,
+ const float *xcorr_features,
+ int arch
+ );
+
+#endif
diff --git a/dnn/tansig_table.h b/dnn/tansig_table.h
new file mode 100644
index 00000000..ebec7e3a
--- /dev/null
+++ b/dnn/tansig_table.h
@@ -0,0 +1,50 @@
+/* This file is auto-generated by gen_tables */
+
+#ifndef TANSIG_TABLE_H
+#define TANSIG_TABLE_H
+
+static const float tansig_table[201] = {
+0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
+0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
+0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
+0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
+0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
+0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
+0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
+0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
+0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
+0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
+0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
+0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
+0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
+0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
+0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
+0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
+0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
+0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
+0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
+0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
+0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
+0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
+0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
+0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
+0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
+0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
+0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
+0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
+0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
+0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
+0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
+0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
+0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
+0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
+0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
+0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f,
+};
+
+#endif /*TANSIG_TABLE_H*/
diff --git a/dnn/test_vec.c b/dnn/test_vec.c
new file mode 100644
index 00000000..d14d2502
--- /dev/null
+++ b/dnn/test_vec.c
@@ -0,0 +1,128 @@
+#include <stdio.h>
+#include <math.h>
+#include "opus_types.h"
+#include "arch.h"
+#include "common.h"
+#include "tansig_table.h"
+
+#define LPCNET_TEST
+
+// We need to call two versions of each function that share the same
+// name, so use #defines to temporarily rename them.
+
+#define lpcnet_exp2 lpcnet_exp2_fast
+#define tansig_approx tansig_approx_fast
+#define sigmoid_approx sigmoid_approx_fast
+#define softmax softmax_fast
+#define vec_tanh vec_tanh_fast
+#define vec_sigmoid vec_sigmoid_fast
+#define sgemv_accum16 sgemv_accum16_fast
+#define sparse_sgemv_accum16 sparse_sgemv_accum16_fast
+
+#ifdef __AVX__
+#include "vec_avx.h"
+#ifdef __AVX2__
+const char simd[]="AVX2";
+#else
+const char simd[]="AVX";
+#endif
+#elif __ARM_NEON__
+#include "vec_neon.h"
+const char simd[]="NEON";
+#else
+const char simd[]="none";
+
+#endif
+
+#undef lpcnet_exp2
+#undef tansig_approx
+#undef sigmoid_approx
+#undef softmax
+#undef vec_tanh
+#undef vec_sigmoid
+#undef sgemv_accum16
+#undef sparse_sgemv_accum16
+#include "vec.h"
+
+/* Dimensions of the test matrices. ROWS is a multiple of ROW_STEP (16). */
+#define ROW_STEP 16
+/* Parenthesized so the macro expands to a single value in any expression
+   (e.g. x/ROWS or ROWS-1). */
+#define ROWS (ROW_STEP*10)
+#define COLS 2
+#define ENTRIES 2
+
+/* Runs sgemv_accum16 and sgemv_accum16_fast on identical inputs and compares
+   their outputs exactly. Prints "pass" and returns 0 when every row matches;
+   otherwise prints "fail", dumps every row up to and including the first
+   mismatching one, and returns 1. */
+int test_sgemv_accum16() {
+  float weights[ROWS*COLS];
+  float x[COLS];
+  float out[ROWS], out_fast[ROWS];
+  int i;
+  int first_bad = -1;
+
+  printf("sgemv_accum16.....................: ");
+  for(i=0; i<ROWS*COLS; i++) weights[i] = i;
+  for(i=0; i<COLS; i++) x[i] = i+1;
+  for(i=0; i<ROWS; i++) out[i] = out_fast[i] = 0;
+
+  sgemv_accum16(out, weights, ROWS, COLS, 1, x);
+  sgemv_accum16_fast(out_fast, weights, ROWS, COLS, 1, x);
+
+  for(i=0; i<ROWS; i++) {
+    if (out[i] != out_fast[i]) {
+      first_bad = i;
+      break;
+    }
+  }
+  if (first_bad >= 0) {
+    printf("fail\n");
+    for(i=0; i<=first_bad; i++) {
+      printf("%d %f %f\n", i, out[i], out_fast[i]);
+    }
+    return 1;
+  }
+
+  printf("pass\n");
+  return 0;
+}
+
+
+/* Runs sparse_sgemv_accum16 and sparse_sgemv_accum16_fast on identical inputs
+   and compares the first ROW_STEP*ENTRIES outputs exactly. Prints "pass" and
+   returns 0 when they all match; otherwise prints "fail", dumps every row up
+   to and including the first mismatching one, and returns 1. */
+int test_sparse_sgemv_accum16() {
+  int rows = ROW_STEP*ENTRIES;
+  int indx[] = {1,0,2,0,1};
+  float w[ROW_STEP*(1+2)];
+  float x[ENTRIES] = {1,2};
+  float out[ROW_STEP*(1+2)], out_fast[ROW_STEP*(1+2)];
+  int i;
+  int first_bad = -1;
+
+  printf("sparse_sgemv_accum16..............: ");
+  for(i=0; i<ROW_STEP*(1+2); i++) {
+    w[i] = i;
+    out[i] = out_fast[i] = 0;
+  }
+
+  sparse_sgemv_accum16(out, w, rows, indx, x);
+  sparse_sgemv_accum16_fast(out_fast, w, rows, indx, x);
+
+  for(i=0; i<ROW_STEP*ENTRIES; i++) {
+    if (out[i] != out_fast[i]) {
+      first_bad = i;
+      break;
+    }
+  }
+  if (first_bad >= 0) {
+    printf("fail\n");
+    for(i=0; i<=first_bad; i++) {
+      printf("%d %f %f\n", i, out[i], out_fast[i]);
+    }
+    return 1;
+  }
+
+  printf("pass\n");
+  return 0;
+}
+
+/* Runs both vector tests (always both, even if the first one fails) and
+   returns 1 if any of them failed, 0 otherwise. */
+int main() {
+  int failed = 0;
+  printf("testing vector routines on SIMD: %s\n", simd);
+  failed |= test_sgemv_accum16();
+  failed |= test_sparse_sgemv_accum16();
+  return failed != 0;
+}
diff --git a/dnn/torch/dnntools/dnntools/__init__.py b/dnn/torch/dnntools/dnntools/__init__.py
new file mode 100644
index 00000000..117597ab
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/__init__.py
@@ -0,0 +1,2 @@
+from . import quantization
+from . import sparsification \ No newline at end of file
diff --git a/dnn/torch/dnntools/dnntools/quantization/__init__.py b/dnn/torch/dnntools/dnntools/quantization/__init__.py
new file mode 100644
index 00000000..3b46a2e0
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/quantization/__init__.py
@@ -0,0 +1 @@
+from .softquant import soft_quant, remove_soft_quant \ No newline at end of file
diff --git a/dnn/torch/dnntools/dnntools/quantization/softquant.py b/dnn/torch/dnntools/dnntools/quantization/softquant.py
new file mode 100644
index 00000000..877c6450
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/quantization/softquant.py
@@ -0,0 +1,113 @@
+import torch
+
+@torch.no_grad()
+def compute_optimal_scale(weight):
+    """ computes one quantization scale per output row of a 2-d weight matrix
+
+    For every row the scale is the larger of max|w| / 127 and
+    max|w[2k] + w[2k+1]| / 129 (sums of adjacent input pairs).
+
+    Parameters:
+    -----------
+    weight: torch.Tensor
+        matrix of shape (n_out, n_in); n_in must be divisible by 4
+
+    Returns:
+    --------
+    scale: torch.Tensor
+        tensor of shape (n_out,)
+    """
+    n_out, n_in = weight.shape
+    assert n_in % 4 == 0
+
+    # number of zero rows needed to reach the next multiple of 8; the padding
+    # rows are dropped again before returning and do not change the result
+    pad = -n_out % 8
+    if pad:
+        weight = torch.cat((weight, torch.zeros((pad, n_in), dtype=weight.dtype, device=weight.device)), dim=0)
+
+    weight_max_abs, _ = torch.max(torch.abs(weight), dim=1)
+    weight_max_sum, _ = torch.max(torch.abs(weight[:, : n_in : 2] + weight[:, 1 : n_in : 2]), dim=1)
+    scale_max = weight_max_abs / 127
+    scale_sum = weight_max_sum / 129
+
+    scale = torch.maximum(scale_max, scale_sum)
+
+    return scale[:n_out]
+
+@torch.no_grad()
+def q_scaled_noise(module, weight):
+    """ draws uniform noise in [-0.5, 0.5) scaled by the per-row quantization scale
+
+    The weight is first brought into a 2-d layout (which depends on whether
+    module is a Conv1d, a ConvTranspose1d or anything else with a 2-d weight),
+    noise is drawn in that layout, zeroed wherever the weight is exactly zero,
+    scaled row-wise with compute_optimal_scale and finally mapped back to the
+    layout of the weight.
+
+    Raises:
+    -------
+    ValueError if the module/weight combination is not supported
+    """
+    if isinstance(module, torch.nn.Conv1d):
+        out_channels, in_channels, kernel_size = weight.size(0), weight.size(1), weight.size(2)
+        flat = weight.permute(0, 2, 1).flatten(1)
+        restore = lambda t: t.reshape(out_channels, kernel_size, in_channels).permute(0, 2, 1)
+    elif isinstance(module, torch.nn.ConvTranspose1d):
+        i, o, k = weight.shape
+        flat = weight.permute(2, 1, 0).reshape(k * o, i)
+        restore = lambda t: t.reshape(k, o, i).permute(2, 1, 0)
+    elif len(weight.shape) == 2:
+        flat = weight
+        restore = lambda t: t
+    else:
+        raise ValueError('unknown quantization setting')
+
+    noise = torch.rand_like(flat) - 0.5
+    noise[flat == 0] = 0 # ignore zero entries from sparsification
+    row_scale = compute_optimal_scale(flat)
+    noise = noise * row_scale.unsqueeze(-1)
+
+    return restore(noise)
+
+class SoftQuant:
+    """ pair of forward hooks that perturbs module weights with noise while training
+
+    The pre-forward hook adds noise to every attribute listed in names and the
+    post-forward hook subtracts the very same noise again, so the stored
+    weights are unchanged (up to rounding) after each forward pass. Nothing
+    happens while the module is in eval mode.
+
+    If scale is None the noise is scaled per row via q_scaled_noise, otherwise
+    it is uniform in [-scale/2, scale/2).
+    """
+    name: str
+
+    def __init__(self, names: list, scale) -> None:
+        self.names = names
+        self.quantization_noise = None
+        self.scale = scale
+
+    def __call__(self, module, inputs, *args, before=True):
+        if not module.training: return
+
+        if before:
+            self.quantization_noise = dict()
+            for name in self.names:
+                weight = getattr(module, name)
+                if self.scale is None:
+                    self.quantization_noise[name] = q_scaled_noise(module, weight)
+                else:
+                    self.quantization_noise[name] = \
+                        self.scale * (torch.rand_like(weight) - 0.5)
+                with torch.no_grad():
+                    weight.data[:] = weight + self.quantization_noise[name]
+        else:
+            # nothing to undo if the pre-forward hook did not add noise
+            # (e.g. the module was switched to training mode mid-forward)
+            if self.quantization_noise is None: return
+
+            for name in self.names:
+                weight = getattr(module, name)
+                with torch.no_grad():
+                    weight.data[:] = weight - self.quantization_noise[name]
+            self.quantization_noise = None
+
+    @staticmethod
+    def apply(module, names=['weight'], scale=None):
+        """ registers the hook pair on module and returns the SoftQuant object
+
+        Raises ValueError if module lacks one of the attributes in names.
+        """
+        for name in names:
+            if not hasattr(module, name):
+                raise ValueError(f"module {type(module).__name__} has no attribute '{name}'")
+
+        fn = SoftQuant(names, scale)
+
+        fn_before = lambda *x : fn(*x, before=True)
+        fn_after = lambda *x : fn(*x, before=False)
+        # tag the hooks so that remove_soft_quant can find them again
+        setattr(fn_before, 'sqm', fn)
+        setattr(fn_after, 'sqm', fn)
+
+        module.register_forward_pre_hook(fn_before)
+        module.register_forward_hook(fn_after)
+
+        return fn
+
+
+def soft_quant(module, names=['weight'], scale=None):
+    """ attaches SoftQuant hooks for the given attribute names and returns the module itself """
+    SoftQuant.apply(module, names, scale)
+    return module
+
+def remove_soft_quant(module, names=['weight']):
+    """ removes the SoftQuant hooks that were registered for exactly the given names
+
+    Hooks are recognized by their 'sqm' attribute; hooks belonging to a
+    SoftQuant object with a different names list are left in place.
+    Returns the module itself.
+    """
+    for hooks in (module._forward_pre_hooks, module._forward_hooks):
+        # collect the keys first: deleting while iterating over the dict
+        # raises a RuntimeError
+        stale = [
+            k for k, hook in hooks.items()
+            if isinstance(getattr(hook, 'sqm', None), SoftQuant) and hook.sqm.names == names
+        ]
+        for k in stale:
+            del hooks[k]
+
+    return module \ No newline at end of file
diff --git a/dnn/torch/dnntools/dnntools/relegance/__init__.py b/dnn/torch/dnntools/dnntools/relegance/__init__.py
new file mode 100644
index 00000000..cee0143b
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/relegance/__init__.py
@@ -0,0 +1,2 @@
+from .relegance import relegance_gradient_weighting, relegance_create_tconv_kernel, relegance_map_relevance_to_input_domain, relegance_resize_relevance_to_input_size
+from .meta_critic import MetaCritic \ No newline at end of file
diff --git a/dnn/torch/dnntools/dnntools/relegance/meta_critic.py b/dnn/torch/dnntools/dnntools/relegance/meta_critic.py
new file mode 100644
index 00000000..1af0f8ff
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/relegance/meta_critic.py
@@ -0,0 +1,85 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+class MetaCritic():
+    def __init__(self, normalize=False, gamma=0.9, beta=0.0, joint_stats=False):
+        """ Class for assessing relevance of discriminator scores
+
+        Args:
+            normalize (bool, optional): if True, score differences are normalized with running statistics. Defaults to False.
+            gamma (float, optional): update rate for tracking discriminator stats. Defaults to 0.9.
+            beta (float, optional): Minimum confidence related threshold. Defaults to 0.0.
+            joint_stats (bool, optional): if True, all discriminators share one set of statistics. Defaults to False.
+        """
+        self.normalize = normalize
+        self.gamma = gamma
+        self.beta = beta
+        self.joint_stats = joint_stats
+
+        # maps discriminator key -> {'std': ..., 'mean': ...}
+        self.disc_stats = dict()
+
+    def __call__(self, disc_id, real_scores, generated_scores):
+        """ calculates relevance from normalized scores
+
+        Args:
+            disc_id (any valid key): id for tracking discriminator statistics
+            real_scores (torch.tensor): scores for real data
+            generated_scores (torch.tensor): scores for generated data; expecting device to match real_scores.device
+
+        Returns:
+            torch.tensor: output-domain relevance
+        """
+
+        if not self.normalize:
+            mean, std = 0, 1
+        else:
+            real = real_scores.detach()
+            generated = generated_scores.detach()
+
+            real_std = torch.std(real).cpu().item()
+            gen_std = torch.std(generated).cpu().item()
+            batch_std = (real_std**2 + gen_std**2) ** .5
+            batch_mean = torch.mean(real).cpu().item() - torch.mean(generated).cpu().item()
+
+            key = 0 if self.joint_stats else disc_id
+            stats = self.disc_stats.get(key)
+
+            if stats is None:
+                # first observation for this key initializes the statistics
+                stats = {
+                    'std': batch_std + 1e-5,
+                    'mean': batch_mean
+                }
+                self.disc_stats[key] = stats
+            else:
+                # exponential moving average with rate gamma
+                stats['std'] = self.gamma * stats['std'] + (1 - self.gamma) * batch_std
+                stats['mean'] = self.gamma * stats['mean'] + (1 - self.gamma) * batch_mean
+
+            std = stats['std']
+            mean = stats['mean']
+
+        relevance = torch.relu((real_scores - generated_scores - mean) / std + mean - self.beta)
+
+        if False: print(f"relevance({disc_id}): {relevance.min()=} {relevance.max()=} {relevance.mean()=}")
+
+        return relevance \ No newline at end of file
diff --git a/dnn/torch/dnntools/dnntools/relegance/relegance.py b/dnn/torch/dnntools/dnntools/relegance/relegance.py
new file mode 100644
index 00000000..29c5be23
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/relegance/relegance.py
@@ -0,0 +1,449 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+import torch.nn.functional as F
+
+
+def view_one_hot(index, length):
+    """ returns a view shape of the given length that is 1 everywhere except for a -1 at index """
+    shape = [1 for _ in range(length)]
+    shape[index] = -1
+    return shape
+
+def create_smoothing_kernel(widths, gamma=1.5):
+    """ creates a truncated gaussian smoothing kernel for the given widths
+
+    Parameters:
+    -----------
+    widths: list[Int] or torch.LongTensor
+        specifies the shape of the smoothing kernel, entries must be > 0.
+
+    gamma: float, optional
+        decay factor for gaussian relative to kernel size
+
+    Returns:
+    --------
+    kernel: torch.FloatTensor
+        kernel of shape widths, normalized to sum to one
+    """
+
+    widths = torch.LongTensor(widths)
+    num_dims = len(widths)
+
+    assert(widths.min() > 0)
+
+    centers = widths.float() / 2 - 0.5
+    sigmas = gamma * (centers + 1)
+
+    # accumulate the squared, sigma-normalized distance from the center along
+    # every axis; each 1-d term is broadcast along its own axis
+    exponent = 0
+    for axis in range(num_dims):
+        term = ((torch.arange(widths[axis]) - centers[axis]) / sigmas[axis]) ** 2
+        shape = num_dims * [1]
+        shape[axis] = -1
+        exponent = exponent + term.view(shape)
+
+    kernel = torch.exp(- exponent)
+
+    return kernel / kernel.sum()
+
+
+def create_partition_kernel(widths, strides):
+    """ creates a partition kernel for mapping a convolutional network output back to the input domain
+
+    Given a fully convolutional network with receptive field of shape widths and the given strides, this
+    function constructs an interpolation kernel whose translations by multiples of the given strides form
+    a partition of one on the input domain.
+
+    Parameters:
+    -----------
+    widths: list[Int] or torch.LongTensor
+        shape of receptive field
+
+    strides: list[Int] or torch.LongTensor
+        total strides of convolutional network
+
+    Returns:
+    --------
+    kernel: torch.FloatTensor
+    """
+
+    num_dims = len(widths)
+    assert num_dims == len(strides) and num_dims in {1, 2, 3}
+
+    conv = (F.conv1d, F.conv2d, F.conv3d)[num_dims - 1]
+
+    widths = torch.LongTensor(widths)
+    strides = torch.LongTensor(strides)
+
+    # box of ones covering one stride step (clipped to the receptive field)
+    box = torch.ones(torch.minimum(strides, widths).tolist())
+
+    # create interpolation kernel eta
+    eta_widths = widths - strides + 1
+    if eta_widths.min() <= 0:
+        print("[create_partition_kernel] warning: receptive field does not cover input domain")
+        eta_widths = torch.maximum(eta_widths, torch.ones_like(eta_widths))
+
+    eta_shape = eta_widths.tolist()
+    eta = create_smoothing_kernel(eta_widths).view(1, 1, *eta_shape)
+
+    # F.pad expects (left, right) pairs starting from the last dimension
+    pad_spec = []
+    for extent in reversed(eta_shape):
+        pad_spec += [extent - 1, extent - 1]
+
+    padded_box = F.pad(box, pad_spec)
+    padded_box = padded_box.view(1, 1, *padded_box.shape)
+
+    return conv(padded_box, eta)
+
+
+def receptive_field(conv_model, input_shape, output_position):
+    """ estimates boundaries of receptive field connected to output_position via autograd
+
+    A random input is pushed through the model, the output at output_position
+    (summed over channels) is back-propagated, and the extent of the non-zero
+    input gradient is reported.
+
+    Parameters:
+    -----------
+    conv_model: nn.Module or autograd function
+        function or model implementing fully convolutional model
+
+    input_shape: List[Int]
+        input shape ignoring batch dimension, i.e. [num_channels, dim1, dim2, ...]
+
+    output_position: List[Int]
+        output position for which the receptive field is determined; the function raises an exception
+        if output_position is out of bounds for the given input_shape.
+
+    Returns:
+    --------
+    low: List[Int]
+        start indices of receptive field
+
+    high: List[Int]
+        stop indices of receptive field (inclusive)
+
+    """
+
+    probe = torch.randn((1,) + tuple(input_shape), requires_grad=True)
+
+    # collapse channels and remove batch dimension
+    response = torch.sum(conv_model(probe), 1)[0]
+
+    # select the single output position of interest
+    selector = torch.zeros_like(response)
+    position = [torch.tensor(p) for p in output_position]
+    try:
+        selector.index_put_(position, torch.tensor(1, dtype=selector.dtype))
+    except IndexError:
+        raise ValueError('output_position out of bounds')
+
+    (selector * response).sum().backward()
+
+    # sum over channels and remove batch dimension
+    sensitivity = torch.sum(probe.grad, dim=1)[0]
+
+    low, high = [], []
+    for axis_indices in torch.nonzero(sensitivity, as_tuple=True):
+        low.append(axis_indices.min().item())
+        high.append(axis_indices.max().item())
+
+    return low, high
+
+def estimate_conv_parameters(model, num_channels, num_dims, width, max_stride=10):
+    """ attempts to estimate receptive field size, strides and left paddings for given model
+
+    The receptive fields of two neighbouring output positions are measured via
+    autograd; their common size gives the receptive field size and their
+    offset gives the stride.
+
+    Parameters:
+    -----------
+    model: nn.Module or autograd function
+        fully convolutional model for which parameters are estimated
+
+    num_channels: Int
+        number of input channels for model
+
+    num_dims: Int
+        number of input dimensions for model (without channel dimension)
+
+    width: Int
+        width of the input tensor (a hyper-square) on which the receptive fields are derived via autograd
+
+    max_stride: Int, optional
+        assumed maximal stride of the model for any dimension, when set too low the function may fail for
+        any value of width
+
+    Returns:
+    --------
+    receptive_field_size: List[Int]
+        receptive field size in all dimension
+
+    strides: List[Int]
+        stride in all dimensions
+
+    left_paddings: List[Int]
+        left padding in all dimensions; this is relevant for aligning the receptive field on the input plane
+
+    Raises:
+    -------
+    ValueError, KeyError
+
+    """
+
+    input_shape = [num_channels] + num_dims * [width]
+    probe_index = width // (2 * max_stride)
+    first_position = num_dims * [probe_index]
+    second_position = num_dims * [probe_index + 1]
+
+    low1, high1 = receptive_field(model, input_shape, first_position)
+    low2, high2 = receptive_field(model, input_shape, second_position)
+
+    widths1 = [h - l + 1 for l, h in zip(low1, high1)]
+    widths2 = [h - l + 1 for l, h in zip(low2, high2)]
+
+    # both fields must be complete (same size) and distinct (shifted) in every dimension
+    same_size = all(w1 == w2 for w1, w2 in zip(widths1, widths2))
+    shifted = all(l1 != l2 for l1, l2 in zip(low1, low2))
+    if not (same_size and shifted):
+        raise ValueError("[estimate_strides]: widths to small to determine strides")
+
+    strides = [l2 - l1 for l1, l2 in zip(low1, low2)]
+    left_paddings = [s * p - l for l, s, p in zip(low1, strides, first_position)]
+
+    return widths1, strides, left_paddings
+
def inspect_conv_model(model, num_channels, num_dims, max_width=10000, width_hint=None, stride_hint=None, verbose=False):
    """ determines size of receptive field, strides and padding by trial and error

    estimate_conv_parameters is called with growing input widths and growing
    assumed maximal strides until it succeeds or the search budget is used up.

    Parameters:
    -----------
    model: nn.Module or autograd function
        fully convolutional model for which parameters are estimated

    num_channels: Int
        number of input channels for model

    num_dims: Int
        number of input dimensions for model (without channel dimension)

    max_width: Int
        maximum width of the input tensor (a hyper-square) on which the receptive fields are derived via autograd

    width_hint: Int or None, optional
        if given, the search starts at width 2 * width_hint instead of max_width // 100

    stride_hint: Int or None, optional
        if given, the search starts at this assumed maximal stride instead of max_width // 200

    verbose: bool, optional
        if true, the function prints parameters for individual trials

    Returns:
    --------
    receptive_field_size: List[Int]
        receptive field size in all dimension

    strides: List[Int]
        stride in all dimensions

    left_paddings: List[Int]
        left padding in all dimensions; this is relevant for aligning the receptive field on the input plane

    Raises:
    -------
    ValueError
        if no trial within the given max_width succeeds
    """

    max_stride = max_width // 2
    stride = max_stride // 100
    width = max_width // 100

    if width_hint is not None:
        width = 2 * width_hint
    if stride_hint is not None:
        stride = stride_hint

    # The search grows both values by doubling, so a start value of 0 (small
    # max_width or a zero hint) would never change and the loop would not end.
    width = max(width, 1)
    stride = max(stride, 1)

    while width < max_width and stride < max_stride:
        if verbose:
            print(f"[inspect_conv_model] trying parameters {width=}, {stride=}")

        try:
            receptive_field_size, strides, left_paddings = estimate_conv_parameters(
                model, num_channels, num_dims, width, stride
            )
        except Exception:
            # A failed trial only means this width/stride pair was unsuitable.
            # KeyboardInterrupt and SystemExit are deliberately not caught here.
            pass
        else:
            return receptive_field_size, strides, left_paddings

        width *= 2
        if width >= max_width and stride < max_stride:
            # widths exhausted for this stride: assume a larger stride and restart
            stride *= 2
            width = 2 * stride

    raise ValueError(f'could not determine conv parameter with given max_width={max_width}')
+
+
class GradWeight(torch.autograd.Function):
    """ identity in the forward pass, element-wise gradient scaling in the backward pass """

    def __init__(self):
        super().__init__()

    @staticmethod
    def forward(ctx, x, weight):
        # only the weight is needed again when gradients flow back
        ctx.save_for_backward(weight)
        return x.clone()

    @staticmethod
    def backward(ctx, grad_output):
        (weight,) = ctx.saved_tensors

        # scale the gradient for x; the weight itself receives no gradient
        return grad_output * weight, None
+
+
+# API
+
def relegance_gradient_weighting(x, weight):
    """ passes x through unchanged in value while weighting its gradient

    Args:
        x (torch.tensor): input tensor
        weight (torch.tensor or None): weight tensor for gradients of x; if None, no gradient weighting will be applied in backward pass

    Returns:
        torch.tensor: x itself if weight is None, otherwise the result of GradWeight.apply(x, weight)
    """
    if weight is not None:
        return GradWeight.apply(x, weight)

    return x
+
+
+
def relegance_create_tconv_kernel(model, num_channels, num_dims, width_hint=None, stride_hint=None, verbose=False):
    """ creates parameters for mapping back output domain relevance to input domain

    The convolution parameters of the model are determined with inspect_conv_model,
    first with max_width = 100000 / 10 ** num_dims and, if that fails, once more
    with a ten times larger max_width.

    Args:
        model (nn.Module or autograd.Function): fully convolutional model
        num_channels (int): number of input channels to model
        num_dims (int): number of input dimensions of model (without channel and batch dimension)
        width_hint(int or None): optional hint at maximal width of receptive field
        stride_hint(int or None): optional hint at maximal stride
        verbose (bool): if true, the individual trials of inspect_conv_model are printed

    Returns:
        dict: contains kernel, kernel dimensions, strides and left paddings for transposed convolution

    Raises:
        RuntimeError: if the parameters cannot be determined with either budget; the
            error of the last attempt is attached as the cause
    """

    max_width = int(100000 / (10 ** num_dims))

    last_error = None
    for budget in (max_width, 10 * max_width):
        try:
            receptive_field_size, strides, left_paddings = inspect_conv_model(
                model,
                num_channels,
                num_dims,
                max_width=budget,
                width_hint=width_hint,
                stride_hint=stride_hint,
                verbose=verbose,
            )
            break
        except Exception as error:
            # only ordinary failures trigger the retry; KeyboardInterrupt and
            # SystemExit propagate immediately
            last_error = error
    else:
        raise RuntimeError("could not determine parameters within given compute budget") from last_error

    partition_kernel = create_partition_kernel(receptive_field_size, strides)
    # the same kernel is used for every input channel
    partition_kernel = torch.repeat_interleave(partition_kernel, num_channels, 1)

    tconv_parameters = {
        'kernel': partition_kernel,
        'receptive_field_shape': receptive_field_size,
        'stride': strides,
        'left_padding': left_paddings,
        'num_dims': num_dims
    }

    return tconv_parameters
+
+
+
def relegance_map_relevance_to_input_domain(od_relevance, tconv_parameters):
    """ maps output-domain relevance to input-domain relevance via transpose convolution

    Args:
        od_relevance (torch.tensor): output-domain relevance
        tconv_parameters (dict): parameter dict as created by relegance_create_tconv_kernel

    Returns:
        torch.tensor: input-domain relevance. The tensor is left aligned, i.e. the all-zero index of the output corresponds to the all-zero index of the discriminator input.
                      Otherwise, the size of the output tensor does not need to match the size of the discriminator input. Use relegance_resize_relevance_to_input_size for a
                      convenient way to adjust the output to the correct size.

    Raises:
        ValueError: if number of dimensions is not supported
    """

    kernel = tconv_parameters['kernel'].to(od_relevance.device)
    rf_shape = tconv_parameters['receptive_field_shape']
    stride = tconv_parameters['stride']
    left_padding = tconv_parameters['left_padding']

    # the kernel carries two leading non-spatial dimensions
    num_dims = len(kernel.shape) - 2

    transposed_convolutions = {
        1: F.conv_transpose1d,
        2: F.conv_transpose2d,
        3: F.conv_transpose3d,
    }
    if num_dims not in transposed_convolutions:
        raise ValueError(f'[relegance_map_to_input_domain] error: num_dims = {num_dims} not supported')

    # repeat boundary values; F.pad expects (left, right) pairs starting with the last dimension
    od_padding = [rf_shape[d] // stride[d] + 1 for d in range(num_dims)]
    pad_arg = [amount for amount in reversed(od_padding) for _ in range(2)]
    padded_od_relevance = F.pad(od_relevance, pad_arg, mode='replicate')

    # apply mapping
    id_relevance = transposed_convolutions[num_dims](padded_od_relevance, kernel, stride=stride)

    # left trimming: drop the model's left padding plus everything produced by the replicated left boundary
    trimming = tuple(
        slice(left_padding[d] + stride[d] * od_padding[d], None) for d in range(num_dims)
    )
    id_relevance = id_relevance[(Ellipsis,) + trimming]

    return id_relevance
+
+
def relegance_resize_relevance_to_input_size(reference_input, relevance):
    """ adjusts size of relevance tensor to reference input size

    The result has the shape of reference_input. Along every spatial dimension the
    relevance is cropped if it is longer than the reference and padded with zeros
    on the right if it is shorter.

    Args:
        reference_input (torch.tensor): discriminator input tensor for reference
        relevance (torch.tensor): input-domain relevance corresponding to input tensor reference_input

    Returns:
        torch.tensor: resized relevance

    Raises:
        ValueError: if number of dimensions is not supported
    """
    # the two leading dimensions are not resized
    num_dims = len(reference_input.shape) - 2
    if num_dims not in (1, 2, 3):
        raise ValueError(f'[relegance_resize_relevance_to_input_size] error: num_dims = {num_dims} not supported')

    resized_relevance = torch.zeros_like(reference_input)

    # common extent of both tensors along every spatial dimension
    overlap = tuple(
        slice(0, min(reference_input.size(d), relevance.size(d))) for d in range(-num_dims, 0)
    )
    index = (Ellipsis,) + overlap

    with torch.no_grad():
        # write only into the overlapping region so that a shorter relevance
        # leaves the remaining entries at zero instead of failing on a shape mismatch
        resized_relevance[index] = relevance[index]

    return resized_relevance
diff --git a/dnn/torch/dnntools/dnntools/sparsification/__init__.py b/dnn/torch/dnntools/dnntools/sparsification/__init__.py
new file mode 100644
index 00000000..fcc91746
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/__init__.py
@@ -0,0 +1,6 @@
+from .gru_sparsifier import GRUSparsifier
+from .conv1d_sparsifier import Conv1dSparsifier
+from .conv_transpose1d_sparsifier import ConvTranspose1dSparsifier
+from .linear_sparsifier import LinearSparsifier
+from .common import sparsify_matrix, calculate_gru_flops_per_step
+from .utils import mark_for_sparsification, create_sparsifier \ No newline at end of file
diff --git a/dnn/torch/dnntools/dnntools/sparsification/base_sparsifier.py b/dnn/torch/dnntools/dnntools/sparsification/base_sparsifier.py
new file mode 100644
index 00000000..dd62f40b
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/base_sparsifier.py
@@ -0,0 +1,58 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
class BaseSparsifier:
    """ common step scheduling for sparsifiers

    Subclasses implement sparsify(alpha, verbose); step() decides when it is
    called and with which interpolation factor alpha.
    """

    def __init__(self, task_list, start, stop, interval, exponent=3):
        """
        Parameters:
        -----------
        task_list : list
            tasks handed to the subclass; stored unchanged
        start : int
            first step in which sparsification is carried out
        stop : int
            step from which on alpha is 0 and sparsification runs in every step
        interval : int
            between start and stop only every interval-th step sparsifies
        exponent : float
            exponent applied to the linear interpolation factor
        """

        # just copying parameters...
        self.start = start
        self.stop = stop
        self.interval = interval
        self.exponent = exponent
        self.task_list = task_list

        # ... and setting counter to 0
        self.step_counter = 0

    def step(self, verbose=False):
        """ advances the step counter and sparsifies if the schedule asks for it

        alpha = ((stop - step) / (stop - start)) ** exponent for start <= step < stop
        and alpha = 0 for step >= stop.
        """
        self.step_counter += 1

        if self.step_counter < self.start:
            return

        if self.step_counter < self.stop:
            # update only every self.interval-th step
            if self.step_counter % self.interval:
                return

            alpha = ((self.stop - self.step_counter) / (self.stop - self.start)) ** self.exponent
        else:
            alpha = 0

        self.sparsify(alpha, verbose=verbose)

    def sparsify(self, alpha, verbose=False):
        """ sparsification hook called by step(); must be provided by subclasses """
        raise NotImplementedError(f"{type(self).__name__} does not implement sparsify")
diff --git a/dnn/torch/dnntools/dnntools/sparsification/common.py b/dnn/torch/dnntools/dnntools/sparsification/common.py
new file mode 100644
index 00000000..47181800
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/common.py
@@ -0,0 +1,123 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+debug=True
+
def sparsify_matrix(matrix : torch.Tensor, density : float, block_size, keep_diagonal : bool=False, return_mask : bool=False):
    """ sparsifies matrix with specified block size

        Blocks are ranked by their energy (sum of squared entries) and the
        round(number_of_blocks * density) strongest blocks are kept; blocks whose
        energy ties with the weakest survivor are kept as well.

        Parameters:
        -----------
        matrix : torch.Tensor
            two-dimensional matrix to sparsify
        density : float
            target density, i.e. the fraction of blocks to keep
        block_size : [int, int]
            block size dimensions; must divide the matrix dimensions
        keep_diagonal : bool
            If true, the diagonal is excluded from the block energies and added back
            after masking. This option requires a square matrix and defaults to False
        return_mask : bool
            If true, the element-wise block mask is returned as second value

        Returns:
        --------
        masked_matrix : torch.Tensor
            sparsified matrix (a new tensor; the input is not modified)
        mask : torch.Tensor
            only if return_mask is true; 1 for kept entries and 0 for pruned entries

        Raises:
        -------
        ValueError
            if block_size does not divide the matrix size, or if keep_diagonal is
            requested for a non-square matrix
    """

    m, n   = matrix.shape
    m1, n1 = block_size

    if m % m1 or n % n1:
        raise ValueError(f"block size {(m1, n1)} does not divide matrix size {(m, n)}")

    # extract diagonal if keep_diagonal = True
    if keep_diagonal:
        if m != n:
            raise ValueError("Attempting to sparsify non-square matrix with keep_diagonal=True")

        to_spare = torch.diag(torch.diag(matrix))
        matrix   = matrix - to_spare
    else:
        to_spare = torch.zeros_like(matrix)

    # calculate energy in sub-blocks
    x = torch.reshape(matrix, (m // m1, m1, n // n1, n1))
    x = x ** 2
    block_energies = torch.sum(torch.sum(x, dim=3), dim=1)

    number_of_blocks = (m * n) // (m1 * n1)
    number_of_survivors = round(number_of_blocks * density)

    # create mask
    if number_of_survivors == 0:
        # no block may survive; a threshold of 0 would keep every block
        # because block energies are never negative
        mask = torch.zeros_like(block_energies)
    else:
        # energy of the weakest surviving block
        threshold = torch.sort(torch.flatten(block_energies)).values[-number_of_survivors]
        mask = torch.ones_like(block_energies)
        mask[block_energies < threshold] = 0

    # expand block mask to element resolution
    mask = torch.repeat_interleave(mask, m1, dim=0)
    mask = torch.repeat_interleave(mask, n1, dim=1)

    # perform masking
    masked_matrix = mask * matrix + to_spare

    if return_mask:
        return masked_matrix, mask
    else:
        return masked_matrix
+
def calculate_gru_flops_per_step(gru, sparsification_dict=None, drop_input=False):
    """ estimates the number of floating point operations of one GRU step

        Parameters:
        -----------
        gru : object with attributes input_size and hidden_size (e.g. torch.nn.GRU)
            GRU for which the estimate is computed
        sparsification_dict : dict or None
            optional dictionary with keys in {'W_ir', 'W_iz', 'W_in', 'W_hr', 'W_hz', 'W_hn'};
            the first entry of each value is used as density of that matrix, missing
            keys count as dense
        drop_input : bool
            if true, the input matrix-vector products are left out of the estimate

        Returns:
        --------
        flops : number
            estimated operations per step
    """
    if sparsification_dict is None:
        sparsification_dict = {}

    input_size = gru.input_size
    hidden_size = gru.hidden_size
    flops = 0

    # average density over the three input and the three recurrent matrices
    input_density = (
        sparsification_dict.get('W_ir', [1])[0]
        + sparsification_dict.get('W_in', [1])[0]
        + sparsification_dict.get('W_iz', [1])[0]
    ) / 3

    recurrent_density = (
        sparsification_dict.get('W_hr', [1])[0]
        + sparsification_dict.get('W_hn', [1])[0]
        + sparsification_dict.get('W_hz', [1])[0]
    ) / 3

    # input matrix vector multiplications
    if not drop_input:
        flops += 2 * 3 * input_size * hidden_size * input_density

    # recurrent matrix vector multiplications
    flops += 2 * 3 * hidden_size * hidden_size * recurrent_density

    # biases
    flops += 6 * hidden_size

    # activations estimated by 10 flops per activation
    flops += 30 * hidden_size

    return flops
diff --git a/dnn/torch/dnntools/dnntools/sparsification/conv1d_sparsifier.py b/dnn/torch/dnntools/dnntools/sparsification/conv1d_sparsifier.py
new file mode 100644
index 00000000..1ac51d0d
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/conv1d_sparsifier.py
@@ -0,0 +1,133 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+from .base_sparsifier import BaseSparsifier
+from .common import sparsify_matrix, debug
+
+
class Conv1dSparsifier(BaseSparsifier):
    def __init__(self, task_list, start, stop, interval, exponent=3):
        """ Sparsifier for torch.nn.Conv1d layers

        Parameters:
        -----------
        task_list : list
            task_list contains a list of tuples (conv1d, params), where conv1d is an instance
            of torch.nn.Conv1d and params is a tuple (density, [m, n]),
            where density is the target density in [0, 1], [m, n] is the shape sub-blocks to which
            sparsification is applied.

        start : int
            training step after which sparsification will be started.

        stop : int
            training step after which sparsification will be completed.

        interval : int
            sparsification interval for steps between start and stop. After stop sparsification will be
            carried out after every call to Conv1dSparsifier.step()

        exponent : float
            Interpolation exponent for sparsification interval. In step i sparsification will be carried out
            with density (alpha + target_density * (1 - alpha)), where
            alpha = ((stop - i) / (stop - start)) ** exponent

        Example:
        --------
        >>> import torch
        >>> conv = torch.nn.Conv1d(8, 16, 8)
        >>> params = (0.2, [8, 4])
        >>> sparsifier = Conv1dSparsifier([(conv, params)], 0, 100, 50)
        >>> for i in range(100):
        ...         sparsifier.step()
        """
        # forward the caller's exponent instead of a hard-coded 3
        super().__init__(task_list, start, stop, interval, exponent=exponent)

        # mask of the most recently processed layer
        self.last_mask = None
        # one mask per task, keyed by position in task_list, so that masks of
        # different layers are never compared with each other
        self._last_masks = {}


    def sparsify(self, alpha, verbose=False):
        """ carries out sparsification step

        Call this function after optimizer.step in your
        training loop.

        Parameters:
        ----------
        alpha : float
            density interpolation parameter (1: dense, 0: target density)
        verbose : bool
            if true, densities are printed out

        Returns:
        --------
        None

        """

        with torch.no_grad():
            for index, (conv, params) in enumerate(self.task_list):
                # weight_v takes precedence when present
                # NOTE(review): presumably the direction tensor of torch weight_norm - confirm
                if hasattr(conv, 'weight_v'):
                    weight = conv.weight_v
                else:
                    weight = conv.weight

                # view the (out, in, kernel) weight as a matrix with rows of length kernel * in
                out_channels, in_channels, kernel_size = weight.shape
                w = weight.permute(0, 2, 1).flatten(1)
                target_density, block_size = params
                density = alpha + (1 - alpha) * target_density
                w, new_mask = sparsify_matrix(w, density, block_size, return_mask=True)

                # undo the matrix view and write the result back in place
                w = w.reshape(out_channels, kernel_size, in_channels).permute(0, 2, 1)
                weight[:] = w

                # a block kept now that was pruned in the previous step is reported
                previous_mask = self._last_masks.get(index)
                if previous_mask is not None:
                    if not torch.all(previous_mask * new_mask == new_mask) and debug:
                        print("weight resurrection in conv.weight")

                self._last_masks[index] = new_mask
                self.last_mask = new_mask

                if verbose:
                    print(f"conv1d_sparsier[{self.step_counter}]: {density=}")
+
+
if __name__ == "__main__":
    # smoke test: sparsify a single Conv1d layer over 100 steps and show the result
    print("Testing sparsifier")

    import torch

    layer = torch.nn.Conv1d(8, 16, 8)
    task = (layer, (0.2, [8, 4]))

    sparsifier = Conv1dSparsifier([task], 0, 100, 5)

    for _ in range(100):
        sparsifier.step(verbose=True)

    print(layer.weight)
diff --git a/dnn/torch/dnntools/dnntools/sparsification/conv_transpose1d_sparsifier.py b/dnn/torch/dnntools/dnntools/sparsification/conv_transpose1d_sparsifier.py
new file mode 100644
index 00000000..6d9398f2
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/conv_transpose1d_sparsifier.py
@@ -0,0 +1,134 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+
+from .base_sparsifier import BaseSparsifier
+from .common import sparsify_matrix, debug
+
+
class ConvTranspose1dSparsifier(BaseSparsifier):
    def __init__(self, task_list, start, stop, interval, exponent=3):
        """ Sparsifier for torch.nn.ConvTranspose1d layers

        Parameters:
        -----------
        task_list : list
            task_list contains a list of tuples (conv, params), where conv is an instance
            of torch.nn.ConvTranspose1d and params is a tuple (density, [m, n]),
            where density is the target density in [0, 1], [m, n] is the shape sub-blocks to which
            sparsification is applied.

        start : int
            training step after which sparsification will be started.

        stop : int
            training step after which sparsification will be completed.

        interval : int
            sparsification interval for steps between start and stop. After stop sparsification will be
            carried out after every call to ConvTranspose1dSparsifier.step()

        exponent : float
            Interpolation exponent for sparsification interval. In step i sparsification will be carried out
            with density (alpha + target_density * (1 - alpha)), where
            alpha = ((stop - i) / (stop - start)) ** exponent

        Example:
        --------
        >>> import torch
        >>> conv = torch.nn.ConvTranspose1d(8, 16, 8)
        >>> params = (0.2, [8, 4])
        >>> sparsifier = ConvTranspose1dSparsifier([(conv, params)], 0, 100, 50)
        >>> for i in range(100):
        ...         sparsifier.step()
        """

        # forward the caller's exponent instead of a hard-coded 3
        super().__init__(task_list, start, stop, interval, exponent=exponent)

        # mask of the most recently processed layer
        self.last_mask = None
        # one mask per task, keyed by position in task_list, so that masks of
        # different layers are never compared with each other
        self._last_masks = {}

    def sparsify(self, alpha, verbose=False):
        """ carries out sparsification step

        Call this function after optimizer.step in your
        training loop.

        Parameters:
        ----------
        alpha : float
            density interpolation parameter (1: dense, 0: target density)
        verbose : bool
            if true, densities are printed out

        Returns:
        --------
        None

        """

        with torch.no_grad():
            for index, (conv, params) in enumerate(self.task_list):
                # weight_v takes precedence when present
                # NOTE(review): presumably the direction tensor of torch weight_norm - confirm
                if hasattr(conv, 'weight_v'):
                    weight = conv.weight_v
                else:
                    weight = conv.weight

                # view the (in, out, kernel) weight as a (kernel * out, in) matrix
                i, o, k = weight.shape
                w = weight.permute(2, 1, 0).reshape(k * o, i)
                target_density, block_size = params
                density = alpha + (1 - alpha) * target_density
                w, new_mask = sparsify_matrix(w, density, block_size, return_mask=True)

                # undo the matrix view and write the result back in place
                w = w.reshape(k, o, i).permute(2, 1, 0)
                weight[:] = w

                # a block kept now that was pruned in the previous step is reported
                previous_mask = self._last_masks.get(index)
                if previous_mask is not None:
                    if not torch.all(previous_mask * new_mask == new_mask) and debug:
                        print("weight resurrection in conv.weight")

                self._last_masks[index] = new_mask
                self.last_mask = new_mask

                if verbose:
                    print(f"convtrans1d_sparsier[{self.step_counter}]: {density=}")
+
+
if __name__ == "__main__":
    # smoke test: sparsify a single ConvTranspose1d layer over 100 steps and show the result
    print("Testing sparsifier")

    import torch

    layer = torch.nn.ConvTranspose1d(8, 16, 4, 4)
    task = (layer, (0.2, [8, 4]))

    sparsifier = ConvTranspose1dSparsifier([task], 0, 100, 5)

    for _ in range(100):
        sparsifier.step(verbose=True)

    print(layer.weight)
diff --git a/dnn/torch/dnntools/dnntools/sparsification/gru_sparsifier.py b/dnn/torch/dnntools/dnntools/sparsification/gru_sparsifier.py
new file mode 100644
index 00000000..417b04be
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/gru_sparsifier.py
@@ -0,0 +1,178 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+from .base_sparsifier import BaseSparsifier
+from .common import sparsify_matrix, debug
+
+
class GRUSparsifier(BaseSparsifier):
    def __init__(self, task_list, start, stop, interval, exponent=3):
        """ Sparsifier for torch.nn.GRUs

        Parameters:
        -----------
        task_list : list
            task_list contains a list of tuples (gru, sparsify_dict), where gru is an instance
            of torch.nn.GRU and sparsify_dic is a dictionary with keys in {'W_ir', 'W_iz', 'W_in',
            'W_hr', 'W_hz', 'W_hn'} corresponding to the input and recurrent weights for the reset,
            update, and new gate. The values of sparsify_dict are tuples (density, [m, n], keep_diagonal),
            where density is the target density in [0, 1], [m, n] is the shape sub-blocks to which
            sparsification is applied and keep_diagonal is a bool variable indicating whether the diagonal
            should be kept.

        start : int
            training step after which sparsification will be started.

        stop : int
            training step after which sparsification will be completed.

        interval : int
            sparsification interval for steps between start and stop. After stop sparsification will be
            carried out after every call to GRUSparsifier.step()

        exponent : float
            Interpolation exponent for sparsification interval. In step i sparsification will be carried out
            with density (alpha + target_density * (1 - alpha)), where
            alpha = ((stop - i) / (stop - start)) ** exponent

        Example:
        --------
        >>> import torch
        >>> gru = torch.nn.GRU(10, 20)
        >>> sparsify_dict = {
        ...         'W_ir' : (0.5, [2, 2], False),
        ...         'W_iz' : (0.6, [2, 2], False),
        ...         'W_in' : (0.7, [2, 2], False),
        ...         'W_hr' : (0.1, [4, 4], True),
        ...         'W_hz' : (0.2, [4, 4], True),
        ...         'W_hn' : (0.3, [4, 4], True),
        ...     }
        >>> sparsifier = GRUSparsifier([(gru, sparsify_dict)], 0, 100, 50)
        >>> for i in range(100):
        ...         sparsifier.step()
        """
        # forward the caller's exponent instead of a hard-coded 3
        super().__init__(task_list, start, stop, interval, exponent=exponent)

        # most recent mask per gate matrix
        self.last_masks = {key : None for key in ['W_ir', 'W_in', 'W_iz', 'W_hr', 'W_hn', 'W_hz']}
        # masks per (task index, gate matrix) so that different GRUs are never compared
        self._task_masks = {}

    def sparsify(self, alpha, verbose=False):
        """ carries out sparsification step

        Call this function after optimizer.step in your
        training loop.

        Parameters:
        ----------
        alpha : float
            density interpolation parameter (1: dense, 0: target density)
        verbose : bool
            if true, densities are printed out

        Returns:
        --------
        None

        """

        with torch.no_grad():
            for index, (gru, params) in enumerate(self.task_list):
                # input weights
                self._sparsify_gates(index, gru, params, 'weight_ih_l0', ['W_ir', 'W_iz', 'W_in'], alpha, verbose)

                # recurrent weights
                self._sparsify_gates(index, gru, params, 'weight_hh_l0', ['W_hr', 'W_hz', 'W_hn'], alpha, verbose)

    def _sparsify_gates(self, task_index, gru, params, weight_name, gate_keys, alpha, verbose):
        """ sparsifies the gate sub-matrices of one stacked GRU weight in place

        The i-th key of gate_keys refers to rows [i * hidden_size, (i + 1) * hidden_size)
        of the weight; keys missing from params are left untouched.
        """
        hidden_size = gru.hidden_size
        normed_name = weight_name + '_v'

        for i, key in enumerate(gate_keys):
            if key not in params:
                continue

            # the '_v' variant takes precedence when present
            # NOTE(review): presumably the direction tensor of torch weight_norm - confirm
            if hasattr(gru, normed_name):
                weight = getattr(gru, normed_name)
            else:
                weight = getattr(gru, weight_name)

            density = alpha + (1 - alpha) * params[key][0]
            if verbose:
                print(f"[{self.step_counter}]: {key} density: {density}")

            rows = slice(i * hidden_size, (i + 1) * hidden_size)
            weight[rows, : ], new_mask = sparsify_matrix(
                weight[rows, : ],
                density,
                params[key][1], # block_size
                params[key][2], # keep_diagonal (might want to set this to False)
                return_mask=True
            )

            # a block kept now that was pruned in the previous step is reported
            previous_mask = self._task_masks.get((task_index, key))
            if previous_mask is not None:
                if not torch.all(previous_mask * new_mask == new_mask) and debug:
                    print(f"weight resurrection in {normed_name}")

            self._task_masks[(task_index, key)] = new_mask
            self.last_masks[key] = new_mask
+
+
+
if __name__ == "__main__":
    # smoke test: sparsify all six gate matrices of a GRU over 100 steps
    print("Testing sparsifier")

    gru = torch.nn.GRU(10, 20)

    input_block, recurrent_block = [2, 2], [4, 4]
    sparsify_dict = {
        'W_ir' : (0.5, input_block, False),
        'W_iz' : (0.6, input_block, False),
        'W_in' : (0.7, input_block, False),
        'W_hr' : (0.1, recurrent_block, True),
        'W_hz' : (0.2, recurrent_block, True),
        'W_hn' : (0.3, recurrent_block, True),
    }

    sparsifier = GRUSparsifier([(gru, sparsify_dict)], 0, 100, 10)

    for _ in range(100):
        sparsifier.step(verbose=True)

    print(gru.weight_hh_l0)
diff --git a/dnn/torch/dnntools/dnntools/sparsification/linear_sparsifier.py b/dnn/torch/dnntools/dnntools/sparsification/linear_sparsifier.py
new file mode 100644
index 00000000..59251ddd
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/linear_sparsifier.py
@@ -0,0 +1,128 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+from .base_sparsifier import BaseSparsifier
+from .common import sparsify_matrix
+
+
+class LinearSparsifier(BaseSparsifier):
+ def __init__(self, task_list, start, stop, interval, exponent=3):
+ """ Sparsifier for torch.nn.GRUs
+
+ Parameters:
+ -----------
+ task_list : list
+ task_list contains a list of tuples (linear, params), where linear is an instance
+ of torch.nn.Linear and params is a tuple (density, [m, n]),
+ where density is the target density in [0, 1], [m, n] is the shape sub-blocks to which
+ sparsification is applied.
+
+ start : int
+ training step after which sparsification will be started.
+
+ stop : int
+ training step after which sparsification will be completed.
+
+ interval : int
+ sparsification interval for steps between start and stop. After stop sparsification will be
+ carried out after every call to GRUSparsifier.step()
+
+ exponent : float
+ Interpolation exponent for sparsification interval. In step i sparsification will be carried out
+ with density (alpha + target_density * (1 * alpha)), where
+ alpha = ((stop - i) / (start - stop)) ** exponent
+
+ Example:
+ --------
+ >>> import torch
+ >>> linear = torch.nn.Linear(8, 16)
+ >>> params = (0.2, [8, 4])
+ >>> sparsifier = LinearSparsifier([(linear, params)], 0, 100, 50)
+ >>> for i in range(100):
+ ... sparsifier.step()
+ """
+
+ super().__init__(task_list, start, stop, interval, exponent=3)
+
+ self.last_mask = None
+
+ def sparsify(self, alpha, verbose=False):
+ """ carries out sparsification step
+
+ Call this function after optimizer.step in your
+ training loop.
+
+ Parameters:
+ ----------
+ alpha : float
+ density interpolation parameter (1: dense, 0: target density)
+ verbose : bool
+ if true, densities are printed out
+
+ Returns:
+ --------
+ None
+
+ """
+
+ with torch.no_grad():
+ for linear, params in self.task_list:
+ if hasattr(linear, 'weight_v'):
+ weight = linear.weight_v
+ else:
+ weight = linear.weight
+ target_density, block_size = params
+ density = alpha + (1 - alpha) * target_density
+ weight[:], new_mask = sparsify_matrix(weight, density, block_size, return_mask=True)
+
+ if self.last_mask is not None:
+ if not torch.all(self.last_mask * new_mask == new_mask) and debug:
+ print("weight resurrection in conv.weight")
+
+ self.last_mask = new_mask
+
+ if verbose:
+ print(f"linear_sparsifier[{self.step_counter}]: {density=}")
+
+
+if __name__ == "__main__":
+ print("Testing sparsifier")
+
+ import torch
+ linear = torch.nn.Linear(8, 16)
+ params = (0.2, [4, 2])
+
+ sparsifier = LinearSparsifier([(linear, params)], 0, 100, 5)
+
+ for i in range(100):
+ sparsifier.step(verbose=True)
+
+ print(linear.weight)
diff --git a/dnn/torch/dnntools/dnntools/sparsification/utils.py b/dnn/torch/dnntools/dnntools/sparsification/utils.py
new file mode 100644
index 00000000..42f22353
--- /dev/null
+++ b/dnn/torch/dnntools/dnntools/sparsification/utils.py
@@ -0,0 +1,64 @@
+import torch
+
+from dnntools.sparsification import GRUSparsifier, LinearSparsifier, Conv1dSparsifier, ConvTranspose1dSparsifier
+
+def mark_for_sparsification(module, params):
+ setattr(module, 'sparsify', True)
+ setattr(module, 'sparsification_params', params)
+ return module
+
+def create_sparsifier(module, start, stop, interval):
+ sparsifier_list = []
+ for m in module.modules():
+ if hasattr(m, 'sparsify'):
+ if isinstance(m, torch.nn.GRU):
+ sparsifier_list.append(
+ GRUSparsifier([(m, m.sparsification_params)], start, stop, interval)
+ )
+ elif isinstance(m, torch.nn.Linear):
+ sparsifier_list.append(
+ LinearSparsifier([(m, m.sparsification_params)], start, stop, interval)
+ )
+ elif isinstance(m, torch.nn.Conv1d):
+ sparsifier_list.append(
+ Conv1dSparsifier([(m, m.sparsification_params)], start, stop, interval)
+ )
+ elif isinstance(m, torch.nn.ConvTranspose1d):
+ sparsifier_list.append(
+ ConvTranspose1dSparsifier([(m, m.sparsification_params)], start, stop, interval)
+ )
+ else:
+ print(f"[create_sparsifier] warning: module {m} marked for sparsification but no suitable sparsifier exists.")
+
+ def sparsify(verbose=False):
+ for sparsifier in sparsifier_list:
+ sparsifier.step(verbose)
+
+ return sparsify
+
+
+def count_parameters(model, verbose=False):
+ total = 0
+ for name, p in model.named_parameters():
+ count = torch.ones_like(p).sum().item()
+
+ if verbose:
+ print(f"{name}: {count} parameters")
+
+ total += count
+
+ return total
+
+def estimate_nonzero_parameters(module):
+ num_zero_parameters = 0
+ if hasattr(module, 'sparsify'):
+ params = module.sparsification_params
+ if isinstance(module, torch.nn.Conv1d) or isinstance(module, torch.nn.ConvTranspose1d):
+ num_zero_parameters = torch.ones_like(module.weight).sum().item() * (1 - params[0])
+ elif isinstance(module, torch.nn.GRU):
+ num_zero_parameters = module.input_size * module.hidden_size * (3 - params['W_ir'][0] - params['W_iz'][0] - params['W_in'][0])
+ num_zero_parameters += module.hidden_size * module.hidden_size * (3 - params['W_hr'][0] - params['W_hz'][0] - params['W_hn'][0])
+ elif isinstance(module, torch.nn.Linear):
+ num_zero_parameters = module.in_features * module.out_features * params[0]
+ else:
+ raise ValueError(f'unknown sparsification method for module of type {type(module)}')
diff --git a/dnn/torch/dnntools/requirements.txt b/dnn/torch/dnntools/requirements.txt
new file mode 100644
index 00000000..08ed5eeb
--- /dev/null
+++ b/dnn/torch/dnntools/requirements.txt
@@ -0,0 +1 @@
+torch \ No newline at end of file
diff --git a/dnn/torch/dnntools/setup.py b/dnn/torch/dnntools/setup.py
new file mode 100644
index 00000000..bc4ef3f1
--- /dev/null
+++ b/dnn/torch/dnntools/setup.py
@@ -0,0 +1,48 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+ #!/usr/bin/env python
+import os
+from setuptools import setup
+
+lib_folder = os.path.dirname(os.path.realpath(__file__))
+
+with open(os.path.join(lib_folder, 'requirements.txt'), 'r') as f:
+ install_requires = list(f.read().splitlines())
+
+print(install_requires)
+
+setup(name='dnntools',
+ version='1.0',
+ author='Jan Buethe',
+ author_email='jbuethe@amazon.de',
+ description='Non-Standard tools for deep neural network training with PyTorch',
+ packages=['dnntools', 'dnntools.sparsification', 'dnntools.quantization'],
+ install_requires=install_requires
+ )
diff --git a/dnn/torch/fargan/adv_train_fargan.py b/dnn/torch/fargan/adv_train_fargan.py
new file mode 100644
index 00000000..c2977644
--- /dev/null
+++ b/dnn/torch/fargan/adv_train_fargan.py
@@ -0,0 +1,277 @@
+import os
+import argparse
+import random
+import numpy as np
+import sys
+import math as m
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+import tqdm
+
+import fargan
+from dataset import FARGANDataset
+from stft_loss import *
+
+source_dir = os.path.split(os.path.abspath(__file__))[0]
+sys.path.append(os.path.join(source_dir, "../osce/"))
+
+import models as osce_models
+
+
+def fmap_loss(scores_real, scores_gen):
+ num_discs = len(scores_real)
+ loss_feat = 0
+ for k in range(num_discs):
+ num_layers = len(scores_gen[k]) - 1
+ f = 4 / num_discs / num_layers
+ for l in range(num_layers):
+ loss_feat += f * F.l1_loss(scores_gen[k][l], scores_real[k][l].detach())
+
+ return loss_feat
+
+ # Command line interface: three positional paths (feature file, signal file,
+ # output folder) plus optional model and training settings.
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument('features', type=str, help='path to feature file in .f32 format')
+ parser.add_argument('signal', type=str, help='path to signal file in .s16 format')
+ parser.add_argument('output', type=str, help='path to output folder')
+
+ # --suffix is inserted into the checkpoint file names written after every epoch
+ parser.add_argument('--suffix', type=str, help="model name suffix", default="")
+ parser.add_argument('--cuda-visible-devices', type=str, help="comma separates list of cuda visible device indices, default: CUDA_VISIBLE_DEVICES", default=None)
+
+
+ # options forwarded to the FARGAN generator constructor
+ model_group = parser.add_argument_group(title="model parameters")
+ model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
+ model_group.add_argument('--gamma', type=float, help="Use A(z/gamma), default: 0.9", default=0.9)
+
+ # optimisation settings and loss weights
+ training_group = parser.add_argument_group(title="training parameters")
+ training_group.add_argument('--batch-size', type=int, help="batch size, default: 128", default=128)
+ training_group.add_argument('--lr', type=float, help='learning rate, default: 5e-4', default=5e-4)
+ training_group.add_argument('--epochs', type=int, help='number of training epochs, default: 50', default=50)
+ training_group.add_argument('--sequence-length', type=int, help='sequence length, default: 60', default=60)
+ training_group.add_argument('--lr-decay', type=float, help='learning rate decay factor, default: 0.0', default=0.0)
+ training_group.add_argument('--initial-checkpoint', type=str, help='initial checkpoint to start training from, default: None', default=None)
+ training_group.add_argument('--reg-weight', type=float, help='regression loss weight, default: 1.0', default=1.0)
+ training_group.add_argument('--fmap-weight', type=float, help='feature matchin loss weight, default: 1.0', default=1.)
+
+ # NOTE: arguments are parsed at import time, not under the __main__ guard
+ args = parser.parse_args()
+
+ # optionally override CUDA_VISIBLE_DEVICES from the command line
+ if args.cuda_visible_devices != None:
+ os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_visible_devices
+
+ # checkpoints
+ # `checkpoint` collects hyper-parameters and, later, weights; it is the object
+ # written by torch.save at the end of every epoch
+ checkpoint_dir = os.path.join(args.output, 'checkpoints')
+ checkpoint = dict()
+ os.makedirs(checkpoint_dir, exist_ok=True)
+
+
+ # training parameters
+ batch_size = args.batch_size
+ lr = args.lr
+ epochs = args.epochs
+ sequence_length = args.sequence_length
+ lr_decay = args.lr_decay
+
+ # optimizer constants shared by generator and discriminator optimizers
+ adam_betas = [0.8, 0.99]
+ adam_eps = 1e-8
+ features_file = args.features
+ signal_file = args.signal
+
+ # model parameters
+ cond_size = args.cond_size
+
+
+ checkpoint['batch_size'] = batch_size
+ checkpoint['lr'] = lr
+ checkpoint['lr_decay'] = lr_decay
+ checkpoint['epochs'] = epochs
+ checkpoint['sequence_length'] = sequence_length
+ checkpoint['adam_betas'] = adam_betas
+
+
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+ # constructor arguments are stored so the generator can be rebuilt from a checkpoint
+ checkpoint['model_args'] = ()
+ checkpoint['model_kwargs'] = {'cond_size': cond_size, 'gamma': args.gamma}
+ print(checkpoint['model_kwargs'])
+ model = fargan.FARGAN(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+
+ #discriminator
+ # looked up by name in the osce model registry
+ disc_name = 'fdmresdisc'
+ disc = osce_models.model_dict[disc_name](
+ architecture='free',
+ design='f_down',
+ fft_sizes_16k=[2**n for n in range(6, 12)],
+ freq_roi=[0, 7400],
+ max_channels=256,
+ noise_gain=0.0
+ )
+
+ # NOTE(review): loading an initial checkpoint rebinds `checkpoint` to the loaded
+ # object, so the hyper-parameter entries stored above are replaced by whatever the
+ # loaded file contains. strict=False tolerates missing/unexpected state_dict keys.
+ if type(args.initial_checkpoint) != type(None):
+ checkpoint = torch.load(args.initial_checkpoint, map_location='cpu')
+ model.load_state_dict(checkpoint['state_dict'], strict=False)
+
+ checkpoint['state_dict'] = model.state_dict()
+
+
+ dataset = FARGANDataset(features_file, signal_file, sequence_length=sequence_length)
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)
+
+
+ # separate optimizers for generator and discriminator; the discriminator one only
+ # receives parameters that currently require gradients
+ optimizer = torch.optim.AdamW(model.parameters(), lr=lr, betas=adam_betas, eps=adam_eps)
+ optimizer_disc = torch.optim.AdamW([p for p in disc.parameters() if p.requires_grad], lr=lr, betas=adam_betas, eps=adam_eps)
+
+
+ # learning rate scheduler
+ # learning rate is scaled by 1 / (1 + lr_decay * x), x being the scheduler step count
+ scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay * x))
+ scheduler_disc = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer_disc, lr_lambda=lambda x : 1 / (1 + lr_decay * x))
+
+ states = None
+
+ spect_loss = MultiResolutionSTFTLoss(device).to(device)
+
+ # the generator starts fully frozen; the training loop re-enables gradients for
+ # most of it at batch 400 of the first epoch
+ for param in model.parameters():
+ param.requires_grad = False
+ # Adversarial training loop: per batch, one discriminator update followed by one
+ # generator update; a checkpoint is written after every epoch.
+ batch_count = 0
+ if __name__ == '__main__':
+ model.to(device)
+ disc.to(device)
+
+ for epoch in range(1, epochs + 1):
+
+ # running (factor 0.9 / 0.1) mean and standard deviation of the final
+ # discriminator outputs for real (r) and generated (f) signals
+ m_r = 0
+ m_f = 0
+ s_r = 1
+ s_f = 1
+
+ # per-epoch loss accumulators
+ running_cont_loss = 0
+ running_disc_loss = 0
+ running_gen_loss = 0
+ running_fmap_loss = 0
+ running_reg_loss = 0
+ running_wc = 0
+
+ print(f"training epoch {epoch}...")
+ with tqdm.tqdm(dataloader, unit='batch') as tepoch:
+ for i, (features, periods, target, lpc) in enumerate(tepoch):
+ # after 400 batches of the first epoch, unfreeze the generator except
+ # for cond_net and sig_net.cond_gain_dense, which stay frozen
+ if epoch == 1 and i == 400:
+ for param in model.parameters():
+ param.requires_grad = True
+ for param in model.cond_net.parameters():
+ param.requires_grad = False
+ for param in model.sig_net.cond_gain_dense.parameters():
+ param.requires_grad = False
+
+ optimizer.zero_grad()
+ features = features.to(device)
+ #lpc = lpc.to(device)
+ #lpc = lpc*(args.gamma**torch.arange(1,17, device=device))
+ #lpc = fargan.interp_lpc(lpc, 4)
+ periods = periods.to(device)
+ # only the first branch is active: crop to sequence_length frames of
+ # 160 samples, plus 4 extra feature/period frames
+ if True:
+ target = target[:, :sequence_length*160]
+ #lpc = lpc[:,:sequence_length*4,:]
+ features = features[:,:sequence_length+4,:]
+ periods = periods[:,:sequence_length+4]
+ else:
+ target=target[::2, :]
+ #lpc=lpc[::2,:]
+ features=features[::2,:]
+ periods=periods[::2,:]
+ target = target.to(device)
+ #target = fargan.analysis_filter(target, lpc[:,:,:], nb_subframes=1, gamma=args.gamma)
+
+ # the first nb_pre frames of the target are handed to the model as `pre`
+ # and prepended to its output so output and target cover the same span
+ #nb_pre = random.randrange(1, 6)
+ nb_pre = 2
+ pre = target[:, :nb_pre*160]
+ output, _ = model(features, periods, target.size(1)//160 - nb_pre, pre=pre, states=None)
+ output = torch.cat([pre, output], -1)
+
+
+ # discriminator update
+ # generator output is detached so only the discriminator receives gradients
+ scores_gen = disc(output.detach().unsqueeze(1))
+ scores_real = disc(target.unsqueeze(1))
+
+ # squared-error objective: generated scores are pushed towards 0,
+ # real scores towards 1
+ disc_loss = 0
+ for scale in scores_gen:
+ disc_loss += ((scale[-1]) ** 2).mean()
+ m_f = 0.9 * m_f + 0.1 * scale[-1].detach().mean().cpu().item()
+ s_f = 0.9 * s_f + 0.1 * scale[-1].detach().std().cpu().item()
+
+ for scale in scores_real:
+ disc_loss += ((1 - scale[-1]) ** 2).mean()
+ m_r = 0.9 * m_r + 0.1 * scale[-1].detach().mean().cpu().item()
+ s_r = 0.9 * s_r + 0.1 * scale[-1].detach().std().cpu().item()
+
+ disc_loss = 0.5 * disc_loss / len(scores_gen)
+ # monitoring statistic derived from the running score statistics; presumably the
+ # probability, under a Gaussian assumption, that a generated score exceeds a real one
+ winning_chance = 0.5 * m.erfc( (m_r - m_f) / m.sqrt(2 * (s_f**2 + s_r**2)) )
+ running_wc += winning_chance
+
+ disc.zero_grad()
+ disc_loss.backward()
+ optimizer_disc.step()
+
+ # model update
+ # scores are recomputed without detach so gradients reach the generator;
+ # scores_real from before the discriminator step is reused (the refresh
+ # below is disabled)
+ scores_gen = disc(output.unsqueeze(1))
+ if False: # todo: check whether that makes a difference
+ with torch.no_grad():
+ scores_real = disc(target.unsqueeze(1))
+
+ # cont_loss covers the first 80 samples after the `pre` segment; it enters
+ # reg_loss with weight .00, so it is effectively only logged
+ cont_loss = fargan.sig_loss(target[:, nb_pre*160:nb_pre*160+80], output[:, nb_pre*160:nb_pre*160+80])
+ specc_loss = spect_loss(output, target.detach())
+ reg_loss = (.00*cont_loss + specc_loss)
+
+ # adversarial term: generated scores are pushed towards 1
+ loss_gen = 0
+ for scale in scores_gen:
+ loss_gen += ((1 - scale[-1]) ** 2).mean() / len(scores_gen)
+
+ feat_loss = args.fmap_weight * fmap_loss(scores_real, scores_gen)
+
+ reg_weight = args.reg_weight# + 15./(1 + (batch_count/7600.))
+ gen_loss = reg_weight * reg_loss + feat_loss + loss_gen
+
+ model.zero_grad()
+
+
+ gen_loss.backward()
+ optimizer.step()
+
+ #model.clip_weights()
+
+ # both schedulers advance once per batch
+ scheduler.step()
+ scheduler_disc.step()
+
+ running_cont_loss += cont_loss.detach().cpu().item()
+ running_gen_loss += loss_gen.detach().cpu().item()
+ running_disc_loss += disc_loss.detach().cpu().item()
+ running_fmap_loss += feat_loss.detach().cpu().item()
+ running_reg_loss += reg_loss.detach().cpu().item()
+
+
+
+ # progress bar shows averages over the batches processed so far
+ tepoch.set_postfix(cont_loss=f"{running_cont_loss/(i+1):8.5f}",
+ reg_weight=f"{reg_weight:8.5f}",
+ gen_loss=f"{running_gen_loss/(i+1):8.5f}",
+ disc_loss=f"{running_disc_loss/(i+1):8.5f}",
+ fmap_loss=f"{running_fmap_loss/(i+1):8.5f}",
+ reg_loss=f"{running_reg_loss/(i+1):8.5f}",
+ wc = f"{running_wc/(i+1):8.5f}",
+ )
+ batch_count = batch_count + 1
+
+ # save checkpoint
+ # NOTE: the discriminator weights are stored under the key 'disc_sate_dict' (sic);
+ # loaders have to use that exact spelling
+ checkpoint_path = os.path.join(checkpoint_dir, f'fargan{args.suffix}_adv_{epoch}.pth')
+ checkpoint['state_dict'] = model.state_dict()
+ checkpoint['disc_sate_dict'] = disc.state_dict()
+ checkpoint['loss'] = {
+ 'cont': running_cont_loss / len(dataloader),
+ 'gen': running_gen_loss / len(dataloader),
+ 'disc': running_disc_loss / len(dataloader),
+ 'fmap': running_fmap_loss / len(dataloader),
+ 'reg': running_reg_loss / len(dataloader)
+ }
+ checkpoint['epoch'] = epoch
+ torch.save(checkpoint, checkpoint_path)
diff --git a/dnn/torch/fargan/dataset.py b/dnn/torch/fargan/dataset.py
new file mode 100644
index 00000000..2dfbb0b5
--- /dev/null
+++ b/dnn/torch/fargan/dataset.py
@@ -0,0 +1,61 @@
+import torch
+import numpy as np
+import fargan
+
+ # Dataset pairing windows of frame features with the matching 16-bit PCM signal.
+ # Both files are memory-mapped and exposed as overlapping windows through
+ # numpy stride tricks, so no data is copied until __getitem__ is called.
+ class FARGANDataset(torch.utils.data.Dataset):
+ # feature_file: raw float32 file, reshaped to rows of nb_features values
+ # signal_file: raw int16 file
+ # frame_size: samples per frame
+ # sequence_length: frames per training sequence
+ # lookahead: frame offset used when aligning signal and lpc with the features
+ # nb_used_features: leading feature columns kept as model input
+ # nb_features: total columns per frame in feature_file
+ def __init__(self,
+ feature_file,
+ signal_file,
+ frame_size=160,
+ sequence_length=15,
+ lookahead=1,
+ nb_used_features=20,
+ nb_features=36):
+
+ self.frame_size = frame_size
+ self.sequence_length = sequence_length
+ self.lookahead = lookahead
+ self.nb_features = nb_features
+ self.nb_used_features = nb_used_features
+ pcm_chunk_size = self.frame_size*self.sequence_length
+
+ self.data = np.memmap(signal_file, dtype='int16', mode='r')
+ #self.data = self.data[1::2]
+ # four chunks are held back; presumably as margin for the offset and the
+ # double-length windows below -- confirm
+ self.nb_sequences = len(self.data)//(pcm_chunk_size)-4
+ self.data = self.data[(4-self.lookahead)*self.frame_size:]
+ self.data = self.data[:self.nb_sequences*pcm_chunk_size]
+
+
+ #self.data = np.reshape(self.data, (self.nb_sequences, pcm_chunk_size))
+ # each row holds 2*pcm_chunk_size samples while rows advance by pcm_chunk_size,
+ # so consecutive rows overlap by one chunk
+ # NOTE(review): the last row extends past the sliced view into the underlying
+ # memmap; as_strided performs no bounds checking -- confirm the margin suffices
+ sizeof = self.data.strides[-1]
+ self.data = np.lib.stride_tricks.as_strided(self.data, shape=(self.nb_sequences, pcm_chunk_size*2),
+ strides=(pcm_chunk_size*sizeof, sizeof))
+
+ # feature windows of 2*sequence_length+4 frames, advancing by sequence_length frames
+ self.features = np.reshape(np.memmap(feature_file, dtype='float32', mode='r'), (-1, nb_features))
+ sizeof = self.features.strides[-1]
+ self.features = np.lib.stride_tricks.as_strided(self.features, shape=(self.nb_sequences, self.sequence_length*2+4, nb_features),
+ strides=(self.sequence_length*self.nb_features*sizeof, self.nb_features*sizeof, sizeof))
+ #self.periods = np.round(50*self.features[:,:,self.nb_used_features-2]+100).astype('int')
+ # integer period derived from feature column nb_used_features-2, clipped to [32, 255]
+ self.periods = np.round(np.clip(256./2**(self.features[:,:,self.nb_used_features-2]+1.5), 32, 255)).astype('int')
+
+ # columns from nb_used_features onwards are kept separately as `lpc`
+ self.lpc = self.features[:, :, self.nb_used_features:]
+ self.features = self.features[:, :, :self.nb_used_features]
+ print("lpc_size:", self.lpc.shape)
+
+ # number of available sequences
+ def __len__(self):
+ return self.nb_sequences
+
+ # returns (features, periods, data, lpc) for one sequence; every item is copied
+ # out of the memory map and the signal is scaled by 1/2**15 to float32
+ def __getitem__(self, index):
+ features = self.features[index, :, :].copy()
+ # lpc frames are shifted by the lookahead; the lookahead == 0 case is separate
+ # because a slice ending at -0 would be empty
+ if self.lookahead != 0:
+ lpc = self.lpc[index, 4-self.lookahead:-self.lookahead, :].copy()
+ else:
+ lpc = self.lpc[index, 4:, :].copy()
+ data = self.data[index, :].copy().astype(np.float32) / 2**15
+ periods = self.periods[index, :].copy()
+ #lpc = lpc*(self.gamma**np.arange(1,17))
+ #lpc=lpc[None,:,:]
+ #lpc = fargan.interp_lpc(lpc, 4)
+ #lpc=lpc[0,:,:]
+
+ return features, periods, data, lpc
diff --git a/dnn/torch/fargan/dump_fargan_weights.py b/dnn/torch/fargan/dump_fargan_weights.py
new file mode 100644
index 00000000..ec1eb9ae
--- /dev/null
+++ b/dnn/torch/fargan/dump_fargan_weights.py
@@ -0,0 +1,112 @@
+import os
+import sys
+import argparse
+
+import torch
+from torch import nn
+
+
+sys.path.append(os.path.join(os.path.split(__file__)[0], '../weight-exchange'))
+import wexchange.torch
+
+import fargan
+#from models import model_dict
+
+ # layers that keep float weights when --quantize is given (matched by module name)
+ unquantized = [ 'cond_net.pembed', 'cond_net.fdense1', 'sig_net.cond_gain_dense', 'sig_net.gain_dense_out' ]
+
+ # NOTE(review): unquantized2 is not referenced by the export code in this script;
+ # it looks like an alternative, larger exclusion list -- confirm before removing
+ unquantized2 = [
+ 'cond_net.pembed',
+ 'cond_net.fdense1',
+ 'cond_net.fconv1',
+ 'cond_net.fconv2',
+ 'cont_net.0',
+ 'sig_net.cond_gain_dense',
+ 'sig_net.fwc0.conv',
+ 'sig_net.fwc0.glu.gate',
+ 'sig_net.dense1_glu.gate',
+ 'sig_net.gru1_glu.gate',
+ 'sig_net.gru2_glu.gate',
+ 'sig_net.gru3_glu.gate',
+ 'sig_net.skip_glu.gate',
+ 'sig_net.skip_dense',
+ 'sig_net.sig_dense_out',
+ 'sig_net.gain_dense_out'
+ ]
+
+ # help text shown by argparse; the f-string embeds the `unquantized` list above
+ description=f"""
+ This is an unsafe dumping script for FARGAN models. It assumes that all weights are included in Linear, Conv1d or GRU layer
+ and will fail to export any other weights.
+
+ Furthermore, the quanitze option relies on the following explicit list of layers to be excluded:
+ {unquantized}.
+
+ Modify this script manually if adjustments are needed.
+ """
+
+ # positional: checkpoint to read and folder to write the C source/header into
+ parser = argparse.ArgumentParser(description=description)
+ parser.add_argument('weightfile', type=str, help='weight file path')
+ parser.add_argument('export_folder', type=str)
+ parser.add_argument('--export-filename', type=str, default='fargan_data', help='filename for source and header file (.c and .h will be added), defaults to fargan_data')
+ parser.add_argument('--struct-name', type=str, default='FARGAN', help='name for C struct, defaults to FARGAN')
+ parser.add_argument('--quantize', action='store_true', help='apply quantization')
+
+ # Loads a FARGAN checkpoint, strips weight normalization and writes every supported
+ # layer through the wexchange C writer.
+ if __name__ == "__main__":
+ args = parser.parse_args()
+
+ print(f"loading weights from {args.weightfile}...")
+ saved_gen= torch.load(args.weightfile, map_location='cpu')
+ # NOTE: constructor arguments are hard-coded here and overwrite whatever the
+ # checkpoint stored under 'model_args' / 'model_kwargs'
+ saved_gen['model_args'] = ()
+ saved_gen['model_kwargs'] = {'cond_size': 256, 'gamma': 0.9}
+
+ model = fargan.FARGAN(*saved_gen['model_args'], **saved_gen['model_kwargs'])
+ model.load_state_dict(saved_gen['state_dict'], strict=False)
+ # applied to every sub-module; modules without weight norm raise ValueError,
+ # which is ignored
+ def _remove_weight_norm(m):
+ try:
+ torch.nn.utils.remove_weight_norm(m)
+ except ValueError: # this module didn't have weight norm
+ return
+ model.apply(_remove_weight_norm)
+
+
+ print("dumping model...")
+ quantize_model=args.quantize
+
+ output_folder = args.export_folder
+ os.makedirs(output_folder, exist_ok=True)
+
+ writer = wexchange.c_export.c_writer.CWriter(os.path.join(output_folder, args.export_filename), model_struct_name=args.struct_name)
+
+ for name, module in model.named_modules():
+
+ # with --quantize, every layer not listed in `unquantized` is quantized and gets
+ # scale=None; all other layers are exported unquantized with scale=1/128
+ if quantize_model:
+ quantize=name not in unquantized
+ scale = None if quantize else 1/128
+ else:
+ quantize=False
+ scale=1/128
+
+ # dispatch on layer type; dots in the module name become underscores for the
+ # exported name
+ if isinstance(module, nn.Linear):
+ print(f"dumping linear layer {name}...")
+ wexchange.torch.dump_torch_dense_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale)
+
+ elif isinstance(module, nn.Conv1d):
+ print(f"dumping conv1d layer {name}...")
+ wexchange.torch.dump_torch_conv1d_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale)
+
+ elif isinstance(module, nn.GRU):
+ print(f"dumping GRU layer {name}...")
+ wexchange.torch.dump_torch_gru_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale, recurrent_scale=scale)
+
+ elif isinstance(module, nn.GRUCell):
+ print(f"dumping GRUCell layer {name}...")
+ wexchange.torch.dump_torch_grucell_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale, recurrent_scale=scale)
+
+ elif isinstance(module, nn.Embedding):
+ print(f"dumping Embedding layer {name}...")
+ wexchange.torch.dump_torch_embedding_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale)
+ #wexchange.torch.dump_torch_embedding_weights(writer, module)
+
+ # container modules and any other layer type are skipped with a message
+ else:
+ print(f"Ignoring layer {name}...")
+
+ writer.close()
diff --git a/dnn/torch/fargan/fargan.py b/dnn/torch/fargan/fargan.py
new file mode 100644
index 00000000..8dbb694d
--- /dev/null
+++ b/dnn/torch/fargan/fargan.py
@@ -0,0 +1,322 @@
+import numpy as np
+import torch
+from torch import nn
+import torch.nn.functional as F
+import filters
+from torch.nn.utils import weight_norm
+#from convert_lsp import lpc_to_lsp, lsp_to_lpc
+from rc import lpc2rc, rc2lpc
+
+Fs = 16000
+
+fid_dict = {}
+def dump_signal(x, filename):
+ return
+ if filename in fid_dict:
+ fid = fid_dict[filename]
+ else:
+ fid = open(filename, "w")
+ fid_dict[filename] = fid
+ x = x.detach().numpy().astype('float32')
+ x.tofile(fid)
+
+
+def sig_l1(y_true, y_pred):
+ return torch.mean(abs(y_true-y_pred))/torch.mean(abs(y_true))
+
+def sig_loss(y_true, y_pred):
+ t = y_true/(1e-15+torch.norm(y_true, dim=-1, p=2, keepdim=True))
+ p = y_pred/(1e-15+torch.norm(y_pred, dim=-1, p=2, keepdim=True))
+ return torch.mean(1.-torch.sum(p*t, dim=-1))
+
+ # Upsamples LPC coefficients by `factor` along axis 1. The coefficients are mapped
+ # through lpc2rc and atanh, linearly interpolated between neighbouring frames, and
+ # mapped back through tanh and rc2lpc.
+ # The code indexes three axes; presumably lpc is (batch, frames, order) -- confirm.
+ def interp_lpc(lpc, factor):
+ #print(lpc.shape)
+ #f = (np.arange(factor)+.5*((factor+1)%2))/factor
+ # despite the name, `lsp` holds atanh of the values returned by lpc2rc
+ lsp = torch.atanh(lpc2rc(lpc))
+ #print("lsp0:")
+ #print(lsp)
+ shape = lsp.shape
+ #print("shape is", shape)
+ shape = (shape[0], shape[1]*factor, shape[2])
+ interp_lsp = torch.zeros(shape, device=lpc.device)
+ # k-th interpolation phase: weight f between frame i and frame i+1, written to
+ # every factor-th output position starting at factor//2+k
+ for k in range(factor):
+ f = (k+.5*((factor+1)%2))/factor
+ interp = (1-f)*lsp[:,:-1,:] + f*lsp[:,1:,:]
+ interp_lsp[:,factor//2+k:-(factor//2):factor,:] = interp
+ # the first factor//2 positions have no left neighbour: copy the first interpolated value
+ for k in range(factor//2):
+ interp_lsp[:,k,:] = interp_lsp[:,factor//2,:]
+ # the trailing positions are filled from the position at index -(factor+3)//2
+ for k in range((factor+1)//2):
+ interp_lsp[:,-k-1,:] = interp_lsp[:,-(factor+3)//2,:]
+ #print("lsp:")
+ #print(interp_lsp)
+ return rc2lpc(torch.tanh(interp_lsp))
+
+ # Filters signal x subframe by subframe with the FIR filter [1, lpc] of the frame the
+ # subframe belongs to and returns the concatenated outputs, shape
+ # (batch, nb_frames*nb_subframes*subframe_size).
+ # x is reshaped to (batch, nb_frames*nb_subframes, subframe_size); lpc is indexed as
+ # (batch, nb_frames, ...). NOTE(review): `gamma` is not used by the active code (the
+ # bandwidth expansion below is commented out), and the history length 16 presumably
+ # equals the LPC order -- confirm.
+ def analysis_filter(x, lpc, nb_subframes=4, subframe_size=40, gamma=.9):
+ device = x.device
+ batch_size = lpc.size(0)
+
+ nb_frames = lpc.shape[1]
+
+
+ # sliding buffer: 16 history samples followed by the current subframe, zero-initialized
+ sig = torch.zeros(batch_size, subframe_size+16, device=device)
+ x = torch.reshape(x, (batch_size, nb_frames*nb_subframes, subframe_size))
+ out = torch.zeros((batch_size, 0), device=device)
+
+ #if gamma is not None:
+ # bw = gamma**(torch.arange(1, 17, device=device))
+ # lpc = lpc*bw[None,None,:]
+ # filter taps [1, lpc...] padded with subframe_size-1 zeros, then expanded into one
+ # matrix per frame by filters.toeplitz_from_filter
+ ones = torch.ones((*(lpc.shape[:-1]), 1), device=device)
+ zeros = torch.zeros((*(lpc.shape[:-1]), subframe_size-1), device=device)
+ a = torch.cat([ones, lpc], -1)
+ a_big = torch.cat([a, zeros], -1)
+ fir_mat_big = filters.toeplitz_from_filter(a_big)
+
+ #print(a_big[:,0,:])
+ for n in range(nb_frames):
+ for k in range(nb_subframes):
+
+ # shift the buffer by one subframe and append the next one
+ sig = torch.cat([sig[:,subframe_size:], x[:,n*nb_subframes + k, :]], 1)
+ # apply the frame's filter matrix; only the last subframe_size outputs are kept
+ exc = torch.bmm(fir_mat_big[:,n,:,:], sig[:,:,None])
+ out = torch.cat([out, exc[:,-subframe_size:,0]], 1)
+
+ return out
+
+
+# weight initialization and clipping
+def init_weights(module):
+ if isinstance(module, nn.GRU):
+ for p in module.named_parameters():
+ if p[0].startswith('weight_hh_'):
+ nn.init.orthogonal_(p[1])
+
+def gen_phase_embedding(periods, frame_size):
+ device = periods.device
+ batch_size = periods.size(0)
+ nb_frames = periods.size(1)
+ w0 = 2*torch.pi/periods
+ w0_shift = torch.cat([2*torch.pi*torch.rand((batch_size, 1), device=device)/frame_size, w0[:,:-1]], 1)
+ cum_phase = frame_size*torch.cumsum(w0_shift, 1)
+ fine_phase = w0[:,:,None]*torch.broadcast_to(torch.arange(frame_size, device=device), (batch_size, nb_frames, frame_size))
+ embed = torch.unsqueeze(cum_phase, 2) + fine_phase
+ embed = torch.reshape(embed, (batch_size, -1))
+ return torch.cos(embed), torch.sin(embed)
+
+class GLU(nn.Module):
+ def __init__(self, feat_size):
+ super(GLU, self).__init__()
+
+ torch.manual_seed(5)
+
+ self.gate = weight_norm(nn.Linear(feat_size, feat_size, bias=False))
+
+ self.init_weights()
+
+ def init_weights(self):
+
+ for m in self.modules():
+ if isinstance(m, nn.Conv1d) or isinstance(m, nn.ConvTranspose1d)\
+ or isinstance(m, nn.Linear) or isinstance(m, nn.Embedding):
+ nn.init.orthogonal_(m.weight.data)
+
+ def forward(self, x):
+
+ out = x * torch.sigmoid(self.gate(x))
+
+ return out
+
+class FWConv(nn.Module):
+ def __init__(self, in_size, out_size, kernel_size=2):
+ super(FWConv, self).__init__()
+
+ torch.manual_seed(5)
+
+ self.in_size = in_size
+ self.kernel_size = kernel_size
+ self.conv = weight_norm(nn.Linear(in_size*self.kernel_size, out_size, bias=False))
+ self.glu = GLU(out_size)
+
+ self.init_weights()
+
+ def init_weights(self):
+
+ for m in self.modules():
+ if isinstance(m, nn.Conv1d) or isinstance(m, nn.ConvTranspose1d)\
+ or isinstance(m, nn.Linear) or isinstance(m, nn.Embedding):
+ nn.init.orthogonal_(m.weight.data)
+
+ def forward(self, x, state):
+ xcat = torch.cat((state, x), -1)
+ #print(x.shape, state.shape, xcat.shape, self.in_size, self.kernel_size)
+ out = self.glu(torch.tanh(self.conv(xcat)))
+ return out, xcat[:,self.in_size:]
+
+def n(x):
+ return torch.clamp(x + (1./127.)*(torch.rand_like(x)-.5), min=-1., max=1.)
+
+class FARGANCond(nn.Module):
+ def __init__(self, feature_dim=20, cond_size=256, pembed_dims=12):
+ super(FARGANCond, self).__init__()
+
+ self.feature_dim = feature_dim
+ self.cond_size = cond_size
+
+ self.pembed = nn.Embedding(224, pembed_dims)
+ self.fdense1 = nn.Linear(self.feature_dim + pembed_dims, 64, bias=False)
+ self.fconv1 = nn.Conv1d(64, 128, kernel_size=3, padding='valid', bias=False)
+ self.fdense2 = nn.Linear(128, 80*4, bias=False)
+
+ self.apply(init_weights)
+ nb_params = sum(p.numel() for p in self.parameters())
+ print(f"cond model: {nb_params} weights")
+
+ def forward(self, features, period):
+ features = features[:,2:,:]
+ period = period[:,2:]
+ p = self.pembed(period-32)
+ features = torch.cat((features, p), -1)
+ tmp = torch.tanh(self.fdense1(features))
+ tmp = tmp.permute(0, 2, 1)
+ tmp = torch.tanh(self.fconv1(tmp))
+ tmp = tmp.permute(0, 2, 1)
+ tmp = torch.tanh(self.fdense2(tmp))
+ #tmp = torch.tanh(self.fdense2(tmp))
+ return tmp
+
class FARGANSub(nn.Module):
    """Subframe-level signal network: produces one subframe of output per call.

    Each call combines an 80-dim conditioning slice, a pitch-delayed excerpt of
    the excitation memory and the most recent subframe, runs them through a
    FWConv layer and three GRU cells (each followed by a GLU), and emits
    ``subframe_size`` new samples together with the updated memories and states.
    """
    def __init__(self, subframe_size=40, nb_subframes=4, cond_size=256):
        super(FARGANSub, self).__init__()

        self.subframe_size = subframe_size
        self.nb_subframes = nb_subframes
        self.cond_size = cond_size
        # Maps the 80-dim conditioning slice to a scalar log-gain (exp() is applied in forward).
        self.cond_gain_dense = nn.Linear(80, 1)

        #self.sig_dense1 = nn.Linear(4*self.subframe_size+self.passthrough_size+self.cond_size, self.cond_size, bias=False)
        # Input width = cond (80) + pitch prediction (subframe_size+4) + previous subframe (subframe_size).
        self.fwc0 = FWConv(2*self.subframe_size+80+4, 192)
        # Every GRU cell also receives the gated pitch prediction and the previous subframe
        # (2*subframe_size extra inputs).
        self.gru1 = nn.GRUCell(192+2*self.subframe_size, 160, bias=False)
        self.gru2 = nn.GRUCell(160+2*self.subframe_size, 128, bias=False)
        self.gru3 = nn.GRUCell(128+2*self.subframe_size, 128, bias=False)

        self.gru1_glu = GLU(160)
        self.gru2_glu = GLU(128)
        self.gru3_glu = GLU(128)
        self.skip_glu = GLU(128)
        #self.ptaps_dense = nn.Linear(4*self.cond_size, 5)

        # Skip layer sees fwc0 (192) + gru1 (160) + gru2/gru3 (2*128) outputs plus pitch and prev.
        self.skip_dense = nn.Linear(192+160+2*128+2*self.subframe_size, 128, bias=False)
        self.sig_dense_out = nn.Linear(128, self.subframe_size, bias=False)
        # One pitch gain per consumer: gru1, gru2, gru3 and the skip layer.
        self.gain_dense_out = nn.Linear(192, 4)


        self.apply(init_weights)
        nb_params = sum(p.numel() for p in self.parameters())
        print(f"subframe model: {nb_params} weights")

    def forward(self, cond, prev_pred, exc_mem, period, states, gain=None):
        """Generate one subframe.

        Args:
            cond: conditioning slice for this subframe (fed to an 80-input dense layer).
            prev_pred: running buffer of past pitch predictions; shifted and returned.
            exc_mem: excitation memory; indexed as a 256-sample buffer per batch row.
            period: pitch period per batch element (integer tensor used for indexing).
            states: sequence of (gru1, gru2, gru3, fwc0) states.
            gain: only passed to ``dump_signal``; it is then overwritten by the
                gain predicted from ``cond``.

        Returns:
            (sig_out, exc_mem, prev_pred, (gru1_state, gru2_state, gru3_state, fwc0_state))
        """
        device = exc_mem.device
        #print(cond.shape, prev.shape)

        # NOTE(review): n() and dump_signal() are helpers defined elsewhere in this file;
        # their exact behavior is not visible here.
        cond = n(cond)
        dump_signal(gain, 'gain0.f32')
        # The caller-supplied gain is discarded: the gain used below is predicted from cond.
        gain = torch.exp(self.cond_gain_dense(cond))
        dump_signal(gain, 'gain1.f32')
        # Build indices into exc_mem starting one period back (minus 2 samples of margin),
        # covering subframe_size+4 samples.
        idx = 256-period[:,None]
        rng = torch.arange(self.subframe_size+4, device=device)
        idx = idx + rng[None,:] - 2
        # Indices that run past the end of the memory wrap back by one period.
        mask = idx >= 256
        idx = idx - mask*period[:,None]
        pred = torch.gather(exc_mem, 1, idx)
        pred = n(pred/(1e-5+gain))

        # Most recent subframe of the excitation memory, gain-normalised like pred.
        prev = exc_mem[:,-self.subframe_size:]
        dump_signal(prev, 'prev_in.f32')
        prev = n(prev/(1e-5+gain))
        dump_signal(prev, 'pitch_exc.f32')
        dump_signal(exc_mem, 'exc_mem.f32')

        tmp = torch.cat((cond, pred, prev), 1)
        #fpitch = taps[:,0:1]*pred[:,:-4] + taps[:,1:2]*pred[:,1:-3] + taps[:,2:3]*pred[:,2:-2] + taps[:,3:4]*pred[:,3:-1] + taps[:,4:]*pred[:,4:]
        # Drop the 2-sample margin on each side: fpitch has subframe_size samples.
        fpitch = pred[:,2:-2]

        #tmp = self.dense1_glu(torch.tanh(self.sig_dense1(tmp)))
        fwc0_out, fwc0_state = self.fwc0(tmp, states[3])
        fwc0_out = n(fwc0_out)
        # Four pitch gains in (0, 1), one per downstream layer.
        pitch_gain = torch.sigmoid(self.gain_dense_out(fwc0_out))

        gru1_state = self.gru1(torch.cat([fwc0_out, pitch_gain[:,0:1]*fpitch, prev], 1), states[0])
        gru1_out = self.gru1_glu(n(gru1_state))
        gru1_out = n(gru1_out)
        gru2_state = self.gru2(torch.cat([gru1_out, pitch_gain[:,1:2]*fpitch, prev], 1), states[1])
        gru2_out = self.gru2_glu(n(gru2_state))
        gru2_out = n(gru2_out)
        gru3_state = self.gru3(torch.cat([gru2_out, pitch_gain[:,2:3]*fpitch, prev], 1), states[2])
        gru3_out = self.gru3_glu(n(gru3_state))
        gru3_out = n(gru3_out)
        # From here on gru3_out holds the concatenation of all layer outputs (skip connections).
        gru3_out = torch.cat([gru1_out, gru2_out, gru3_out, fwc0_out], 1)
        skip_out = torch.tanh(self.skip_dense(torch.cat([gru3_out, pitch_gain[:,3:4]*fpitch, prev], 1)))
        skip_out = self.skip_glu(n(skip_out))
        sig_out = torch.tanh(self.sig_dense_out(skip_out))
        dump_signal(sig_out, 'exc_out.f32')
        #taps = self.ptaps_dense(gru3_out)
        #taps = .2*taps + torch.exp(taps)
        #taps = taps / (1e-2 + torch.sum(torch.abs(taps), dim=-1, keepdim=True))
        #dump_signal(taps, 'taps.f32')

        dump_signal(pitch_gain, 'pgain.f32')
        #sig_out = (sig_out + pitch_gain*fpitch) * gain
        # Undo the gain normalisation on the output.
        sig_out = sig_out * gain
        # Slide both memories forward by one subframe.
        exc_mem = torch.cat([exc_mem[:,self.subframe_size:], sig_out], 1)
        prev_pred = torch.cat([prev_pred[:,self.subframe_size:], fpitch], 1)
        dump_signal(sig_out, 'sig_out.f32')
        return sig_out, exc_mem, prev_pred, (gru1_state, gru2_state, gru3_state, fwc0_state)
+
class FARGAN(nn.Module):
    """Full model: a frame-rate conditioning network driving a subframe-rate signal network."""
    def __init__(self, subframe_size=40, nb_subframes=4, feature_dim=20, cond_size=256, passthrough_size=0, has_gain=False, gamma=None):
        # NOTE: passthrough_size, has_gain and gamma are accepted but not used in this class.
        super(FARGAN, self).__init__()

        self.subframe_size = subframe_size
        self.nb_subframes = nb_subframes
        self.frame_size = self.subframe_size*self.nb_subframes
        self.feature_dim = feature_dim
        self.cond_size = cond_size

        self.cond_net = FARGANCond(feature_dim=feature_dim, cond_size=cond_size)
        self.sig_net = FARGANSub(subframe_size=subframe_size, nb_subframes=nb_subframes, cond_size=cond_size)

    def forward(self, features, period, nb_frames, pre=None, states=None):
        """Synthesize ``nb_frames`` frames of signal.

        Args:
            features: per-frame features; batch is dimension 0.
            period: per-frame pitch periods, indexed as ``period[:, 3+n]``.
            nb_frames: number of frames to generate after the pre-roll.
            pre: optional known signal used to prime the excitation memory; frames
                covered by ``pre`` are teacher-forced instead of being output.
            states: ignored; the recurrent states are always re-initialised to zero.

        Returns:
            (sig, states): generated samples concatenated along dim 1, and the
            detached final states as a list.
        """
        device = features.device
        batch_size = features.size(0)

        prev = torch.zeros(batch_size, 256, device=device)
        exc_mem = torch.zeros(batch_size, 256, device=device)
        nb_pre_frames = pre.size(1)//self.frame_size if pre is not None else 0

        # Fresh zero states (gru1, gru2, gru3, fwc0); this overwrites the `states` argument.
        states = (
            torch.zeros(batch_size, 160, device=device),
            torch.zeros(batch_size, 128, device=device),
            torch.zeros(batch_size, 128, device=device),
            torch.zeros(batch_size, (2*self.subframe_size+80+4)*1, device=device)
        )

        sig = torch.zeros((batch_size, 0), device=device)
        cond = self.cond_net(features, period)
        if pre is not None:
            # Prime the tail of the excitation memory with the first frame of `pre`.
            exc_mem[:,-self.frame_size:] = pre[:, :self.frame_size]
        # The first pre frame is already in exc_mem, so the loop starts at frame 1 in that case.
        start = 1 if nb_pre_frames>0 else 0
        # The loop variable `n` is a plain frame index local to this method.
        for n in range(start, nb_frames+nb_pre_frames):
            for k in range(self.nb_subframes):
                pos = n*self.frame_size + k*self.subframe_size
                #print("now: ", preal.shape, prev.shape, sig_in.shape)
                pitch = period[:, 3+n]
                # Gain derived from feature 0 of frame 3+n; handed to sig_net as `gain`.
                gain = .03*10**(0.5*features[:, 3+n, 0:1]/np.sqrt(18.0))
                #gain = gain[:,:,None]
                # Each subframe consumes an 80-wide slice of the frame's conditioning vector.
                out, exc_mem, prev, states = self.sig_net(cond[:, n, k*80:(k+1)*80], prev, exc_mem, pitch, states, gain=gain)

                if n < nb_pre_frames:
                    # Teacher forcing: replace the generated subframe with the known signal.
                    out = pre[:, pos:pos+self.subframe_size]
                    exc_mem[:,-self.subframe_size:] = out
                else:
                    sig = torch.cat([sig, out], 1)

        states = [s.detach() for s in states]
        return sig, states
diff --git a/dnn/torch/fargan/filters.py b/dnn/torch/fargan/filters.py
new file mode 100644
index 00000000..8ec97ea6
--- /dev/null
+++ b/dnn/torch/fargan/filters.py
@@ -0,0 +1,46 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+import math
+
def toeplitz_from_filter(a):
    """Build lower-triangular Toeplitz matrices from filter coefficients.

    For an input of shape (..., L) the result has shape (..., L, L) with
    ``out[..., i, j] = a[..., i - j]`` when ``i >= j`` and 0 otherwise.
    """
    dev = a.device
    taps = a.size(-1)
    batch_dims = tuple(a.shape[:-1])

    positions = torch.arange(0, taps, dtype=torch.int64, device=dev)
    zero = torch.tensor(0, device=dev)
    # Index i-j+1 into the zero-prefixed coefficients; negative lags clamp to slot 0 (the zero).
    lookup = torch.maximum(positions[:, None] - positions[None, :] + 1, zero)

    # Prepend a zero entry so that index 0 yields the above-diagonal zeros.
    padded = torch.cat([a[..., :1] * 0, a], -1)
    rows = torch.broadcast_to(padded[..., None, :], batch_dims + (taps, taps + 1))
    lookup = torch.broadcast_to(lookup, batch_dims + (taps, taps))
    return torch.gather(rows, -1, lookup)
+
def filter_iir_response(a, N):
    """Compute the first ``N`` samples of the impulse response of 1/A(z).

    Args:
        a: 3-D tensor whose last dimension holds the filter coefficients
            a[0..L-1]; the recursion assumes a[0] == 1.
        N: number of response samples to produce.

    Returns:
        Tensor of shape ``a.shape[:-1] + (N,)`` with
        R[0] = 1 and R[i] = -sum_{j=1}^{min(i, L-1)} a[j] * R[i-j].

    Requests shorter than the filter (``N < L``, including ``N == 0``) return
    the truncated response instead of indexing past the end of the output.
    """
    device = a.device
    L = a.size(-1)
    ar = a.flip(dims=(2,))
    size = (*(a.shape[:-1]), N)
    R = torch.zeros(size, device=device)
    if N == 0:
        return R
    R[:,:,0] = torch.ones((a.shape[:-1]), device=device)
    # Warm-up: fewer than L-1 past outputs are available, so use a shortened tap slice.
    # Bounded by N so a request shorter than the filter does not overrun R.
    for i in range(1, min(L, N)):
        R[:,:,i] = - torch.sum(ar[:,:,L-i-1:-1] * R[:,:,:i], axis=-1)
    # Steady state: all L-1 feedback taps applied to the previous L-1 outputs.
    for i in range(L, N):
        R[:,:,i] = - torch.sum(ar[:,:,:-1] * R[:,:,i-L+1:i], axis=-1)
    return R
+
if __name__ == '__main__':
    # Manual smoke test: print the Toeplitz matrix of the impulse response of two small filters.
    #a = torch.tensor([ [[1, -.9, 0.02], [1, -.8, .01]], [[1, .9, 0], [1, .8, 0]]])
    a = torch.tensor([ [[1, -.9, 0.02], [1, -.8, .01]]])
    A = toeplitz_from_filter(a)
    #print(A)
    # Five samples of the impulse response of 1/A(z) for each filter.
    R = filter_iir_response(a, 5)

    RA = toeplitz_from_filter(R)
    print(RA)
diff --git a/dnn/torch/fargan/rc.py b/dnn/torch/fargan/rc.py
new file mode 100644
index 00000000..7f67016a
--- /dev/null
+++ b/dnn/torch/fargan/rc.py
@@ -0,0 +1,29 @@
+import torch
+
+
+
def rc2lpc(rc):
    """Convert reflection coefficients to LPC coefficients (step-up recursion).

    Works along the last dimension; leading dimensions are treated as batch.
    The output has the same shape as ``rc``.
    """
    n_coeffs = rc.shape[-1]
    lpc = rc[..., 0:1]
    for stage in range(1, n_coeffs):
        k = rc[..., stage:stage + 1]
        # Update the existing coefficients with their reversed copy, then append k.
        updated = lpc + k * torch.flip(lpc, dims=(-1,))
        lpc = torch.cat([updated, k], -1)
    return lpc
+
def lpc2rc(lpc):
    """Convert LPC coefficients to reflection coefficients (step-down recursion).

    Works along the last dimension; this is the inverse of ``rc2lpc``.
    """
    order = lpc.shape[-1]
    rc = lpc[..., -1:]
    for _ in range(order - 1):
        k = lpc[..., -1:]
        shorter = lpc[..., :-1]
        # Peel off one order; the last remaining coefficient is the next reflection coefficient.
        lpc = (shorter - k * torch.flip(shorter, dims=(-1,))) / (1 - k * k)
        rc = torch.cat([lpc[..., -1:], rc], -1)
    return rc
+
if __name__ == "__main__":
    # Round-trip demo: rc -> lpc -> rc; the first and last printed tensors should match.
    rc = torch.tensor([[.5, -.5, .6, -.6]])
    print(rc)
    lpc = rc2lpc(rc)
    print(lpc)
    rc2 = lpc2rc(lpc)
    print(rc2)
diff --git a/dnn/torch/fargan/stft_loss.py b/dnn/torch/fargan/stft_loss.py
new file mode 100644
index 00000000..8c904054
--- /dev/null
+++ b/dnn/torch/fargan/stft_loss.py
@@ -0,0 +1,186 @@
+"""STFT-based Loss modules."""
+
+import torch
+import torch.nn.functional as F
+import numpy as np
+import torchaudio
+
+
def stft(x, fft_size, hop_size, win_length, window):
    """Compute a floored STFT magnitude spectrogram.

    Args:
        x (Tensor): Input signal tensor (B, T).
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (Tensor): Window passed straight through to ``torch.stft``.
    Returns:
        Tensor: Magnitude of the complex STFT as returned by ``torch.stft``
        (frequency bins before frames), clamped to a minimum of 1e-7.
    """
    spectrum = torch.stft(x, fft_size, hop_size, win_length, window, return_complex=True)
    magnitude = torch.abs(spectrum)
    # Floor the magnitude so later sqrt/log/division steps never see zero.
    return torch.clamp(magnitude, min=1e-7)
+
class SpectralConvergenceLoss(torch.nn.Module):
    """Spectral convergence loss computed on square-rooted magnitudes."""

    def __init__(self):
        """Initialize spectral convergence loss module."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.
        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal.
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal.
        Returns:
            Tensor: L1 norm of sqrt(y_mag) - sqrt(x_mag), divided by the L1 norm of sqrt(y_mag).
        """
        root_x = torch.sqrt(x_mag)
        root_y = torch.sqrt(y_mag)
        numerator = torch.norm(root_y - root_x, p=1)
        denominator = torch.norm(root_y, p=1)
        return numerator / denominator
+
class LogSTFTMagnitudeLoss(torch.nn.Module):
    """STFT magnitude loss module.

    Despite the name, the current implementation takes a plain L1 distance
    between the two inputs; no logarithm is applied.
    """

    def __init__(self):
        """Initialize STFT magnitude loss module."""
        super().__init__()

    def forward(self, x, y):
        """Calculate forward propagation.
        Args:
            x (Tensor): Magnitude spectrogram of predicted signal.
            y (Tensor): Magnitude spectrogram of groundtruth signal.
        Returns:
            Tensor: Mean absolute difference between ``y`` and ``x``.
        """
        return F.l1_loss(y, x)
+
+
class STFTLoss(torch.nn.Module):
    """Single-resolution STFT loss: spectral convergence plus magnitude loss."""

    def __init__(self, device, fft_size=1024, shift_size=120, win_length=600, window="hann_window"):
        """Store the STFT parameters and build the analysis window on ``device``.

        ``window`` is the name of a ``torch`` window function (e.g. "hann_window").
        """
        super().__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        window_fn = getattr(torch, window)
        self.window = window_fn(win_length).to(device)
        self.spectral_convergenge_loss = SpectralConvergenceLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()

    def forward(self, x, y):
        """Calculate forward propagation.
        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).
        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: STFT magnitude loss value.
        """
        stft_args = (self.fft_size, self.shift_size, self.win_length, self.window)
        x_mag = stft(x, *stft_args)
        y_mag = stft(y, *stft_args)
        sc_loss = self.spectral_convergenge_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)
        return sc_loss, mag_loss
+
+
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Average spectral convergence loss over several STFT resolutions."""

    def __init__(self,
                 device,
                 fft_sizes=[2560, 1280, 640, 320, 160, 80],
                 hop_sizes=[640, 320, 160, 80, 40, 20],
                 win_lengths=[2560, 1280, 640, 320, 160, 80],
                 window="hann_window"):
        """Create one STFTLoss per (fft size, hop size, window length) triple."""
        super().__init__()
        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths)
        self.stft_losses = torch.nn.ModuleList()
        for fft_size, hop_size, win_length in zip(fft_sizes, hop_sizes, win_lengths):
            self.stft_losses.append(STFTLoss(device, fft_size, hop_size, win_length, window))

    def forward(self, x, y):
        """Calculate forward propagation.
        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).
        Returns:
            Tensor: Multi resolution spectral convergence loss value
            (the magnitude loss of each resolution is computed but not used).
        """
        sc_loss = 0.0
        for resolution_loss in self.stft_losses:
            sc_l, _ = resolution_loss(x, y)
            sc_loss += sc_l
        sc_loss /= len(self.stft_losses)
        return sc_loss
diff --git a/dnn/torch/fargan/test_fargan.py b/dnn/torch/fargan/test_fargan.py
new file mode 100644
index 00000000..d3aeb613
--- /dev/null
+++ b/dnn/torch/fargan/test_fargan.py
@@ -0,0 +1,128 @@
+import os
+import argparse
+import numpy as np
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+import tqdm
+
+import fargan
+from dataset import FARGANDataset
+
# Each frame of the feature file holds nb_features floats; only the first
# nb_used_features are fed to the model, the rest are sliced off as LPC below.
nb_features = 36
nb_used_features = 20

parser = argparse.ArgumentParser()

parser.add_argument('model', type=str, help='CELPNet model')
parser.add_argument('features', type=str, help='path to feature file in .f32 format')
parser.add_argument('output', type=str, help='path to output file (16-bit PCM)')

parser.add_argument('--cuda-visible-devices', type=str, help="comma separates list of cuda visible device indices, default: CUDA_VISIBLE_DEVICES", default=None)


model_group = parser.add_argument_group(title="model parameters")
model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)

args = parser.parse_args()

# Identity test: None is a singleton, so `is not None` is the correct check.
if args.cuda_visible_devices is not None:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_visible_devices


features_file = args.features
signal_file = args.output



device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

checkpoint = torch.load(args.model, map_location='cpu')

# The model is rebuilt from the constructor arguments stored in the checkpoint.
model = fargan.FARGAN(*checkpoint['model_args'], **checkpoint['model_kwargs'])


model.load_state_dict(checkpoint['state_dict'], strict=False)

# Memory-map the feature file as (1, frames, nb_features).
features = np.reshape(np.memmap(features_file, dtype='float32', mode='r'), (1, -1, nb_features))
lpc = features[:,4-1:-1,nb_used_features:]
features = features[:, :, :nb_used_features]
#periods = np.round(50*features[:,:,nb_used_features-2]+100).astype('int')
# Pitch period derived from feature nb_used_features-2, clipped to [32, 255].
periods = np.round(np.clip(256./2**(features[:,:,nb_used_features-2]+1.5), 32, 255)).astype('int')


nb_frames = features.shape[1]
#nb_frames = 1000
gamma = checkpoint['model_kwargs']['gamma']
+
def lpc_synthesis_one_frame(frame, filt, buffer, weighting_vector=np.ones(16)):
    """Run one frame through an all-pole synthesis filter.

    Each output sample is the input sample minus the dot product of the
    (weighted) history with the reversed filter taps, so the newest history
    sample sits at the end of the history vector.

    Note: only element 0 of the caller's ``buffer`` is written (with the first
    output sample); afterwards the history lives in rolled copies.
    """
    synthesized = np.zeros_like(frame)
    taps = np.flip(filt)
    excitation = frame[:]
    history = buffer

    for idx, sample in enumerate(excitation):
        value = sample - np.dot(history * weighting_vector, taps)
        # Overwrite the oldest slot, then rotate so the new sample becomes the last entry.
        history[0] = value
        history = np.roll(history, -1)
        synthesized[idx] = value

    return synthesized
+
def inverse_perceptual_weighting(pw_signal, filters, weighting_vector):
    """Filter a signal frame by frame (160 samples) through per-frame synthesis filters.

    inverse perceptual weighting = H_preemph / W(z/gamma)

    ``filters`` must hold exactly one row of taps per 160-sample frame. The
    last 16 output samples of each frame seed the filter history of the next.
    """
    frame_len = 160
    signal = np.zeros_like(pw_signal)
    buffer = np.zeros(16)
    num_frames = pw_signal.shape[0] // frame_len
    assert num_frames == filters.shape[0]

    for frame_idx in range(num_frames):
        begin = frame_idx * frame_len
        end = begin + frame_len
        synthesized = lpc_synthesis_one_frame(pw_signal[begin:end], filters[frame_idx, :], buffer, weighting_vector)
        signal[begin:end] = synthesized[:]
        buffer[:] = synthesized[-16:]
    return signal
+
def inverse_perceptual_weighting40(pw_signal, filters):
    """Same as the 160-sample version but with 40-sample frames and no tap weighting.

    inverse perceptual weighting = H_preemph / W(z/gamma)

    ``filters`` must hold exactly one row of taps per 40-sample frame.
    """
    frame_len = 40
    signal = np.zeros_like(pw_signal)
    buffer = np.zeros(16)
    num_frames = pw_signal.shape[0] // frame_len
    assert num_frames == filters.shape[0]

    for frame_idx in range(num_frames):
        begin = frame_idx * frame_len
        end = begin + frame_len
        synthesized = lpc_synthesis_one_frame(pw_signal[begin:end], filters[frame_idx, :], buffer)
        signal[begin:end] = synthesized[:]
        buffer[:] = synthesized[-16:]
    return signal
+
+from scipy.signal import lfilter
+
if __name__ == '__main__':
    # Run the model over the whole feature file and write 16-bit PCM.
    model.to(device)
    features = torch.tensor(features).to(device)
    #lpc = torch.tensor(lpc).to(device)
    periods = torch.tensor(periods).to(device)
    # Bandwidth-expand the LPCs by gamma**k and interpolate them; the result is only
    # needed by the (currently disabled) inverse perceptual weighting step below.
    weighting = gamma**np.arange(1, 17)
    lpc = lpc*weighting
    lpc = fargan.interp_lpc(torch.tensor(lpc), 4).numpy()

    sig, _ = model(features, periods, nb_frames - 4)
    #weighting_vector = np.array([gamma**i for i in range(16,0,-1)])
    # Move to host memory first: Tensor.numpy() raises for tensors that live on a CUDA device.
    sig = sig.detach().cpu().numpy().flatten()
    # De-emphasis: y[n] = x[n] + 0.85*y[n-1].
    sig = lfilter(np.array([1.]), np.array([1., -.85]), sig)
    #sig = inverse_perceptual_weighting40(sig, lpc[0,:,:])

    # Clip just inside full scale before converting to int16.
    pcm = np.round(32768*np.clip(sig, a_max=.99, a_min=-.99)).astype('int16')
    pcm.tofile(signal_file)
diff --git a/dnn/torch/fargan/train_fargan.py b/dnn/torch/fargan/train_fargan.py
new file mode 100644
index 00000000..1b2e2009
--- /dev/null
+++ b/dnn/torch/fargan/train_fargan.py
@@ -0,0 +1,168 @@
+import os
+import argparse
+import random
+import numpy as np
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+import tqdm
+
+import fargan
+from dataset import FARGANDataset
+from stft_loss import *
+
parser = argparse.ArgumentParser()

parser.add_argument('features', type=str, help='path to feature file in .f32 format')
parser.add_argument('signal', type=str, help='path to signal file in .s16 format')
parser.add_argument('output', type=str, help='path to output folder')

parser.add_argument('--suffix', type=str, help="model name suffix", default="")
parser.add_argument('--cuda-visible-devices', type=str, help="comma separates list of cuda visible device indices, default: CUDA_VISIBLE_DEVICES", default=None)


model_group = parser.add_argument_group(title="model parameters")
model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
model_group.add_argument('--gamma', type=float, help="Use A(z/gamma), default: 0.9", default=0.9)

training_group = parser.add_argument_group(title="training parameters")
training_group.add_argument('--batch-size', type=int, help="batch size, default: 512", default=512)
training_group.add_argument('--lr', type=float, help='learning rate, default: 1e-3', default=1e-3)
training_group.add_argument('--epochs', type=int, help='number of training epochs, default: 20', default=20)
training_group.add_argument('--sequence-length', type=int, help='sequence length, default: 15', default=15)
training_group.add_argument('--lr-decay', type=float, help='learning rate decay factor, default: 1e-4', default=1e-4)
training_group.add_argument('--initial-checkpoint', type=str, help='initial checkpoint to start training from, default: None', default=None)

args = parser.parse_args()

# Identity test: None is a singleton, so `is not None` is the correct check.
if args.cuda_visible_devices is not None:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_visible_devices

# checkpoints
checkpoint_dir = os.path.join(args.output, 'checkpoints')
checkpoint = dict()
os.makedirs(checkpoint_dir, exist_ok=True)


# training parameters
batch_size = args.batch_size
lr = args.lr
epochs = args.epochs
sequence_length = args.sequence_length
lr_decay = args.lr_decay

adam_betas = [0.8, 0.95]
adam_eps = 1e-8
features_file = args.features
signal_file = args.signal

# model parameters
cond_size = args.cond_size


# Record the hyper-parameters alongside the weights in every saved checkpoint.
checkpoint['batch_size'] = batch_size
checkpoint['lr'] = lr
checkpoint['lr_decay'] = lr_decay
checkpoint['epochs'] = epochs
checkpoint['sequence_length'] = sequence_length
checkpoint['adam_betas'] = adam_betas


device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

checkpoint['model_args'] = ()
checkpoint['model_kwargs'] = {'cond_size': cond_size, 'gamma': args.gamma}
print(checkpoint['model_kwargs'])
model = fargan.FARGAN(*checkpoint['model_args'], **checkpoint['model_kwargs'])

#model = fargan.FARGAN()
#model = nn.DataParallel(model)

if args.initial_checkpoint is not None:
    # NOTE(review): this replaces the whole `checkpoint` dict, so the hyper-parameters
    # recorded above are superseded by those stored in the loaded file — confirm intended.
    checkpoint = torch.load(args.initial_checkpoint, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'], strict=False)

checkpoint['state_dict'] = model.state_dict()


dataset = FARGANDataset(features_file, signal_file, sequence_length=sequence_length)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)


optimizer = torch.optim.AdamW(model.parameters(), lr=lr, betas=adam_betas, eps=adam_eps)


# learning rate scheduler: lr * 1 / (1 + lr_decay * step), stepped once per batch
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay * x))

states = None

spect_loss = MultiResolutionSTFTLoss(device).to(device)
+
if __name__ == '__main__':
    # Training loop: one checkpoint file is written at the end of every epoch.
    model.to(device)

    for epoch in range(1, epochs + 1):

        # Running sums used for the averaged values shown in the progress bar.
        running_specc = 0
        running_cont_loss = 0
        running_loss = 0

        print(f"training epoch {epoch}...")
        with tqdm.tqdm(dataloader, unit='batch') as tepoch:
            for i, (features, periods, target, lpc) in enumerate(tepoch):
                optimizer.zero_grad()
                features = features.to(device)
                #lpc = torch.tensor(fargan.interp_lpc(lpc.numpy(), 4))
                #print("interp size", lpc.shape)
                #lpc = lpc.to(device)
                #lpc = lpc*(args.gamma**torch.arange(1,17, device=device))
                #lpc = fargan.interp_lpc(lpc, 4)
                periods = periods.to(device)
                # About 90% of batches are truncated to sequence_length frames; the remaining
                # ~10% keep the full sequence length but use every other batch element.
                if (np.random.rand() > 0.1):
                    target = target[:, :sequence_length*160]
                    #lpc = lpc[:,:sequence_length*4,:]
                    features = features[:,:sequence_length+4,:]
                    periods = periods[:,:sequence_length+4]
                else:
                    target=target[::2, :]
                    #lpc=lpc[::2,:]
                    features=features[::2,:]
                    periods=periods[::2,:]
                target = target.to(device)
                #print(target.shape, lpc.shape)
                #target = fargan.analysis_filter(target, lpc[:,:,:], nb_subframes=1, gamma=args.gamma)

                #nb_pre = random.randrange(1, 6)
                # The first nb_pre frames (160 samples each) of the target prime the model.
                nb_pre = 2
                pre = target[:, :nb_pre*160]
                sig, states = model(features, periods, target.size(1)//160 - nb_pre, pre=pre, states=None)
                # Prepend the priming signal so `sig` lines up with `target`.
                sig = torch.cat([pre, sig], -1)

                # Continuity loss covers only the first generated frame after the pre-roll.
                cont_loss = fargan.sig_loss(target[:, nb_pre*160:nb_pre*160+160], sig[:, nb_pre*160:nb_pre*160+160])
                specc_loss = spect_loss(sig, target.detach())
                loss = .03*cont_loss + specc_loss

                loss.backward()
                optimizer.step()

                #model.clip_weights()

                # The scheduler is stepped per batch, not per epoch.
                scheduler.step()

                running_specc += specc_loss.detach().cpu().item()
                running_cont_loss += cont_loss.detach().cpu().item()

                running_loss += loss.detach().cpu().item()
                tepoch.set_postfix(loss=f"{running_loss/(i+1):8.5f}",
                                   cont_loss=f"{running_cont_loss/(i+1):8.5f}",
                                   specc=f"{running_specc/(i+1):8.5f}",
                                   )

        # save checkpoint
        checkpoint_path = os.path.join(checkpoint_dir, f'fargan{args.suffix}_{epoch}.pth')
        checkpoint['state_dict'] = model.state_dict()
        checkpoint['loss'] = running_loss / len(dataloader)
        checkpoint['epoch'] = epoch
        torch.save(checkpoint, checkpoint_path)
diff --git a/dnn/torch/fwgan/dump_model_weights.py b/dnn/torch/fwgan/dump_model_weights.py
new file mode 100644
index 00000000..64f955ac
--- /dev/null
+++ b/dnn/torch/fwgan/dump_model_weights.py
@@ -0,0 +1,88 @@
+import os
+import sys
+import argparse
+
+import torch
+from torch import nn
+
+
+sys.path.append(os.path.join(os.path.split(__file__)[0], '../weight-exchange'))
+import wexchange.torch
+
+from models import model_dict
+
# Module names that are exported without quantization even when --quantize is given.
unquantized = [
    'bfcc_with_corr_upsampler.fc',
    'cont_net.0',
    'fwc6.cont_fc.0',
    'fwc6.fc.0',
    'fwc6.fc.1.gate',
    'fwc7.cont_fc.0',
    'fwc7.fc.0',
    'fwc7.fc.1.gate'
]

# Help text shown by argparse; the list above is interpolated into it.
description=f"""
This is an unsafe dumping script for FWGAN models. It assumes that all weights are included in Linear, Conv1d or GRU layer
and will fail to export any other weights.

Furthermore, the quanitze option relies on the following explicit list of layers to be excluded:
{unquantized}.

Modify this script manually if adjustments are needed.
"""

parser = argparse.ArgumentParser(description=description)
parser.add_argument('model', choices=['fwgan400', 'fwgan500'], help='model name')
parser.add_argument('weightfile', type=str, help='weight file path')
parser.add_argument('export_folder', type=str)
parser.add_argument('--export-filename', type=str, default='fwgan_data', help='filename for source and header file (.c and .h will be added), defaults to fwgan_data')
parser.add_argument('--struct-name', type=str, default='FWGAN', help='name for C struct, defaults to FWGAN')
parser.add_argument('--quantize', action='store_true', help='apply quantization')
+
if __name__ == "__main__":
    # Load a trained FWGAN model and export its Linear/Conv1d/GRU weights as C source.
    args = parser.parse_args()

    model = model_dict[args.model]()

    print(f"loading weights from {args.weightfile}...")
    saved_gen= torch.load(args.weightfile, map_location='cpu')
    model.load_state_dict(saved_gen)
    # Strip weight normalization from every module that has it before exporting.
    def _remove_weight_norm(m):
        try:
            torch.nn.utils.remove_weight_norm(m)
        except ValueError:  # this module didn't have weight norm
            return
    model.apply(_remove_weight_norm)


    print("dumping model...")
    quantize_model=args.quantize

    output_folder = args.export_folder
    os.makedirs(output_folder, exist_ok=True)

    # NOTE(review): only `wexchange.torch` is imported explicitly; this presumably relies on
    # it making `wexchange.c_export` available — confirm.
    writer = wexchange.c_export.c_writer.CWriter(os.path.join(output_folder, args.export_filename), model_struct_name=args.struct_name)

    for name, module in model.named_modules():

        # Quantize unless the layer is on the exclusion list; unquantized layers get scale 1/128.
        if quantize_model:
            quantize=name not in unquantized
            scale = None if quantize else 1/128
        else:
            quantize=False
            scale=1/128

        # Dots in module names are replaced by underscores for the exported layer names.
        if isinstance(module, nn.Linear):
            print(f"dumping linear layer {name}...")
            wexchange.torch.dump_torch_dense_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale)

        if isinstance(module, nn.Conv1d):
            print(f"dumping conv1d layer {name}...")
            wexchange.torch.dump_torch_conv1d_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale)

        if isinstance(module, nn.GRU):
            print(f"dumping GRU layer {name}...")
            wexchange.torch.dump_torch_gru_weights(writer, module, name.replace('.', '_'), quantize=quantize, scale=scale, recurrent_scale=scale)

    writer.close()
diff --git a/dnn/torch/fwgan/inference.py b/dnn/torch/fwgan/inference.py
new file mode 100644
index 00000000..c06b68b1
--- /dev/null
+++ b/dnn/torch/fwgan/inference.py
@@ -0,0 +1,141 @@
+import os
+import time
+import torch
+import numpy as np
+from scipy import signal as si
+from scipy.io import wavfile
+import argparse
+
+from models import model_dict
+
# Command line: model name, weight file, then a feature file or folder and the matching output.
parser = argparse.ArgumentParser()
parser.add_argument('model', choices=['fwgan400', 'fwgan500'], help='model name')
parser.add_argument('weightfile', type=str, help='weight file')
parser.add_argument('input', type=str, help='input: feature file or folder with feature files')
parser.add_argument('output', type=str, help='output: wav file name or folder name, depending on input')
+
+
+########################### Signal Processing Layers ###########################
+
def preemphasis(x, coef= -0.85):
    """Apply the FIR filter y[n] = x[n] + coef * x[n-1] and return float32 samples."""
    numerator = np.array([1.0, coef])
    denominator = np.array([1.0])
    filtered = si.lfilter(numerator, denominator, x)
    return filtered.astype('float32')
+
def deemphasis(x, coef= -0.85):
    """Apply the IIR filter y[n] = x[n] - coef * y[n-1] and return float32 samples."""
    numerator = np.array([1.0])
    denominator = np.array([1.0, coef])
    filtered = si.lfilter(numerator, denominator, x)
    return filtered.astype('float32')
+
# Per-tap weights gamma**16 ... gamma**1, ordered to line up with the filter history
# (oldest sample first).
gamma = 0.92
weighting_vector = np.array([gamma**i for i in range(16,0,-1)])
+
+
def lpc_synthesis_one_frame(frame, filt, buffer, weighting_vector=np.ones(16)):
    """Run one frame through an all-pole synthesis filter.

    Each output sample is the input sample minus the dot product of the
    (weighted) history with the reversed filter taps, so the newest history
    sample sits at the end of the history vector.

    Note: only element 0 of the caller's ``buffer`` is written (with the first
    output sample); afterwards the history lives in rolled copies.
    """
    result = np.zeros_like(frame)
    reversed_taps = np.flip(filt)
    samples = frame[:]
    state = buffer

    for pos, sample in enumerate(samples):
        current = sample - np.dot(state * weighting_vector, reversed_taps)
        # Overwrite the oldest slot, then rotate so the new sample becomes the last entry.
        state[0] = current
        state = np.roll(state, -1)
        result[pos] = current

    return result
+
def inverse_perceptual_weighting(pw_signal, filters, weighting_vector):
    """Pre-emphasize the signal, then filter it frame by frame (160 samples).

    inverse perceptual weighting = H_preemph / W(z/gamma)

    ``filters`` must hold exactly one row of taps per 160-sample frame. The
    last 16 output samples of each frame seed the filter history of the next.
    """
    frame_len = 160
    pw_signal = preemphasis(pw_signal)

    signal = np.zeros_like(pw_signal)
    buffer = np.zeros(16)
    num_frames = pw_signal.shape[0] // frame_len
    assert num_frames == filters.shape[0]

    for frame_idx in range(num_frames):
        begin = frame_idx * frame_len
        end = begin + frame_len
        synthesized = lpc_synthesis_one_frame(pw_signal[begin:end], filters[frame_idx, :], buffer, weighting_vector)
        signal[begin:end] = synthesized[:]
        buffer[:] = synthesized[-16:]

    return signal
+
+
def process_item(generator, feature_filename, output_filename, verbose=False):
    """Synthesize one feature file with ``generator`` and write a 16 kHz wav file.

    The feature file is read as float32 frames of 36 values: columns 0-17 are
    used as bfcc, column 18 gives the pitch period, column 19 the correlation
    and the last 16 columns the LPC filter taps.
    """
    raw = np.memmap(feature_filename, dtype='float32', mode='r')
    num_feat_frames = len(raw) // 36
    feat = np.reshape(raw, (num_feat_frames, 36))

    bfcc = np.copy(feat[:, :18])
    corr = np.copy(feat[:, 19:20]) + 0.5
    stacked = np.hstack((bfcc, corr))
    bfcc_with_corr = torch.from_numpy(stacked).type(torch.FloatTensor).unsqueeze(0)

    # Map the pitch feature to an integer period; the 0.1 offset guards the int truncation.
    period_values = (0.1 + 50 * np.copy(feat[:, 18:19]) + 100).astype('int32')
    period = torch.from_numpy(period_values).type(torch.long).view(1, -1)

    lpc_filters = np.copy(feat[:, -16:])

    start_time = time.time()
    # Zero history: the vocoder runs in complete synthesis mode with no past audio frames.
    x1 = generator(period, bfcc_with_corr, torch.zeros(1,320))
    total_time = time.time() - start_time

    x1 = x1.squeeze(1).squeeze(0).detach().cpu().numpy()
    gen_seconds = len(x1)/16000
    unweighted = inverse_perceptual_weighting(x1, lpc_filters, weighting_vector)
    out = deemphasis(unweighted)
    if verbose:
        print(f"Took {total_time:.3f}s to generate {len(x1)} samples ({gen_seconds}s) -> {gen_seconds/total_time:.2f}x real time")

    # Scale to 16-bit range, saturating at the int16 limits.
    out = np.clip(np.round(2**15 * out), -2**15, 2**15 -1).astype(np.int16)
    wavfile.write(output_filename, 16000, out)
+
+
+########################### The inference loop over folder containing lpcnet feature files #################################
if __name__ == "__main__":
    # Entry point: synthesize either a single feature file or every file in a folder.

    args = parser.parse_args()

    generator = model_dict[args.model]()


    # Load the checkpoint for the selected model
    saved_gen= torch.load(args.weightfile, map_location='cpu')
    generator.load_state_dict(saved_gen)

    #this is just to remove the weight_norm from the model layers as it's no longer needed
    def _remove_weight_norm(m):
        try:
            torch.nn.utils.remove_weight_norm(m)
        except ValueError:  # this module didn't have weight norm
            return
    generator.apply(_remove_weight_norm)

    #enable inference mode
    generator = generator.eval()

    print('Successfully loaded the generator model ... start generation:')

    if os.path.isdir(args.input):
        # Folder mode: every entry of the input folder is treated as a feature file and
        # written to <output>/<basename>_<model>.wav.

        os.makedirs(args.output, exist_ok=True)

        for fn in os.listdir(args.input):
            print(f"processing input {fn}...")
            feature_filename = os.path.join(args.input, fn)
            output_filename = os.path.join(args.output, os.path.splitext(fn)[0] + f"_{args.model}.wav")
            process_item(generator, feature_filename, output_filename)
    else:
        # Single-file mode: input and output are used as given.
        process_item(generator, args.input, args.output)

    print("Finished!")
diff --git a/dnn/torch/fwgan/models/__init__.py b/dnn/torch/fwgan/models/__init__.py
new file mode 100644
index 00000000..d52a6eb0
--- /dev/null
+++ b/dnn/torch/fwgan/models/__init__.py
@@ -0,0 +1,7 @@
+from .fwgan400 import FWGAN400ContLarge
+from .fwgan500 import FWGAN500Cont
+
# Maps the model names accepted on the command line to their generator classes.
model_dict = {
    'fwgan400': FWGAN400ContLarge,
    'fwgan500': FWGAN500Cont
}
diff --git a/dnn/torch/fwgan/models/fwgan400.py b/dnn/torch/fwgan/models/fwgan400.py
new file mode 100644
index 00000000..84d9849e
--- /dev/null
+++ b/dnn/torch/fwgan/models/fwgan400.py
@@ -0,0 +1,308 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.utils import weight_norm
+import numpy as np
+
+which_norm = weight_norm
+
+#################### Definition of basic model components ####################
+
+#Convolutional layer with 1 frame look-ahead (used for feature PreCondNet)
class ConvLookahead(nn.Module):
    """Weight-normalised 1-D convolution with asymmetric padding.

    The last axis of the input is padded with ``(kernel_size - 2) * dilation``
    values on the left and ``dilation`` values on the right before the
    (otherwise unpadded) convolution is applied, which leaves one step of
    look-ahead.
    """

    def __init__(self, in_ch, out_ch, kernel_size, dilation=1, groups=1, bias= False):
        super().__init__()
        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        self.padding_left = dilation * (kernel_size - 2)
        self.padding_right = dilation

        conv = nn.Conv1d(in_ch, out_ch, kernel_size, dilation=dilation, groups=groups, bias=bias)
        self.conv = which_norm(conv)

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for module in self.modules():
            if isinstance(module, layer_types):
                nn.init.orthogonal_(module.weight.data)

    def forward(self, x):
        """Pad the last axis of ``x`` and return the convolution output."""
        padded = F.pad(x, (self.padding_left, self.padding_right))
        return self.conv(padded)
+
+#(modified) GLU Activation layer definition
class GLU(nn.Module):
    """Modified gated linear unit: ``tanh(x) * sigmoid(gate(x))``.

    ``gate`` is a bias-free, weight-normalised square linear layer of size
    ``feat_size``.
    """

    def __init__(self, feat_size):
        super().__init__()

        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        gate_fc = nn.Linear(feat_size, feat_size, bias=False)
        self.gate = which_norm(gate_fc)

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for module in self.modules():
            if isinstance(module, layer_types):
                nn.init.orthogonal_(module.weight.data)

    def forward(self, x):
        """Return ``x`` squashed by tanh and scaled by its learned sigmoid gate."""
        squashed = torch.tanh(x)
        gate = torch.sigmoid(self.gate(x))
        return squashed * gate
+
+#GRU layer definition
class ContForwardGRU(nn.Module):
    """GRU whose initial hidden state is derived from a continuation vector.

    Args:
        input_size: feature size of the GRU input.
        hidden_size: size of the GRU hidden state and of the output.
        num_layers: number of stacked GRU layers.
        cont_dim: size of the continuation vector ``x0`` (default 64, the
            value that used to be hard-coded).
    """

    def __init__(self, input_size, hidden_size, num_layers=1, cont_dim=64):
        super().__init__()

        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Maps the continuation vector to the initial hidden state.
        self.cont_fc = nn.Sequential(which_norm(nn.Linear(cont_dim, self.hidden_size, bias=False)),
                                     nn.Tanh())

        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                          batch_first=True, bias=False)

        self.nl = GLU(self.hidden_size)

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for m in self.modules():
            if isinstance(m, layer_types):
                nn.init.orthogonal_(m.weight.data)

    def forward(self, x, x0):
        """Run the GRU over ``x`` (batch first) starting from a state computed from ``x0``.

        Returns the GRU output sequence passed through the GLU non-linearity.
        """
        self.gru.flatten_parameters()

        h0 = self.cont_fc(x0).unsqueeze(0)
        if self.num_layers > 1:
            # nn.GRU expects one initial state per layer; previously only a
            # single state was provided, so num_layers > 1 raised at run time.
            h0 = h0.repeat(self.num_layers, 1, 1)

        output, _ = self.gru(x, h0)

        return self.nl(output)
+
+# Framewise convolution layer definition
class ContFramewiseConv(torch.nn.Module):
    """Frame-wise "convolution": a linear layer applied to a sliding window of frames.

    The input sequence of frames is flattened, left-padded with values
    computed from a continuation vector, and cut into overlapping windows of
    ``frame_kernel_size`` frames (hop of one frame). Each window is mapped to
    ``out_dim`` features by a bias-free linear layer followed by ``act``.

    Args:
        frame_len: number of values per input frame.
        out_dim: number of values per output frame.
        frame_kernel_size: number of frames per window.
        act: ``'glu'`` or ``'tanh'``.
        causal: use left-only padding. The non-causal variant is not
            implemented for windows longer than one frame.
        cont_dim: size of the continuation vector ``x0`` (default 64, the
            value that used to be hard-coded).

    Raises:
        ValueError: if ``act`` is not one of the supported activations.
    """

    def __init__(self, frame_len, out_dim, frame_kernel_size=3, act='glu', causal=True, cont_dim=64):

        super().__init__()

        # Fail early: an unknown activation used to leave ``self.fc`` undefined
        # and only surfaced as an AttributeError in forward().
        if act not in ('glu', 'tanh'):
            raise ValueError(f"unsupported activation {act!r}, expected 'glu' or 'tanh'")

        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        self.frame_kernel_size = frame_kernel_size
        self.frame_len = frame_len

        if causal or (self.frame_kernel_size == 2):

            self.required_pad_left = (self.frame_kernel_size - 1) * self.frame_len
            self.required_pad_right = 0

            # Produces the left padding (the "history") from the continuation vector.
            self.cont_fc = nn.Sequential(which_norm(nn.Linear(cont_dim, self.required_pad_left, bias=False)),
                                         nn.Tanh())

        else:
            # Non-causal frame-wise convolution: no continuation layer exists.
            self.required_pad_left = (self.frame_kernel_size - 1)//2 * self.frame_len
            self.required_pad_right = (self.frame_kernel_size - 1)//2 * self.frame_len
            self.cont_fc = None

        self.fc_input_dim = self.frame_kernel_size * self.frame_len
        self.fc_out_dim = out_dim

        linear = which_norm(nn.Linear(self.fc_input_dim, self.fc_out_dim, bias=False))
        if act == 'glu':
            self.fc = nn.Sequential(linear, GLU(self.fc_out_dim))
        else:
            self.fc = nn.Sequential(linear, nn.Tanh())

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for m in self.modules():
            if isinstance(m, layer_types):
                nn.init.orthogonal_(m.weight.data)

    def forward(self, x, x0):
        """Apply the windowed linear layer to the frames ``x`` using history derived from ``x0``.

        Raises:
            NotImplementedError: for the non-causal variant with more than one
                frame per window.
        """
        if self.frame_kernel_size == 1:
            return self.fc(x)

        if self.cont_fc is None:
            raise NotImplementedError("non-causal frame-wise convolution is not implemented")

        # Flatten all frames into one long row per batch item and prepend the history.
        x_flat = x.reshape(x.size(0), 1, -1)
        pad = self.cont_fc(x0).view(x0.size(0), 1, -1)
        x_flat_padded = torch.cat((pad, x_flat), dim=-1).unsqueeze(2)

        # One window of fc_input_dim values per frame, hop of one frame.
        x_flat_padded_unfolded = F.unfold(x_flat_padded,
                                          kernel_size=(1, self.fc_input_dim),
                                          stride=self.frame_len).permute(0, 2, 1).contiguous()

        return self.fc(x_flat_padded_unfolded)
+
+# A fully-connected based upsampling layer definition
class UpsampleFC(nn.Module):
    """Upsampling through a fully-connected layer.

    Every input step with ``in_ch`` channels is mapped to
    ``out_ch * upsample_factor`` values (bias-free linear layer followed by
    tanh), which are then laid out as ``upsample_factor`` consecutive steps of
    ``out_ch`` channels.
    """

    def __init__(self, in_ch, out_ch, upsample_factor):
        super().__init__()
        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        self.in_ch, self.out_ch, self.upsample_factor = in_ch, out_ch, upsample_factor
        self.fc = nn.Linear(in_ch, upsample_factor * out_ch, bias=False)
        self.nl = nn.Tanh()

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for module in self.modules():
            if isinstance(module, layer_types):
                nn.init.orthogonal_(module.weight.data)

    def forward(self, x):
        """Map ``x`` of shape (batch, in_ch, steps) to (batch, out_ch, steps * upsample_factor)."""
        n_batch = x.size(0)
        # Channels last for the linear layer.
        activated = self.nl(self.fc(x.permute(0, 2, 1)))
        # Split each step into upsample_factor steps, then go back to channels first.
        upsampled = activated.reshape((n_batch, -1, self.out_ch))
        return upsampled.permute(0, 2, 1)
+
+########################### The complete model definition #################################
+
class FWGAN400ContLarge(nn.Module):
    """Frame-wise generator producing 40 samples per output frame (160 per input frame).

    The network combines pitch-driven sine/cosine signals with an upsampled
    spectral envelope, runs them through a GRU and a stack of seven frame-wise
    convolutions, and finally scales the waveform by a gain derived from the
    first feature coefficient. A latent vector computed from the history
    samples ``x0`` initialises the GRU and every frame-wise layer.
    """

    def __init__(self):
        super().__init__()
        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        # 19 features per frame -> 80 channels at 4x the frame rate.
        self.bfcc_with_corr_upsampler = UpsampleFC(19,80,4)

        # 80 phase channels + 80 envelope channels.
        self.feat_in_conv1 = ConvLookahead(160,256,kernel_size=5)
        self.feat_in_nl1 = GLU(256)

        # Continuation network: log-norm + 320 normalised history samples -> 64-d latent.
        cont_dims = (321, 160, 160, 80, 80, 64, 64)
        cont_layers = []
        for dim_in, dim_out in zip(cont_dims[:-1], cont_dims[1:]):
            cont_layers.append(which_norm(nn.Linear(dim_in, dim_out, bias=False)))
            cont_layers.append(nn.Tanh())
        self.cont_net = nn.Sequential(*cont_layers)

        self.rnn = ContForwardGRU(256,256)

        self.fwc1 = ContFramewiseConv(256, 256)
        self.fwc2 = ContFramewiseConv(256, 128)
        self.fwc3 = ContFramewiseConv(128, 128)
        self.fwc4 = ContFramewiseConv(128, 64)
        self.fwc5 = ContFramewiseConv(64, 64)
        self.fwc6 = ContFramewiseConv(64, 40)
        self.fwc7 = ContFramewiseConv(40, 40)

        self.init_weights()
        self.count_parameters()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for module in self.modules():
            if isinstance(module, layer_types):
                nn.init.orthogonal_(module.weight.data)

    def count_parameters(self):
        """Print the number of trainable parameters."""
        num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(f"Total number of {self.__class__.__name__} network parameters = {num_params}\n")

    def create_phase_signals(self, periods):
        """Build sine/cosine signals from per-frame pitch periods.

        ``periods`` has shape (batch, frames). For every frame, 160 samples of
        sine and cosine at angular frequency ``2*pi/period`` are generated with
        a phase that continues from the previous frame, cut into 4 rows of 40
        samples and concatenated (sine first) into rows of 80 values.

        Returns a tensor of shape (batch, 4 * frames, 80).
        """
        n_batch = periods.size(0)
        ramp = torch.arange(1, 160 + 1, dtype=periods.dtype, device=periods.device).view((1, -1))
        ramp = torch.repeat_interleave(ramp, n_batch, 0)

        phase = torch.zeros(n_batch, dtype=periods.dtype, device=periods.device).unsqueeze(-1)
        pieces = []
        for frame_idx in range(periods.size(1)):
            omega = (2.0 * torch.pi / periods[:, frame_idx]).unsqueeze(-1)
            angle = omega * ramp + phase

            sin_rows = torch.sin(angle)
            sin_rows = sin_rows.reshape(sin_rows.size(0), -1, 40)
            cos_rows = torch.cos(angle)
            cos_rows = cos_rows.reshape(cos_rows.size(0), -1, 40)

            pieces.append(torch.cat((sin_rows, cos_rows), dim=-1))

            # Carry the phase over to the next frame.
            phase = phase + 160 * omega

        return torch.cat(pieces, dim=1)

    def gain_multiply(self, x, c0):
        """Scale the waveform ``x`` by ``10**(0.5*c0/sqrt(18))``, each gain held for 160 samples."""
        gain = 10**(0.5*c0/np.sqrt(18.0))
        gain = torch.repeat_interleave(gain, 160, dim=-1)
        gain = gain.reshape(gain.size(0), 1, -1).squeeze(1)

        return x * gain

    def forward(self, pitch_period, bfcc_with_corr, x0):
        """Generate a waveform.

        Args:
            pitch_period: pitch periods, shape (batch, frames).
            bfcc_with_corr: features, shape (batch, frames, 19); the first
                coefficient also drives the output gain.
            x0: history samples used for continuation, 320 per batch item.

        Returns:
            Waveform of shape (batch, 160 * frames).
        """
        # Represent the history as its log-norm followed by the unit-norm samples.
        history_norm = torch.norm(x0, 2, dim=-1, keepdim=True)
        history_unit = x0 / torch.sqrt((1e-8) + history_norm**2)
        history = torch.cat((torch.log(history_norm + 1e-7), history_unit), dim=-1)

        p_embed = self.create_phase_signals(pitch_period).permute(0, 2, 1).contiguous()
        envelope = self.bfcc_with_corr_upsampler(bfcc_with_corr.permute(0,2,1).contiguous())
        feat_in = torch.cat((p_embed, envelope), dim=1)

        wav_latent1 = self.feat_in_nl1(self.feat_in_conv1(feat_in).permute(0,2,1).contiguous())

        cont_latent = self.cont_net(history)

        latent = self.rnn(wav_latent1, cont_latent)
        fwc_stack = (self.fwc1, self.fwc2, self.fwc3, self.fwc4, self.fwc5, self.fwc6, self.fwc7)
        for fwc in fwc_stack:
            latent = fwc(latent, cont_latent)

        # Concatenate the 40-sample frames into one waveform per batch item.
        waveform = latent.reshape(latent.size(0), 1, -1).squeeze(1)

        return self.gain_multiply(waveform, bfcc_with_corr[:,:,:1])
diff --git a/dnn/torch/fwgan/models/fwgan500.py b/dnn/torch/fwgan/models/fwgan500.py
new file mode 100644
index 00000000..2c6dea5f
--- /dev/null
+++ b/dnn/torch/fwgan/models/fwgan500.py
@@ -0,0 +1,260 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.utils import weight_norm
+import numpy as np
+
+
+which_norm = weight_norm
+
+#################### Definition of basic model components ####################
+
+#Convolutional layer with 1 frame look-ahead (used for feature PreCondNet)
class ConvLookahead(nn.Module):
    """Weight-normalised 1-D convolution with asymmetric padding.

    The last axis of the input is padded with ``(kernel_size - 2) * dilation``
    values on the left and ``dilation`` values on the right before the
    (otherwise unpadded) convolution is applied, which leaves one step of
    look-ahead.
    """

    def __init__(self, in_ch, out_ch, kernel_size, dilation=1, groups=1, bias= False):
        super().__init__()
        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        self.padding_left = dilation * (kernel_size - 2)
        self.padding_right = dilation

        conv = nn.Conv1d(in_ch, out_ch, kernel_size, dilation=dilation, groups=groups, bias=bias)
        self.conv = which_norm(conv)

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for module in self.modules():
            if isinstance(module, layer_types):
                nn.init.orthogonal_(module.weight.data)

    def forward(self, x):
        """Pad the last axis of ``x`` and return the convolution output."""
        padded = F.pad(x, (self.padding_left, self.padding_right))
        return self.conv(padded)
+
+#(modified) GLU Activation layer definition
class GLU(nn.Module):
    """Modified gated linear unit: ``tanh(x) * sigmoid(gate(x))``.

    ``gate`` is a bias-free, weight-normalised square linear layer of size
    ``feat_size``.
    """

    def __init__(self, feat_size):
        super().__init__()

        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        gate_fc = nn.Linear(feat_size, feat_size, bias=False)
        self.gate = which_norm(gate_fc)

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for module in self.modules():
            if isinstance(module, layer_types):
                nn.init.orthogonal_(module.weight.data)

    def forward(self, x):
        """Return ``x`` squashed by tanh and scaled by its learned sigmoid gate."""
        squashed = torch.tanh(x)
        gate = torch.sigmoid(self.gate(x))
        return squashed * gate
+
+#GRU layer definition
class ContForwardGRU(nn.Module):
    """GRU whose initial hidden state is derived from history audio samples.

    Args:
        input_size: feature size of the GRU input.
        hidden_size: size of the GRU hidden state and of the output.
        num_layers: number of stacked GRU layers.
        cont_dim: number of history samples in ``x0`` (default 320, the value
            that used to be hard-coded).
    """

    def __init__(self, input_size, hidden_size, num_layers=1, cont_dim=320):
        super().__init__()

        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # This is to initialize the layer with history audio samples for continuation.
        self.cont_fc = nn.Sequential(which_norm(nn.Linear(cont_dim, self.hidden_size, bias=False)),
                                     nn.Tanh())

        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                          batch_first=True, bias=False)

        self.nl = GLU(self.hidden_size)

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for m in self.modules():
            if isinstance(m, layer_types):
                nn.init.orthogonal_(m.weight.data)

    def forward(self, x, x0):
        """Run the GRU over ``x`` (batch first) starting from a state computed from ``x0``.

        Returns the GRU output sequence passed through the GLU non-linearity.
        """
        self.gru.flatten_parameters()

        h0 = self.cont_fc(x0).unsqueeze(0)
        if self.num_layers > 1:
            # nn.GRU expects one initial state per layer; previously only a
            # single state was provided, so num_layers > 1 raised at run time.
            h0 = h0.repeat(self.num_layers, 1, 1)

        output, _ = self.gru(x, h0)

        return self.nl(output)
+
+# Framewise convolution layer definition
class ContFramewiseConv(torch.nn.Module):
    """Frame-wise "convolution": a linear layer applied to a sliding window of frames.

    The input sequence of frames is flattened, left-padded with values
    computed from the history samples, and cut into overlapping windows of
    ``frame_kernel_size`` frames (hop of one frame). Each window is mapped to
    ``out_dim`` features by a bias-free linear layer followed by ``act``.

    Args:
        frame_len: number of values per input frame.
        out_dim: number of values per output frame.
        frame_kernel_size: number of frames per window.
        act: ``'glu'`` or ``'tanh'``.
        causal: use left-only padding. The non-causal variant is not
            implemented for windows longer than one frame.
        cont_dim: number of history samples in ``x0`` (default 320, the value
            that used to be hard-coded).

    Raises:
        ValueError: if ``act`` is not one of the supported activations.
    """

    def __init__(self, frame_len, out_dim, frame_kernel_size=3, act='glu', causal=True, cont_dim=320):

        super().__init__()

        # Fail early: an unknown activation used to leave ``self.fc`` undefined
        # and only surfaced as an AttributeError in forward().
        if act not in ('glu', 'tanh'):
            raise ValueError(f"unsupported activation {act!r}, expected 'glu' or 'tanh'")

        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        self.frame_kernel_size = frame_kernel_size
        self.frame_len = frame_len

        if causal or (self.frame_kernel_size == 2):

            self.required_pad_left = (self.frame_kernel_size - 1) * self.frame_len
            self.required_pad_right = 0

            # This is to initialize the layer with history audio samples for continuation.
            self.cont_fc = nn.Sequential(which_norm(nn.Linear(cont_dim, self.required_pad_left, bias=False)),
                                         nn.Tanh())

        else:
            # This means non-causal frame-wise convolution. We don't use it at the moment,
            # and no continuation layer exists for it.
            self.required_pad_left = (self.frame_kernel_size - 1)//2 * self.frame_len
            self.required_pad_right = (self.frame_kernel_size - 1)//2 * self.frame_len
            self.cont_fc = None

        self.fc_input_dim = self.frame_kernel_size * self.frame_len
        self.fc_out_dim = out_dim

        linear = which_norm(nn.Linear(self.fc_input_dim, self.fc_out_dim, bias=False))
        if act == 'glu':
            self.fc = nn.Sequential(linear, GLU(self.fc_out_dim))
        else:
            self.fc = nn.Sequential(linear, nn.Tanh())

        self.init_weights()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for m in self.modules():
            if isinstance(m, layer_types):
                nn.init.orthogonal_(m.weight.data)

    def forward(self, x, x0):
        """Apply the windowed linear layer to the frames ``x`` using history derived from ``x0``.

        Raises:
            NotImplementedError: for the non-causal variant with more than one
                frame per window.
        """
        if self.frame_kernel_size == 1:
            return self.fc(x)

        if self.cont_fc is None:
            raise NotImplementedError("non-causal frame-wise convolution is not implemented")

        # Flatten all frames into one long row per batch item and prepend the history.
        x_flat = x.reshape(x.size(0), 1, -1)
        pad = self.cont_fc(x0).view(x0.size(0), 1, -1)
        x_flat_padded = torch.cat((pad, x_flat), dim=-1).unsqueeze(2)

        # One window of fc_input_dim values per frame, hop of one frame.
        x_flat_padded_unfolded = F.unfold(x_flat_padded,
                                          kernel_size=(1, self.fc_input_dim),
                                          stride=self.frame_len).permute(0, 2, 1).contiguous()

        return self.fc(x_flat_padded_unfolded)
+
+########################### The complete model definition #################################
+
class FWGAN500Cont(nn.Module):
    """Frame-wise generator producing 32 samples per output frame (160 per input frame).

    The network combines pitch-driven sine/cosine signals with an upsampled
    spectral envelope, runs them through a GRU and a stack of seven frame-wise
    convolutions, and finally scales the waveform by a gain derived from the
    first feature coefficient. The history samples ``x0`` initialise the GRU
    and every frame-wise layer.
    """

    def __init__(self):
        super().__init__()
        # Fixed seed: the initial weights are reproducible.
        torch.manual_seed(5)

        # PrecondNet: 19 features per frame -> 64 channels at 5x the frame rate.
        envelope_upsampler = nn.ConvTranspose1d(19, 64, kernel_size=5, stride=5, padding=0, bias=False)
        self.bfcc_with_corr_upsampler = nn.Sequential(envelope_upsampler, nn.Tanh())

        # 64 phase channels + 64 envelope channels.
        self.feat_in_conv = ConvLookahead(128,256,kernel_size=5)
        self.feat_in_nl = GLU(256)

        # GRU:
        self.rnn = ContForwardGRU(256,256)

        # Frame-wise convolution stack:
        self.fwc1 = ContFramewiseConv(256, 256)
        self.fwc2 = ContFramewiseConv(256, 128)
        self.fwc3 = ContFramewiseConv(128, 128)
        self.fwc4 = ContFramewiseConv(128, 64)
        self.fwc5 = ContFramewiseConv(64, 64)
        self.fwc6 = ContFramewiseConv(64, 32)
        self.fwc7 = ContFramewiseConv(32, 32, act='tanh')

        self.init_weights()
        self.count_parameters()

    def init_weights(self):
        """Orthogonally initialise the weight data of conv/linear/embedding submodules."""
        layer_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for module in self.modules():
            if isinstance(module, layer_types):
                nn.init.orthogonal_(module.weight.data)

    def count_parameters(self):
        """Print the number of trainable parameters."""
        num_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(f"Total number of {self.__class__.__name__} network parameters = {num_params}\n")

    def create_phase_signals(self, periods):
        """Build sine/cosine signals from per-frame pitch periods.

        ``periods`` has shape (batch, frames). For every frame, 160 samples of
        sine and cosine at angular frequency ``2*pi/period`` are generated with
        a phase that continues from the previous frame, cut into 5 rows of 32
        samples and concatenated (sine first) into rows of 64 values.

        Returns a tensor of shape (batch, 5 * frames, 64).
        """
        n_batch = periods.size(0)
        ramp = torch.arange(1, 160 + 1, dtype=periods.dtype, device=periods.device).view((1, -1))
        ramp = torch.repeat_interleave(ramp, n_batch, 0)

        phase = torch.zeros(n_batch, dtype=periods.dtype, device=periods.device).unsqueeze(-1)
        pieces = []
        for frame_idx in range(periods.size(1)):
            omega = (2.0 * torch.pi / periods[:, frame_idx]).unsqueeze(-1)
            angle = omega * ramp + phase

            sin_rows = torch.sin(angle)
            sin_rows = sin_rows.reshape(sin_rows.size(0), -1, 32)
            cos_rows = torch.cos(angle)
            cos_rows = cos_rows.reshape(cos_rows.size(0), -1, 32)

            pieces.append(torch.cat((sin_rows, cos_rows), dim=-1))

            # Carry the phase over to the next frame.
            phase = phase + 160 * omega

        return torch.cat(pieces, dim=1)

    def gain_multiply(self, x, c0):
        """Scale the waveform ``x`` by ``10**(0.5*c0/sqrt(18))``, each gain held for 160 samples."""
        gain = 10**(0.5*c0/np.sqrt(18.0))
        gain = torch.repeat_interleave(gain, 160, dim=-1)
        gain = gain.reshape(gain.size(0), 1, -1).squeeze(1)

        return x * gain

    def forward(self, pitch_period, bfcc_with_corr, x0):
        """Generate a waveform.

        Args:
            pitch_period: pitch periods, shape (batch, frames).
            bfcc_with_corr: features, shape (batch, frames, 19); the first
                coefficient also drives the output gain.
            x0: history samples used for continuation, 320 per batch item.

        Returns:
            Waveform of shape (batch, 160 * frames).
        """
        # Latent representation with 256 elements per 32-sample frame.
        p_embed = self.create_phase_signals(pitch_period).permute(0, 2, 1).contiguous()
        envelope = self.bfcc_with_corr_upsampler(bfcc_with_corr.permute(0,2,1).contiguous())
        feat_in = torch.cat((p_embed, envelope), dim=1)
        wav_latent = self.feat_in_nl(self.feat_in_conv(feat_in).permute(0,2,1).contiguous())

        # Generation with continuation using history samples x0 starts from here:
        latent = self.rnn(wav_latent, x0)
        fwc_stack = (self.fwc1, self.fwc2, self.fwc3, self.fwc4, self.fwc5, self.fwc6, self.fwc7)
        for fwc in fwc_stack:
            latent = fwc(latent, x0)

        # Concatenate the 32-sample frames into one waveform per batch item.
        waveform_unscaled = latent.reshape(latent.size(0), 1, -1).squeeze(1)

        return self.gain_multiply(waveform_unscaled, bfcc_with_corr[:,:,:1])
diff --git a/dnn/torch/lossgen/README.md b/dnn/torch/lossgen/README.md
new file mode 100644
index 00000000..55c1b442
--- /dev/null
+++ b/dnn/torch/lossgen/README.md
@@ -0,0 +1,27 @@
+# Packet loss simulator
+
+This code is an attempt at simulating better packet loss scenarios. The most common way of simulating
+packet loss is to use a random sequence where each packet loss event is uncorrelated with previous events.
+That is a simplistic model since we know that losses often occur in bursts. This model uses real data
+to build a generative model for packet loss.
+
+We use the training data provided for the Audio Deep Packet Loss Concealment Challenge, which is available at:
+
+http://plcchallenge2022pub.blob.core.windows.net/plcchallengearchive/test_train.tar.gz
+
+To create the training data, run:
+
+`./process_data.sh /<path>/test_train/train/lossy_signals/`
+
+That will create an ascii loss\_sorted.txt file with all loss data sorted in increasing packet loss
+percentage. Then just run:
+
+`python ./train_lossgen.py`
+
+to train a model.
+
+To generate a sequence, run:
+
+`python3 ./test_lossgen.py <checkpoint> <percentage> output.txt --length 10000`
+
+where `<checkpoint>` is the .pth model file and `<percentage>` is the amount of loss expressed as a fraction (e.g. 0.2 for 20% loss).
diff --git a/dnn/torch/lossgen/export_lossgen.py b/dnn/torch/lossgen/export_lossgen.py
new file mode 100644
index 00000000..1f7df957
--- /dev/null
+++ b/dnn/torch/lossgen/export_lossgen.py
@@ -0,0 +1,101 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint', type=str, help='model checkpoint')
+parser.add_argument('output_dir', type=str, help='output folder')
+
+args = parser.parse_args()
+
+import torch
+import numpy as np
+
+import lossgen
+from wexchange.torch import dump_torch_weights
+from wexchange.c_export import CWriter, print_vector
+
def c_export(args, model):
    """Write the weights of ``model`` as C source under ``args.output_dir``.

    The two dense layers are dumped unquantized, the two GRU layers
    quantized. The largest value returned by the GRU dumps is written to the
    header as ``LOSSGEN_MAX_RNN_UNITS``.
    """
    banner = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
    target = os.path.join(args.output_dir, "lossgen_data")

    writer = CWriter(target, message=banner, model_struct_name='LossGen')
    writer.header.write('\n#include "opus_types.h"\n')

    # Dense layers: submodule name -> exported name.
    dense_layers = {
        'dense_in': "lossgen_dense_in",
        'dense_out': "lossgen_dense_out",
    }
    for name, export_name in dense_layers.items():
        dump_torch_weights(writer, model.get_submodule(name), name=export_name,
                           verbose=True, quantize=False, scale=None)

    # Recurrent layers: submodule name -> exported name.
    gru_layers = {
        "gru1": "lossgen_gru1",
        "gru2": "lossgen_gru2",
    }
    rnn_units = []
    for name, export_name in gru_layers.items():
        units = dump_torch_weights(writer, model.get_submodule(name), export_name,
                                   verbose=True, input_sparse=False, quantize=True,
                                   scale=None, recurrent_scale=None)
        rnn_units.append(units)
    max_rnn_units = max(rnn_units)

    writer.header.write(f"\n\n#define LOSSGEN_MAX_RNN_UNITS {max_rnn_units}\n\n")

    writer.close()
+
+
if __name__ == "__main__":
    # Rebuild the model from the constructor arguments stored in the
    # checkpoint, restore its weights and export them as C code.
    os.makedirs(args.output_dir, exist_ok=True)

    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    ctor_args = checkpoint['model_args']
    ctor_kwargs = checkpoint['model_kwargs']

    model = lossgen.LossGen(*ctor_args, **ctor_kwargs)
    model.load_state_dict(checkpoint['state_dict'], strict=False)

    c_export(args, model)
diff --git a/dnn/torch/lossgen/lossgen.py b/dnn/torch/lossgen/lossgen.py
new file mode 100644
index 00000000..9025165c
--- /dev/null
+++ b/dnn/torch/lossgen/lossgen.py
@@ -0,0 +1,29 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
class LossGen(nn.Module):
    """Recurrent packet-loss model: dense input layer, two GRUs, dense output layer.

    ``forward`` receives the previous loss indicators and the loss percentage
    and returns one output value per step together with the updated GRU states.
    """

    def __init__(self, gru1_size=16, gru2_size=16):
        super().__init__()

        self.gru1_size = gru1_size
        self.gru2_size = gru2_size
        self.dense_in = nn.Linear(2, 8)
        self.gru1 = nn.GRU(8, self.gru1_size, batch_first=True)
        self.gru2 = nn.GRU(self.gru1_size, self.gru2_size, batch_first=True)
        self.dense_out = nn.Linear(self.gru2_size, 1)

    def forward(self, loss, perc, states=None):
        """Run the model on ``loss`` and ``perc`` (concatenated along the last axis).

        Args:
            loss, perc: batch-first tensors with matching leading dimensions.
            states: optional ``[gru1_state, gru2_state]``; zero states are
                used when omitted.

        Returns:
            ``(output, [gru1_state, gru2_state])``.
        """
        if states is None:
            # Start from all-zero hidden states on the input's device.
            n_batch = loss.size(0)
            where = loss.device
            h1 = torch.zeros((1, n_batch, self.gru1_size), device=where)
            h2 = torch.zeros((1, n_batch, self.gru2_size), device=where)
        else:
            h1 = states[0]
            h2 = states[1]

        features = torch.tanh(self.dense_in(torch.cat([loss, perc], dim=-1)))
        out1, h1 = self.gru1(features, h1)
        out2, h2 = self.gru2(out1, h2)
        return self.dense_out(out2), [h1, h2]
diff --git a/dnn/torch/lossgen/process_data.sh b/dnn/torch/lossgen/process_data.sh
new file mode 100755
index 00000000..308fd0aa
--- /dev/null
+++ b/dnn/torch/lossgen/process_data.sh
@@ -0,0 +1,17 @@
#!/bin/sh
#
# Build loss_sorted.txt: the concatenation of every *_is_lost.txt file found
# in the given directory, ordered by increasing average loss.
#
# Usage: process_data.sh <directory containing the *_is_lost.txt files>
#
# Writes percentage_list.txt, percentage_sorted.txt and loss_sorted.txt to the
# current directory.

if [ $# -lt 1 ]; then
    echo "usage: $0 <directory containing *_is_lost.txt files>" >&2
    exit 1
fi

#directory containing the loss files
datadir=$1

# One "<average loss> <path>" line per loss file.
for i in "$datadir"/*_is_lost.txt
do
    # The pattern stays unexpanded when nothing matches; skip it.
    [ -e "$i" ] || continue
    # Guard against empty files (NR == 0) to avoid a division by zero.
    perc=$(awk '{a+=$1} END{print (NR ? a/NR : 0)}' "$i")
    printf '%s %s\n' "$perc" "$i"
done > percentage_list.txt

# Keep everything after the first space so paths containing spaces survive.
sort -n percentage_list.txt | cut -d' ' -f2- > percentage_sorted.txt

while IFS= read -r i
do
    cat "$i"
done < percentage_sorted.txt > loss_sorted.txt
diff --git a/dnn/torch/lossgen/test_lossgen.py b/dnn/torch/lossgen/test_lossgen.py
new file mode 100644
index 00000000..95659b1f
--- /dev/null
+++ b/dnn/torch/lossgen/test_lossgen.py
@@ -0,0 +1,42 @@
+import lossgen
+import os
+import argparse
+import torch
+import numpy as np
+
+
# Command line: checkpoint, target loss rate and output file.
parser = argparse.ArgumentParser()

parser.add_argument('model', type=str, help='LossGen model checkpoint (.pth)')
parser.add_argument('percentage', type=float, help='amount of loss as a fraction (e.g. 0.2 for 20%% loss)')
parser.add_argument('output', type=str, help='path to output file (ascii)')

parser.add_argument('--length', type=int, help="length of sequence to generate", default=500)

args = parser.parse_args()


# Rebuild the model from the constructor arguments stored in the checkpoint.
checkpoint = torch.load(args.model, map_location='cpu')
model = lossgen.LossGen(*checkpoint['model_args'], **checkpoint['model_kwargs'])
model.load_state_dict(checkpoint['state_dict'], strict=False)
model.eval()

states=None
last = torch.zeros((1,1,1))
perc = torch.tensor((args.percentage,))[None,None,:]
seq = torch.zeros((0,1,1))

one = torch.ones((1,1,1))
zero = torch.zeros((1,1,1))

if __name__ == '__main__':
    # Pure inference: no autograd graph is needed, so nothing has to be detached.
    with torch.no_grad():
        for _ in range(args.length):
            prob, states = model(last, perc, states=states)
            prob = torch.sigmoid(prob)
            # Draw the next loss indicator and feed it back as the next input.
            loss = one if np.random.rand() < prob.item() else zero
            last = loss
            seq = torch.cat([seq, loss])

    # One 0/1 value per line.
    np.savetxt(args.output, seq[:,:,0].numpy().astype('int'), fmt='%d')
diff --git a/dnn/torch/lossgen/train_lossgen.py b/dnn/torch/lossgen/train_lossgen.py
new file mode 100644
index 00000000..0bbe2798
--- /dev/null
+++ b/dnn/torch/lossgen/train_lossgen.py
@@ -0,0 +1,99 @@
+import numpy as np
+import torch
+from torch import nn
+import torch.nn.functional as F
+import tqdm
+from scipy.signal import lfilter
+import os
+import lossgen
+
class LossDataset(torch.utils.data.Dataset):
    """Fixed-length sequences of loss values with a smoothed loss percentage.

    The text file ``loss_file`` is read as one long float32 vector and cut into
    ``sequence_length``-long sequences (a trailing remainder is dropped). The
    percentage track is the loss vector filtered with
    ``y[n] = 0.001 * x[n] + 0.999 * y[n-1]``.
    """

    def __init__(self,
                 loss_file,
                 sequence_length=997):

        self.sequence_length = sequence_length

        raw = np.loadtxt(loss_file, dtype='float32')

        # Keep a whole number of sequences.
        self.nb_sequences = raw.shape[0] // self.sequence_length
        raw = raw[:self.nb_sequences * self.sequence_length]

        numerator = np.array([.001], dtype='float32')
        denominator = np.array([1., -.999], dtype='float32')
        smoothed = lfilter(numerator, denominator, raw)

        shape = (self.nb_sequences, self.sequence_length, 1)
        self.loss = np.reshape(raw, shape)
        self.perc = np.reshape(smoothed, shape)

    def __len__(self):
        return self.nb_sequences

    def __getitem__(self, index):
        """Return ``[loss, perc]`` for one sequence, with random jitter added to ``perc``."""
        # One offset shared by the whole sequence plus one offset per step.
        shared_jitter = np.random.normal(scale=.1, size=(1,1)).astype('float32')
        step_jitter = np.random.normal(scale=.1, size=(self.sequence_length,1)).astype('float32')
        base = self.perc[index, :, :]
        # The perturbation is scaled by perc*(1-perc).
        jittered = base + (shared_jitter + step_jitter) * base * (1 - base)
        return [self.loss[index, :, :], jittered]
+
+
# Optimiser, schedule and training-length settings.
adam_betas = [0.8, 0.98]
adam_eps = 1e-8
batch_size=256
lr_decay = 0.001
lr = 0.003
epsilon = 1e-5
epochs = 2000
checkpoint_dir='checkpoint'
os.makedirs(checkpoint_dir, exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The constructor arguments are stored in the checkpoint so that the model can
# be rebuilt from it.
checkpoint = {
    'model_args': (),
    'model_kwargs': {'gru1_size': 16, 'gru2_size': 32},
}
model = lossgen.LossGen(*checkpoint['model_args'], **checkpoint['model_kwargs'])
dataset = LossDataset('loss_sorted.txt')
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)


optimizer = torch.optim.AdamW(model.parameters(), lr=lr, betas=adam_betas, eps=adam_eps)


def _inverse_decay(step):
    """Learning-rate factor after ``step`` scheduler steps."""
    return 1 / (1 + lr_decay * step)


# learning rate scheduler
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=_inverse_decay)


if __name__ == '__main__':
    model.to(device)
    # GRU states are carried over from one batch to the next.
    states = None
    for epoch in range(1, epochs + 1):

        running_loss = 0

        print(f"training epoch {epoch}...")
        with tqdm.tqdm(dataloader, unit='batch') as tepoch:
            for step, (lost, perc) in enumerate(tepoch, start=1):
                optimizer.zero_grad()
                lost = lost.to(device)
                perc = perc.to(device)

                logits, states = model(lost, perc, states=states)
                states = [state.detach() for state in states]

                # Predict step t+1 from the inputs up to step t.
                predicted = torch.sigmoid(logits[:,:-1,:])
                target = lost[:,1:,:]

                # Binary cross-entropy with epsilon inside the logarithms.
                objective = torch.mean(-target*torch.log(predicted+epsilon) - (1-target)*torch.log(1-predicted+epsilon))

                objective.backward()
                optimizer.step()

                scheduler.step()

                running_loss += objective.detach().cpu().item()
                tepoch.set_postfix(loss=f"{running_loss/step:8.5f}",
                                   )

        # save checkpoint
        checkpoint_path = os.path.join(checkpoint_dir, f'lossgen_{epoch}.pth')
        checkpoint['state_dict'] = model.state_dict()
        checkpoint['loss'] = running_loss / len(dataloader)
        checkpoint['epoch'] = epoch
        torch.save(checkpoint, checkpoint_path)
diff --git a/dnn/torch/lpcnet/README.md b/dnn/torch/lpcnet/README.md
new file mode 100644
index 00000000..26d9ea19
--- /dev/null
+++ b/dnn/torch/lpcnet/README.md
@@ -0,0 +1,27 @@
+# LPCNet
+
+Incomplete PyTorch implementation of LPCNet.
+
+## Data preparation
+For data preparation use dump_data in github.com/xiph/LPCNet. To turn this into
+a training dataset, copy data and feature file to a folder and run
+
+python add_dataset_config.py my_dataset_folder
+
+
+## Training
+To train a model, create and adjust a setup file, e.g. with
+
+python make_default_setup.py my_setup.yml --path2dataset my_dataset_folder
+
+Then simply run
+
+python train_lpcnet.py my_setup.yml my_output
+
+## Inference
+Create feature file with dump_data from github.com/xiph/LPCNet. Then run e.g.
+
+python test_lpcnet.py features.f32 my_output/checkpoints/checkpoint_ep_10.pth out.wav
+
+Inference runs on CPU and usually takes between 3 and 20 seconds per generated second of audio,
+depending on the CPU.
diff --git a/dnn/torch/lpcnet/add_dataset_config.py b/dnn/torch/lpcnet/add_dataset_config.py
new file mode 100644
index 00000000..1b6b4e8c
--- /dev/null
+++ b/dnn/torch/lpcnet/add_dataset_config.py
@@ -0,0 +1,77 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+import os
+
+import yaml
+
+
+from utils.templates import dataset_template_v1, dataset_template_v2
+
+
+
+
# Command line: dataset folder, dataset version and an optional description.
parser = argparse.ArgumentParser("add_dataset_config.py")

parser.add_argument('path', type=str, help='path to folder containing feature and data file')
parser.add_argument('--version', type=int, help="dataset version, 1 for classic LPCNet with 55 feature slots, 2 for new format with 36 feature slots.", default=2)
parser.add_argument('--description', type=str, help='brief dataset description', default="I will add a description later")
args = parser.parse_args()


# Dataset version -> (config template, extension of the signal file).
_versions = {
    1: (dataset_template_v1, '.u8'),
    2: (dataset_template_v2, '.s16'),
}
if args.version not in _versions:
    raise ValueError(f"unknown dataset version {args.version}")
template, data_extension = _versions[args.version]


def _single_match(names, extension):
    """Return the only name ending in ``extension``, or None if there is not exactly one."""
    matches = [name for name in names if name.endswith(extension)]
    return matches[0] if len(matches) == 1 else None


# get folder content
content = os.listdir(args.path)

feature_file = _single_match(content, '.f32')
if feature_file is None:
    print("could not determine feature file")
else:
    template['feature_file'] = feature_file

signal_file = _single_match(content, data_extension)
if signal_file is None:
    print("could not determine data file")
else:
    template['signal_file'] = signal_file

template['description'] = args.description

# The config is written even when a file could not be determined.
with open(os.path.join(args.path, 'info.yml'), 'w') as f:
    yaml.dump(template, f)
diff --git a/dnn/torch/lpcnet/data/__init__.py b/dnn/torch/lpcnet/data/__init__.py
new file mode 100644
index 00000000..50bad871
--- /dev/null
+++ b/dnn/torch/lpcnet/data/__init__.py
@@ -0,0 +1 @@
+from .lpcnet_dataset import LPCNetDataset
\ No newline at end of file
diff --git a/dnn/torch/lpcnet/data/lpcnet_dataset.py b/dnn/torch/lpcnet/data/lpcnet_dataset.py
new file mode 100644
index 00000000..dbefc57d
--- /dev/null
+++ b/dnn/torch/lpcnet/data/lpcnet_dataset.py
@@ -0,0 +1,227 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" Dataset for LPCNet training """
+import os
+
+import yaml
+import torch
+import numpy as np
+from torch.utils.data import Dataset
+
+
+# companding constants shared by both conversions: 255 / 32768 and its inverse
+scale = 255.0/32768.0
+scale_1 = 32768.0/255.0
+
+
+def ulaw2lin(u):
+    """ Expands mu-law codes to linear amplitudes.
+
+    u is a scalar or numpy array of codes centered at 128 (code 128 maps to 0).
+    The sign of (u - 128) is kept and its magnitude is expanded exponentially.
+    """
+    centered = u - 128
+    sign = np.sign(centered)
+    magnitude = np.abs(centered)
+    expanded = np.exp(magnitude/128.*np.log(256)) - 1
+    return sign*scale_1*expanded
+
+
+def lin2ulaw(x):
+    """ Compresses linear amplitudes to mu-law codes.
+
+    x is a scalar or numpy array of linear values. The result is rounded,
+    shifted by 128 and clipped to [0, 255]; it is returned as float, not int.
+    """
+    sign = np.sign(x)
+    magnitude = np.abs(x)
+    compressed = (sign*(128*np.log(1+scale*magnitude)/np.log(256)))
+    return np.clip(128 + np.round(compressed), 0, 255)
+
+
+def run_lpc(signal, lpcs, frame_length=160):
+    """ Runs frame-wise linear prediction on signal.
+
+    lpcs has shape (num_frames, lpc_order), one coefficient vector per frame.
+    signal must hold lpc_order samples of history followed by
+    num_frames * frame_length samples. Every frame is convolved with its own
+    coefficients and the result is negated.
+
+    Returns (prediction, error) with error = signal[lpc_order:] - prediction.
+    """
+    num_frames, lpc_order = lpcs.shape
+
+    frame_predictions = []
+    for frame_index in range(num_frames):
+        first = frame_index * frame_length
+        # 'valid' convolution over frame_length + lpc_order - 1 samples
+        # yields exactly frame_length predicted samples
+        last = (frame_index + 1) * frame_length + lpc_order - 1
+        frame_predictions.append(- np.convolve(signal[first : last], lpcs[frame_index], mode='valid'))
+
+    prediction = np.concatenate(frame_predictions)
+    error = signal[lpc_order :] - prediction
+
+    return prediction, error
+
+class LPCNetDataset(Dataset):
+    """ Dataset of fixed-length training sequences for LPCNet.
+
+    The dataset folder must contain an info.yml file; the keys read from it are
+    version, feature_file, feature_dtype, feature_frame_length, feature_frame_layout,
+    signal_file, signal_dtype, signal_frame_length, signal_frame_layout and frame_length.
+    Feature and signal files are memory mapped with np.memmap.
+
+    Args:
+        path_to_dataset:    folder containing info.yml, feature file and signal file
+        features:           feature names, looked up in feature_frame_layout
+        input_signals:      signal names that are stacked into the 'signals' tensor
+        target:             signal name that is returned as 'target'
+        frames_per_sample:  number of frames covered by one item
+        feature_history:    additional feature frames before the first signal frame
+        feature_lookahead:  additional feature frames after the last signal frame
+        lpc_gamma:          LPC coefficient i is weighted by lpc_gamma ** (i + 1) (version 2 only)
+
+    Every item is a dict with the keys
+        'features': float tensor, all requested non-period features concatenated along the last axis
+        'periods':  long tensor computed as 0.1 + 50 * periods + 100
+        'signals':  long tensor, input signals stacked along a new last axis
+        'target':   long tensor holding the target signal
+    For version 2 datasets signals and target are mu-law encoded with lin2ulaw,
+    for version 1 datasets they are taken from the signal file unchanged.
+
+    Raises:
+        ValueError: if the dataset version in info.yml is neither 1 nor 2
+        IndexError: if an item outside of range(len(dataset)) is requested
+    """
+
+    def __init__(self,
+                 path_to_dataset,
+                 features=['cepstrum', 'periods', 'pitch_corr'],
+                 input_signals=['last_signal', 'prediction', 'last_error'],
+                 target='error',
+                 frames_per_sample=15,
+                 feature_history=2,
+                 feature_lookahead=2,
+                 lpc_gamma=1):
+
+        super(LPCNetDataset, self).__init__()
+
+        # load dataset info
+        self.path_to_dataset = path_to_dataset
+        with open(os.path.join(path_to_dataset, 'info.yml'), 'r') as f:
+            # NOTE(review): FullLoader can construct more than plain YAML types;
+            # consider yaml.safe_load if info.yml may come from an untrusted source
+            dataset = yaml.load(f, yaml.FullLoader)
+
+        # dataset version
+        self.version = dataset['version']
+        if self.version == 1:
+            self.getitem = self.getitem_v1
+        elif self.version == 2:
+            self.getitem = self.getitem_v2
+        else:
+            raise ValueError(f"dataset version {self.version} unknown")
+
+        # features
+        self.feature_history = feature_history
+        self.feature_lookahead = feature_lookahead
+        self.frame_offset = 1 + self.feature_history
+        self.frames_per_sample = frames_per_sample
+        self.input_features = features
+        self.feature_frame_layout = dataset['feature_frame_layout']
+        self.lpc_gamma = lpc_gamma
+
+        # load feature file
+        self.feature_file = os.path.join(path_to_dataset, dataset['feature_file'])
+        self.features = np.memmap(self.feature_file, dtype=dataset['feature_dtype'])
+        self.feature_frame_length = dataset['feature_frame_length']
+
+        assert len(self.features) % self.feature_frame_length == 0
+        self.features = self.features.reshape((-1, self.feature_frame_length))
+
+        # derive number of samples in dataset
+        self.dataset_length = (len(self.features) - self.frame_offset - self.feature_lookahead - 1) // self.frames_per_sample
+
+        # signals
+        self.frame_length = dataset['frame_length']
+        self.signal_frame_layout = dataset['signal_frame_layout']
+        self.input_signals = input_signals
+        self.target = target
+
+        # load signals
+        self.signal_file = os.path.join(path_to_dataset, dataset['signal_file'])
+        self.signals = np.memmap(self.signal_file, dtype=dataset['signal_dtype'])
+        self.signal_frame_length = dataset['signal_frame_length']
+        self.signals = self.signals.reshape((-1, self.signal_frame_length))
+        assert len(self.signals) == len(self.features) * self.frame_length
+
+    def __getitem__(self, index):
+        # slices that run past the end of the arrays would be truncated silently,
+        # so out-of-range indices are rejected explicitly
+        if not 0 <= index < self.dataset_length:
+            raise IndexError(f"index {index} out of range for dataset of length {self.dataset_length}")
+
+        return self.getitem(index)
+
+    def _extract_features(self, index):
+        """ Returns a dict with one feature slice per requested feature for item index. """
+        sample = dict()
+
+        # the feature range extends the signal range by history and lookahead frames
+        frame_start = self.frame_offset + index * self.frames_per_sample - self.feature_history
+        frame_stop = self.frame_offset + (index + 1) * self.frames_per_sample + self.feature_lookahead
+
+        for feature in self.input_features:
+            feature_start, feature_stop = self.feature_frame_layout[feature]
+            sample[feature] = self.features[frame_start : frame_stop, feature_start : feature_stop]
+
+        # convert periods
+        if 'periods' in self.input_features:
+            sample['periods'] = (0.1 + 50 * sample['periods'] + 100).astype('int16')
+
+        return sample
+
+    def _signal_range(self, index):
+        """ Returns first and one-past-last row of the signal array for item index. """
+        signal_start = (self.frame_offset + index * self.frames_per_sample) * self.frame_length
+        signal_stop = (self.frame_offset + (index + 1) * self.frames_per_sample) * self.frame_length
+
+        return signal_start, signal_stop
+
+    def getitem_v2(self, index):
+        """ Item for version 2 datasets; prediction and errors are computed from the LPCs. """
+        sample = self._extract_features(index)
+        signal_start, signal_stop = self._signal_range(index)
+
+        # last_signal and signal are always expected to be there
+        sample['last_signal'] = self.signals[signal_start : signal_stop, self.signal_frame_layout['last_signal']]
+        sample['signal'] = self.signals[signal_start : signal_stop, self.signal_frame_layout['signal']]
+
+        # calculate prediction and error if lpc coefficients present and prediction not given
+        if 'lpc' in self.feature_frame_layout and 'prediction' not in self.signal_frame_layout:
+            # lpc coefficients with one frame lookahead
+            # frame positions (start one frame early for past excitation)
+            frame_start = self.frame_offset + self.frames_per_sample * index - 1
+            frame_stop = self.frame_offset + self.frames_per_sample * (index + 1)
+
+            # feature positions
+            lpc_start, lpc_stop = self.feature_frame_layout['lpc']
+            lpc_order = lpc_stop - lpc_start
+            lpcs = self.features[frame_start : frame_stop, lpc_start : lpc_stop]
+
+            # LPC weighting
+            weights = np.array([self.lpc_gamma ** (i + 1) for i in range(lpc_order)])
+            lpcs = lpcs * weights
+
+            # signal position (lpc_order samples as history)
+            history_start = frame_start * self.frame_length - lpc_order + 1
+            history_stop = frame_stop * self.frame_length + 1
+            noisy_signal = self.signals[history_start : history_stop, self.signal_frame_layout['last_signal']]
+
+            noisy_prediction, noisy_error = run_lpc(noisy_signal, lpcs, frame_length=self.frame_length)
+
+            # extract signals (the first frame only provided the past excitation)
+            offset = self.frame_length
+            num_samples = self.frame_length * self.frames_per_sample
+            sample['prediction'] = noisy_prediction[offset : offset + num_samples]
+            sample['last_error'] = noisy_error[offset - 1 : offset - 1 + num_samples]
+
+            # calculate error between real signal and noisy prediction
+            sample['error'] = sample['signal'] - sample['prediction']
+
+        # concatenate features
+        feature_keys = [key for key in self.input_features if not key.startswith("periods")]
+        features = torch.cat([torch.FloatTensor(sample[key]) for key in feature_keys], dim=-1)
+        signals = torch.cat([torch.LongTensor(lin2ulaw(sample[key])).unsqueeze(-1) for key in self.input_signals], dim=-1)
+        target = torch.LongTensor(lin2ulaw(sample[self.target]))
+        periods = torch.LongTensor(sample['periods'])
+
+        return {'features' : features, 'periods' : periods, 'signals' : signals, 'target' : target}
+
+    def getitem_v1(self, index):
+        """ Item for version 1 datasets; all signals are read from the signal file as stored. """
+        sample = self._extract_features(index)
+        signal_start, signal_stop = self._signal_range(index)
+
+        # every signal listed in the layout is one column of the signal file
+        for signal_name, column in self.signal_frame_layout.items():
+            sample[signal_name] = self.signals[signal_start : signal_stop, column]
+
+        # concatenate features
+        feature_keys = [key for key in self.input_features if not key.startswith("periods")]
+        features = torch.cat([torch.FloatTensor(sample[key]) for key in feature_keys], dim=-1)
+        signals = torch.cat([torch.LongTensor(sample[key]).unsqueeze(-1) for key in self.input_signals], dim=-1)
+        target = torch.LongTensor(sample[self.target])
+        periods = torch.LongTensor(sample['periods'])
+
+        return {'features' : features, 'periods' : periods, 'signals' : signals, 'target' : target}
+
+    def __len__(self):
+        return self.dataset_length
diff --git a/dnn/torch/lpcnet/engine/lpcnet_engine.py b/dnn/torch/lpcnet/engine/lpcnet_engine.py
new file mode 100644
index 00000000..c964bdd1
--- /dev/null
+++ b/dnn/torch/lpcnet/engine/lpcnet_engine.py
@@ -0,0 +1,141 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from tqdm import tqdm
+import sys
+
+def train_one_epoch(model, criterion, optimizer, dataloader, device, scheduler, log_interval=10):
+    """ Trains model for one pass over dataloader.
+
+    For every batch the GRU states are reset to zero, the loss is backpropagated,
+    and optimizer.step(), scheduler.step() and model.sparsify() are called once each.
+
+    Args:
+        model:          module called as model(features, periods, signals, gru_states);
+                        must provide gru_a_units, gru_b_units and sparsify()
+        criterion:      loss, called as criterion(output.permute(0, 2, 1), target)
+        optimizer:      optimizer stepped once per batch
+        dataloader:     iterable of dicts with keys 'features', 'periods', 'signals', 'target'
+        device:         device that model and batches are moved to
+        scheduler:      learning rate scheduler stepped once per batch
+        log_interval:   number of batches between status bar updates
+
+    Returns:
+        accumulated loss divided by len(dataloader)
+    """
+
+    model.to(device)
+    model.train()
+
+    running_loss = 0
+    previous_running_loss = 0
+    previous_num_batches = 0
+
+    with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:
+
+        for i, batch in enumerate(tepoch):
+
+            # set gradients to zero
+            optimizer.zero_grad()
+
+            # push batch to device
+            for key in batch:
+                batch[key] = batch[key].to(device)
+
+            # zero initial gru states, sized from the batch itself so that a smaller
+            # final batch (or a dataloader without batch_size) does not break the GRUs
+            batch_size = batch['signals'].shape[0]
+            gru_states = [
+                torch.zeros(1, batch_size, model.gru_a_units, device=device),
+                torch.zeros(1, batch_size, model.gru_b_units, device=device),
+            ]
+
+            target = batch['target']
+
+            # calculate model output
+            output = model(batch['features'], batch['periods'], batch['signals'], gru_states)
+
+            # calculate loss
+            loss = criterion(output.permute(0, 2, 1), target)
+
+            # calculate gradients
+            loss.backward()
+
+            # update weights
+            optimizer.step()
+
+            # update learning rate
+            scheduler.step()
+
+            # call sparsifier
+            model.sparsify()
+
+            # update running loss
+            running_loss += float(loss.cpu())
+
+            # update status bar
+            if i % log_interval == 0:
+                # average over the batches seen since the last update (one batch at i == 0)
+                num_batches = i + 1
+                current_loss = (running_loss - previous_running_loss) / (num_batches - previous_num_batches)
+                tepoch.set_postfix(running_loss=f"{running_loss/(i + 1):8.7f}", current_loss=f"{current_loss:8.7f}")
+                previous_running_loss = running_loss
+                previous_num_batches = num_batches
+
+
+    running_loss /= len(dataloader)
+
+    return running_loss
+
+def evaluate(model, criterion, dataloader, device, log_interval=10):
+    """ Evaluates model on dataloader without gradient tracking.
+
+    Args:
+        model:          module called as model(features, periods, signals, gru_states);
+                        must provide gru_a_units and gru_b_units
+        criterion:      loss, called as criterion(output.permute(0, 2, 1), target)
+        dataloader:     iterable of dicts with keys 'features', 'periods', 'signals', 'target'
+        device:         device that model and batches are moved to
+        log_interval:   number of batches between status bar updates
+
+    Returns:
+        accumulated loss divided by len(dataloader)
+    """
+
+    model.to(device)
+    model.eval()
+
+    running_loss = 0
+    previous_running_loss = 0
+    previous_num_batches = 0
+
+    with torch.no_grad():
+        with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:
+
+            for i, batch in enumerate(tepoch):
+
+                # push batch to device
+                for key in batch:
+                    batch[key] = batch[key].to(device)
+
+                # zero initial gru states, sized from the batch itself so that a smaller
+                # final batch (or a dataloader without batch_size) does not break the GRUs
+                batch_size = batch['signals'].shape[0]
+                gru_states = [
+                    torch.zeros(1, batch_size, model.gru_a_units, device=device),
+                    torch.zeros(1, batch_size, model.gru_b_units, device=device),
+                ]
+
+                target = batch['target']
+
+                # calculate model output
+                output = model(batch['features'], batch['periods'], batch['signals'], gru_states)
+
+                # calculate loss
+                loss = criterion(output.permute(0, 2, 1), target)
+
+                # update running loss
+                running_loss += float(loss.cpu())
+
+                # update status bar
+                if i % log_interval == 0:
+                    # average over the batches seen since the last update (one batch at i == 0)
+                    num_batches = i + 1
+                    current_loss = (running_loss - previous_running_loss) / (num_batches - previous_num_batches)
+                    tepoch.set_postfix(running_loss=f"{running_loss/(i + 1):8.7f}", current_loss=f"{current_loss:8.7f}")
+                    previous_running_loss = running_loss
+                    previous_num_batches = num_batches
+
+
+    running_loss /= len(dataloader)
+
+    return running_loss
\ No newline at end of file
diff --git a/dnn/torch/lpcnet/make_default_setup.py b/dnn/torch/lpcnet/make_default_setup.py
new file mode 100644
index 00000000..17031d26
--- /dev/null
+++ b/dnn/torch/lpcnet/make_default_setup.py
@@ -0,0 +1,56 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+
+import yaml
+
+from utils.templates import setup_dict
+
+# command line interface
+parser = argparse.ArgumentParser()
+
+parser.add_argument('name', type=str, help='name of default setup file')
+parser.add_argument('--model', choices=['lpcnet', 'multi_rate'], help='LPCNet model name', default='lpcnet')
+parser.add_argument('--path2dataset', type=str, help='dataset path', default=None)
+
+args = parser.parse_args()
+
+# start from the default setup registered for the selected model
+setup = setup_dict[args.model]
+
+# update dataset if given
+if args.path2dataset is not None:
+    setup['dataset'] = args.path2dataset
+
+# the setup file always gets a .yml extension
+name = args.name
+if not name.endswith('.yml'):
+    name += '.yml'
+
+if __name__ == '__main__':
+    with open(name, 'w') as f:
+        f.write(yaml.dump(setup))
diff --git a/dnn/torch/lpcnet/make_test_config.py b/dnn/torch/lpcnet/make_test_config.py
new file mode 100644
index 00000000..7c414e01
--- /dev/null
+++ b/dnn/torch/lpcnet/make_test_config.py
@@ -0,0 +1,78 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+import os
+import sys
+
+# command line interface
+parser = argparse.ArgumentParser()
+parser.add_argument("config_name", type=str, help="name of config file (.yml will be appended)")
+parser.add_argument("test_name", type=str, help="name for test result display")
+parser.add_argument("checkpoint", type=str, help="checkpoint to test")
+parser.add_argument("--lpcnet-demo", type=str, help="path to lpcnet_demo binary, default: /local/code/LPCNet/lpcnet_demo", default="/local/code/LPCNet/lpcnet_demo")
+parser.add_argument("--lpcnext-path", type=str, help="path to lpcnext folder, default: dirname(__file__)", default=os.path.dirname(__file__))
+parser.add_argument("--python-exe", type=str, help='python executable path, default: sys.executable', default=sys.executable)
+parser.add_argument("--pad", type=str, help="left pad of output in seconds, default: 0.015", default="0.015")
+parser.add_argument("--trim", type=str, help="left trim of output in seconds, default: 0", default="0")
+
+
+# Test configuration template. Single-brace fields are filled in below with
+# str.format; the doubled braces around INPUT and OUTPUT are escapes, so the
+# written file still contains literal {INPUT} and {OUTPUT} placeholders.
+template='''
+test: "{NAME}"
+processing:
+ - "sox {{INPUT}} {{INPUT}}.raw"
+ - "{LPCNET_DEMO} -features {{INPUT}}.raw {{INPUT}}.features.f32"
+ - "{PYTHON} {WORKING}/test_lpcnet.py {{INPUT}}.features.f32 {CHECKPOINT} {{OUTPUT}}.ua.wav"
+ - "sox {{OUTPUT}}.ua.wav {{OUTPUT}}.uap.wav pad {PAD}"
+ - "sox {{OUTPUT}}.uap.wav {{OUTPUT}} trim {TRIM}"
+ - "rm {{INPUT}}.raw {{OUTPUT}}.uap.wav {{OUTPUT}}.ua.wav {{INPUT}}.features.f32"
+'''
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    # all paths are made absolute before they are written to the config
+    file_content = template.format(
+        NAME=args.test_name,
+        LPCNET_DEMO=os.path.abspath(args.lpcnet_demo),
+        PYTHON=os.path.abspath(args.python_exe),
+        PAD=args.pad,
+        TRIM=args.trim,
+        WORKING=os.path.abspath(args.lpcnext_path),
+        CHECKPOINT=os.path.abspath(args.checkpoint)
+    )
+
+    print(file_content)
+
+    filename = args.config_name
+    if not filename.endswith(".yml"):
+        filename += ".yml"
+
+    with open(filename, "w") as f:
+        f.write(file_content)
diff --git a/dnn/torch/lpcnet/models/__init__.py b/dnn/torch/lpcnet/models/__init__.py
new file mode 100644
index 00000000..a26bc1cd
--- /dev/null
+++ b/dnn/torch/lpcnet/models/__init__.py
@@ -0,0 +1,8 @@
+from .lpcnet import LPCNet
+from .multi_rate_lpcnet import MultiRateLPCNet
+
+
+# model classes by the name used to select them
+model_dict = dict(
+    lpcnet=LPCNet,
+    multi_rate=MultiRateLPCNet,
+)
\ No newline at end of file
diff --git a/dnn/torch/lpcnet/models/lpcnet.py b/dnn/torch/lpcnet/models/lpcnet.py
new file mode 100644
index 00000000..e83ae901
--- /dev/null
+++ b/dnn/torch/lpcnet/models/lpcnet.py
@@ -0,0 +1,303 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import numpy as np
+
+from utils.ulaw import lin2ulawq, ulaw2lin
+from utils.sample import sample_excitation
+from utils.pcm import clip_to_int16
+from utils.sparsification import GRUSparsifier, calculate_gru_flops_per_step
+from utils.layers import DualFC
+from utils.misc import get_pdf_from_tree
+
+
+class LPCNet(nn.Module):
+    """ LPCNet model.
+
+    The frame rate network (period embedding, two 'valid' Conv1d layers, two Linear
+    layers, all followed by tanh) turns features and periods into one conditioning
+    vector per frame. The sample rate network (signal embedding, GRU A, GRU B, DualFC)
+    turns input signals and the upsampled conditioning into per-sample log probabilities.
+
+    config is a dict; the keys read are input_layout, feature_history, feature_lookahead,
+    feature_dimension, period_embedding_dim, period_levels, feature_conditioning_dim,
+    feature_conv_kernel_size, frame_size, signal_levels, signal_embedding_dim,
+    gru_a_units, gru_b_units, output_levels and, optionally, hsampling (default False),
+    lpc_gamma (default 1) and sparsification (entries 'gru_a' / 'gru_b' with keys
+    params, start, stop, interval, exponent).
+    """
+
+    def __init__(self, config):
+        super(LPCNet, self).__init__()
+
+        # general parameters
+        self.input_layout = config['input_layout']
+        self.feature_history = config['feature_history']
+        self.feature_lookahead = config['feature_lookahead']
+
+        # frame rate network parameters
+        self.feature_dimension = config['feature_dimension']
+        self.period_embedding_dim = config['period_embedding_dim']
+        self.period_levels = config['period_levels']
+        self.feature_channels = self.feature_dimension + self.period_embedding_dim
+        self.feature_conditioning_dim = config['feature_conditioning_dim']
+        self.feature_conv_kernel_size = config['feature_conv_kernel_size']
+
+
+        # frame rate network layers
+        self.period_embedding = nn.Embedding(self.period_levels, self.period_embedding_dim)
+        self.feature_conv1 = nn.Conv1d(self.feature_channels, self.feature_conditioning_dim, self.feature_conv_kernel_size, padding='valid')
+        self.feature_conv2 = nn.Conv1d(self.feature_conditioning_dim, self.feature_conditioning_dim, self.feature_conv_kernel_size, padding='valid')
+        self.feature_dense1 = nn.Linear(self.feature_conditioning_dim, self.feature_conditioning_dim)
+        self.feature_dense2 = nn.Linear(*(2*[self.feature_conditioning_dim]))
+
+        # sample rate network parameters
+        self.frame_size = config['frame_size']
+        self.signal_levels = config['signal_levels']
+        self.signal_embedding_dim = config['signal_embedding_dim']
+        self.gru_a_units = config['gru_a_units']
+        self.gru_b_units = config['gru_b_units']
+        self.output_levels = config['output_levels']
+        self.hsampling = config.get('hsampling', False)
+
+        self.gru_a_input_dim = len(self.input_layout['signals']) * self.signal_embedding_dim + self.feature_conditioning_dim
+        self.gru_b_input_dim = self.gru_a_units + self.feature_conditioning_dim
+
+        # sample rate network layers
+        self.signal_embedding = nn.Embedding(self.signal_levels, self.signal_embedding_dim)
+        self.gru_a = nn.GRU(self.gru_a_input_dim, self.gru_a_units, batch_first=True)
+        self.gru_b = nn.GRU(self.gru_b_input_dim, self.gru_b_units, batch_first=True)
+        self.dual_fc = DualFC(self.gru_b_units, self.output_levels)
+
+        # sparsification (a config without this key means no sparsification)
+        self.sparsifier = []
+        sparsification = config.get('sparsification', {})
+
+        # GRU A
+        self.gru_a_flops_per_step = self._setup_gru_sparsification(
+            self.gru_a, sparsification.get('gru_a'), drop_input=True)
+
+        # GRU B
+        self.gru_b_flops_per_step = self._setup_gru_sparsification(
+            self.gru_b, sparsification.get('gru_b'))
+
+        # inference parameters
+        self.lpc_gamma = config.get('lpc_gamma', 1)
+
+    def _setup_gru_sparsification(self, gru, gru_config, **flops_kwargs):
+        """ Registers a sparsifier for gru if gru_config is given and returns the GRU's flops per step. """
+        if gru_config is None:
+            return calculate_gru_flops_per_step(gru, **flops_kwargs)
+
+        task_list = [(gru, gru_config['params'])]
+        self.sparsifier.append(GRUSparsifier(task_list,
+                                             gru_config['start'],
+                                             gru_config['stop'],
+                                             gru_config['interval'],
+                                             gru_config['exponent'])
+        )
+
+        return calculate_gru_flops_per_step(gru, gru_config['params'], **flops_kwargs)
+
+    def sparsify(self):
+        """ Advances every registered sparsifier by one step. """
+        for sparsifier in self.sparsifier:
+            sparsifier.step()
+
+    def get_gflops(self, fs, verbose=False):
+        """ Returns the estimated complexity in GFLOPS at sampling rate fs.
+
+        The estimate is the sum of frame rate network, GRU A, GRU B and dual FC
+        contributions; with verbose=True every contribution is printed.
+        """
+        gflops = 0
+
+        # frame rate network
+        conditioning_dim = self.feature_conditioning_dim
+        feature_channels = self.feature_channels
+        frame_rate = fs / self.frame_size
+        frame_rate_network_complexity = 1e-9 * 2 * (5 * conditioning_dim + 3 * feature_channels) * conditioning_dim * frame_rate
+        if verbose:
+            print(f"frame rate network: {frame_rate_network_complexity} GFLOPS")
+        gflops += frame_rate_network_complexity
+
+        # gru a
+        gru_a_rate = fs
+        gru_a_complexity = 1e-9 * gru_a_rate * self.gru_a_flops_per_step
+        if verbose:
+            print(f"gru A: {gru_a_complexity} GFLOPS")
+        gflops += gru_a_complexity
+
+        # gru b
+        gru_b_rate = fs
+        gru_b_complexity = 1e-9 * gru_b_rate * self.gru_b_flops_per_step
+        if verbose:
+            print(f"gru B: {gru_b_complexity} GFLOPS")
+        gflops += gru_b_complexity
+
+
+        # dual fcs
+        fc = self.dual_fc
+        rate = fs
+        input_size = fc.dense1.in_features
+        output_size = fc.dense1.out_features
+        dual_fc_complexity = 1e-9 * (4 * input_size * output_size + 22 * output_size) * rate
+        if self.hsampling:
+            dual_fc_complexity /= 8
+        if verbose:
+            print(f"dual_fc: {dual_fc_complexity} GFLOPS")
+        gflops += dual_fc_complexity
+
+        if verbose:
+            print(f'total: {gflops} GFLOPS')
+
+        return gflops
+
+    def frame_rate_network(self, features, periods):
+        """ Calculates conditioning vectors from features and periods.
+
+        The embedded periods are flattened over dimensions 2 and 3 and concatenated
+        to features along the last dimension. Both convolutions use 'valid' padding,
+        so the output has fewer frames than the input.
+        """
+
+        embedded_periods = torch.flatten(self.period_embedding(periods), 2, 3)
+        features = torch.concat((features, embedded_periods), dim=-1)
+
+        # convert to channels first and calculate conditioning vector
+        c = torch.permute(features, [0, 2, 1])
+
+        c = torch.tanh(self.feature_conv1(c))
+        c = torch.tanh(self.feature_conv2(c))
+        # back to channels last
+        c = torch.permute(c, [0, 2, 1])
+        c = torch.tanh(self.feature_dense1(c))
+        c = torch.tanh(self.feature_dense2(c))
+
+        return c
+
+    def sample_rate_network(self, signals, c, gru_states):
+        """ Training path: returns log probabilities and the final (gru_a, gru_b) states.
+
+        c is repeated frame_size times along dimension 1 before it is concatenated
+        to the embedded signals and to the output of GRU A.
+        """
+        embedded_signals = torch.flatten(self.signal_embedding(signals), 2, 3)
+        c_upsampled = torch.repeat_interleave(c, self.frame_size, dim=1)
+
+        y = torch.concat((embedded_signals, c_upsampled), dim=-1)
+        y, gru_a_state = self.gru_a(y, gru_states[0])
+        y = torch.concat((y, c_upsampled), dim=-1)
+        y, gru_b_state = self.gru_b(y, gru_states[1])
+
+        y = self.dual_fc(y)
+
+        if self.hsampling:
+            y = torch.sigmoid(y)
+            # small offset keeps the logarithm finite for zero probabilities
+            log_probs = torch.log(get_pdf_from_tree(y) + 1e-6)
+        else:
+            log_probs = torch.log_softmax(y, dim=-1)
+
+        return log_probs, (gru_a_state, gru_b_state)
+
+    def decoder(self, signals, c, gru_states):
+        """ Inference path: like sample_rate_network, but c is used without upsampling
+        and probabilities are returned instead of log probabilities.
+        """
+        embedded_signals = torch.flatten(self.signal_embedding(signals), 2, 3)
+
+        y = torch.concat((embedded_signals, c), dim=-1)
+        y, gru_a_state = self.gru_a(y, gru_states[0])
+        y = torch.concat((y, c), dim=-1)
+        y, gru_b_state = self.gru_b(y, gru_states[1])
+
+        y = self.dual_fc(y)
+
+        if self.hsampling:
+            y = torch.sigmoid(y)
+            probs = get_pdf_from_tree(y)
+        else:
+            probs = torch.softmax(y, dim=-1)
+
+        return probs, (gru_a_state, gru_b_state)
+
+    def forward(self, features, periods, signals, gru_states):
+        """ Returns the log probabilities of the sample rate network; GRU states are discarded. """
+
+        c = self.frame_rate_network(features, periods)
+        log_probs, _ = self.sample_rate_network(signals, c, gru_states)
+
+        return log_probs
+
+    def generate(self, features, periods, lpcs):
+        """ Synthesizes a signal sample by sample.
+
+        Args:
+            features:   feature frames without batch dimension; the first dimension holds
+                        feature_history + num_frames + feature_lookahead frames
+            periods:    periods without batch dimension, passed to frame_rate_network
+            lpcs:       one LPC vector per feature frame, order given by the last dimension
+
+        Returns:
+            int16 tensor with num_frames * frame_size samples. The samples are the
+            generated signal passed through the recursion mem = 0.85 * mem + sig.
+        """
+
+        with torch.no_grad():
+            device = next(self.parameters()).device
+
+            num_frames = features.shape[0] - self.feature_history - self.feature_lookahead
+            lpc_order = lpcs.shape[-1]
+            num_input_signals = len(self.input_layout['signals'])
+            pitch_corr_position = self.input_layout['features']['pitch_corr'][0]
+
+            # signal buffers (pcm lives on the model device because it is multiplied
+            # with the lpc coefficients, which are pushed to the device below)
+            pcm = torch.zeros((num_frames * self.frame_size + lpc_order), device=device)
+            output = torch.zeros((num_frames * self.frame_size), dtype=torch.int16)
+            mem = 0
+
+            # state buffers
+            gru_a_state = torch.zeros((1, 1, self.gru_a_units), device=device)
+            gru_b_state = torch.zeros((1, 1, self.gru_b_units), device=device)
+            gru_states = [gru_a_state, gru_b_state]
+
+            # all input signals start at 128
+            input_signals = torch.zeros((1, 1, num_input_signals), dtype=torch.long, device=device) + 128
+
+            # push data to device
+            features = features.to(device)
+            periods = periods.to(device)
+            lpcs = lpcs.to(device)
+
+            # lpc weighting
+            weights = torch.FloatTensor([self.lpc_gamma ** (i + 1) for i in range(lpc_order)]).to(device)
+            lpcs = lpcs * weights
+
+            # run feature encoding
+            c = self.frame_rate_network(features.unsqueeze(0), periods.unsqueeze(0))
+
+            for frame_index in range(num_frames):
+                frame_start = frame_index * self.frame_size
+                pitch_corr = features[frame_index + self.feature_history, pitch_corr_position]
+                # flipped and negated so that an elementwise product with the most
+                # recent lpc_order samples yields the prediction
+                a = - torch.flip(lpcs[frame_index + self.feature_history], [0])
+                current_c = c[:, frame_index : frame_index + 1, :]
+
+                for i in range(self.frame_size):
+                    pcm_position = frame_start + i + lpc_order
+                    output_position = frame_start + i
+
+                    # prepare input
+                    pred = torch.sum(pcm[pcm_position - lpc_order : pcm_position] * a)
+                    if 'prediction' in self.input_layout['signals']:
+                        input_signals[0, 0, self.input_layout['signals']['prediction']] = lin2ulawq(pred)
+
+                    # run single step of sample rate network
+                    probs, gru_states = self.decoder(
+                        input_signals,
+                        current_c,
+                        gru_states
+                    )
+
+                    # sample from output
+                    # NOTE(review): device handling inside sample_excitation / ulaw2lin
+                    # is not visible here; confirm for non-CPU devices
+                    exc_ulaw = sample_excitation(probs, pitch_corr)
+
+                    # signal generation
+                    exc = ulaw2lin(exc_ulaw)
+                    sig = exc + pred
+                    pcm[pcm_position] = sig
+                    mem = 0.85 * mem + float(sig)
+                    output[output_position] = clip_to_int16(round(mem))
+
+                    # buffer update
+                    if 'last_signal' in self.input_layout['signals']:
+                        input_signals[0, 0, self.input_layout['signals']['last_signal']] = lin2ulawq(sig)
+
+                    if 'last_error' in self.input_layout['signals']:
+                        input_signals[0, 0, self.input_layout['signals']['last_error']] = lin2ulawq(exc)
+
+        return output
diff --git a/dnn/torch/lpcnet/models/multi_rate_lpcnet.py b/dnn/torch/lpcnet/models/multi_rate_lpcnet.py
new file mode 100644
index 00000000..e613e74c
--- /dev/null
+++ b/dnn/torch/lpcnet/models/multi_rate_lpcnet.py
@@ -0,0 +1,437 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+from utils.layers.subconditioner import get_subconditioner
+from utils.layers import DualFC
+
+from utils.ulaw import lin2ulawq, ulaw2lin
+from utils.sample import sample_excitation
+from utils.pcm import clip_to_int16
+from utils.sparsification import GRUSparsifier, calculate_gru_flops_per_step
+
+from utils.misc import interleave_tensors
+
+
+
+
+# MultiRateLPCNet
+class MultiRateLPCNet(nn.Module):
+ def __init__(self, config):
+ super(MultiRateLPCNet, self).__init__()
+
+ # general parameters
+ self.input_layout = config['input_layout']
+ self.feature_history = config['feature_history']
+ self.feature_lookahead = config['feature_lookahead']
+ self.signals = config['signals']
+
+ # frame rate network parameters
+ self.feature_dimension = config['feature_dimension']
+ self.period_embedding_dim = config['period_embedding_dim']
+ self.period_levels = config['period_levels']
+ self.feature_channels = self.feature_dimension + self.period_embedding_dim
+ self.feature_conditioning_dim = config['feature_conditioning_dim']
+ self.feature_conv_kernel_size = config['feature_conv_kernel_size']
+
+ # frame rate network layers
+ self.period_embedding = nn.Embedding(self.period_levels, self.period_embedding_dim)
+ self.feature_conv1 = nn.Conv1d(self.feature_channels, self.feature_conditioning_dim, self.feature_conv_kernel_size, padding='valid')
+ self.feature_conv2 = nn.Conv1d(self.feature_conditioning_dim, self.feature_conditioning_dim, self.feature_conv_kernel_size, padding='valid')
+ self.feature_dense1 = nn.Linear(self.feature_conditioning_dim, self.feature_conditioning_dim)
+ self.feature_dense2 = nn.Linear(*(2*[self.feature_conditioning_dim]))
+
+ # sample rate network parameters
+ self.frame_size = config['frame_size']
+ self.signal_levels = config['signal_levels']
+ self.signal_embedding_dim = config['signal_embedding_dim']
+ self.gru_a_units = config['gru_a_units']
+ self.gru_b_units = config['gru_b_units']
+ self.output_levels = config['output_levels']
+
+ # subconditioning B
+ sub_config = config['subconditioning']['subconditioning_b']
+ self.substeps_b = sub_config['number_of_subsamples']
+ self.subcondition_signals_b = sub_config['signals']
+ self.signals_idx_b = [self.input_layout['signals'][key] for key in sub_config['signals']]
+ method = sub_config['method']
+ kwargs = sub_config['kwargs']
+ if type(kwargs) == type(None):
+ kwargs = dict()
+
+ state_size = self.gru_b_units
+ self.subconditioner_b = get_subconditioner(method,
+ sub_config['number_of_subsamples'], sub_config['pcm_embedding_size'],
+ state_size, self.signal_levels, len(sub_config['signals']),
+ **sub_config['kwargs'])
+
+ # subconditioning A
+ sub_config = config['subconditioning']['subconditioning_a']
+ self.substeps_a = sub_config['number_of_subsamples']
+ self.subcondition_signals_a = sub_config['signals']
+ self.signals_idx_a = [self.input_layout['signals'][key] for key in sub_config['signals']]
+ method = sub_config['method']
+ kwargs = sub_config['kwargs']
+ if type(kwargs) == type(None):
+ kwargs = dict()
+
+ state_size = self.gru_a_units
+ self.subconditioner_a = get_subconditioner(method,
+ sub_config['number_of_subsamples'], sub_config['pcm_embedding_size'],
+ state_size, self.signal_levels, self.substeps_b * len(sub_config['signals']),
+ **sub_config['kwargs'])
+
+
+ # wrap up subconditioning, group_size_gru_a holds the number
+ # of timesteps that are grouped as sample input for GRU A
+ # input and group_size_subcondition_a holds the number of samples that are
+ # grouped as input to pre-GRU B subconditioning
+ self.group_size_gru_a = self.substeps_a * self.substeps_b
+ self.group_size_subcondition_a = self.substeps_b
+ self.gru_a_rate_divider = self.group_size_gru_a
+ self.gru_b_rate_divider = self.substeps_b
+
+ # gru sizes
+ self.gru_a_input_dim = self.group_size_gru_a * len(self.signals) * self.signal_embedding_dim + self.feature_conditioning_dim
+ self.gru_b_input_dim = self.subconditioner_a.get_output_dim(0) + self.feature_conditioning_dim
+ self.signals_idx = [self.input_layout['signals'][key] for key in self.signals]
+
+ # sample rate network layers
+ self.signal_embedding = nn.Embedding(self.signal_levels, self.signal_embedding_dim)
+ self.gru_a = nn.GRU(self.gru_a_input_dim, self.gru_a_units, batch_first=True)
+ self.gru_b = nn.GRU(self.gru_b_input_dim, self.gru_b_units, batch_first=True)
+
+ # sparsification
+ self.sparsifier = []
+
+ # GRU A
+ if 'gru_a' in config['sparsification']:
+ gru_config = config['sparsification']['gru_a']
+ task_list = [(self.gru_a, gru_config['params'])]
+ self.sparsifier.append(GRUSparsifier(task_list,
+ gru_config['start'],
+ gru_config['stop'],
+ gru_config['interval'],
+ gru_config['exponent'])
+ )
+ self.gru_a_flops_per_step = calculate_gru_flops_per_step(self.gru_a,
+ gru_config['params'], drop_input=True)
+ else:
+ self.gru_a_flops_per_step = calculate_gru_flops_per_step(self.gru_a, drop_input=True)
+
+ # GRU B
+ if 'gru_b' in config['sparsification']:
+ gru_config = config['sparsification']['gru_b']
+ task_list = [(self.gru_b, gru_config['params'])]
+ self.sparsifier.append(GRUSparsifier(task_list,
+ gru_config['start'],
+ gru_config['stop'],
+ gru_config['interval'],
+ gru_config['exponent'])
+ )
+ self.gru_b_flops_per_step = calculate_gru_flops_per_step(self.gru_b,
+ gru_config['params'])
+ else:
+ self.gru_b_flops_per_step = calculate_gru_flops_per_step(self.gru_b)
+
+
+
+ # dual FCs
+ self.dual_fc = []
+ for i in range(self.substeps_b):
+ dim = self.subconditioner_b.get_output_dim(i)
+ self.dual_fc.append(DualFC(dim, self.output_levels))
+ self.add_module(f"dual_fc_{i}", self.dual_fc[-1])
+
+ # Estimates the model complexity in GFLOPS as the sum of per-component
+ # estimates (frame rate network, GRU A, subconditioning A, GRU B,
+ # subconditioning B, dual FCs).
+ #
+ # fs: rate in samples per second at which the sample rate network runs;
+ # every component rate below is derived from it
+ # verbose: if True, prints the estimate for each component and the total
+ # hierarchical_sampling: if True, each dual FC estimate is divided by 8
+ #
+ # Returns the accumulated estimate as a number.
+ def get_gflops(self, fs, verbose=False, hierarchical_sampling=False):
+ gflops = 0
+
+ # frame rate network
+ conditioning_dim = self.feature_conditioning_dim
+ feature_channels = self.feature_channels
+ frame_rate = fs / self.frame_size
+ frame_rate_network_complexity = 1e-9 * 2 * (5 * conditioning_dim + 3 * feature_channels) * conditioning_dim * frame_rate
+ if verbose:
+ print(f"frame rate network: {frame_rate_network_complexity} GFLOPS")
+ gflops += frame_rate_network_complexity
+
+ # gru a
+ # GRU A runs once per group of group_size_gru_a samples
+ gru_a_rate = fs / self.group_size_gru_a
+ gru_a_complexity = 1e-9 * gru_a_rate * self.gru_a_flops_per_step
+ if verbose:
+ print(f"gru A: {gru_a_complexity} GFLOPS")
+ gflops += gru_a_complexity
+
+ # subconditioning a
+ subcond_a_rate = fs / self.substeps_b
+ subconditioning_a_complexity = 1e-9 * self.subconditioner_a.get_average_flops_per_step() * subcond_a_rate
+ if verbose:
+ print(f"subconditioning A: {subconditioning_a_complexity} GFLOPS")
+ gflops += subconditioning_a_complexity
+
+ # gru b
+ # GRU B runs once per group of substeps_b samples
+ gru_b_rate = fs / self.substeps_b
+ gru_b_complexity = 1e-9 * gru_b_rate * self.gru_b_flops_per_step
+ if verbose:
+ print(f"gru B: {gru_b_complexity} GFLOPS")
+ gflops += gru_b_complexity
+
+ # subconditioning b
+ subcond_b_rate = fs
+ subconditioning_b_complexity = 1e-9 * self.subconditioner_b.get_average_flops_per_step() * subcond_b_rate
+ if verbose:
+ print(f"subconditioning B: {subconditioning_b_complexity} GFLOPS")
+ gflops += subconditioning_b_complexity
+
+ # dual fcs
+ # each dual FC handles one of len(self.dual_fc) substeps, so each one
+ # runs at fs / len(self.dual_fc)
+ for i, fc in enumerate(self.dual_fc):
+ rate = fs / len(self.dual_fc)
+ input_size = fc.dense1.in_features
+ output_size = fc.dense1.out_features
+ dual_fc_complexity = 1e-9 * (4 * input_size * output_size + 22 * output_size) * rate
+ if hierarchical_sampling:
+ dual_fc_complexity /= 8
+ if verbose:
+ print(f"dual_fc_{i}: {dual_fc_complexity} GFLOPS")
+ gflops += dual_fc_complexity
+
+ if verbose:
+ print(f'total: {gflops} GFLOPS')
+
+ return gflops
+
+
+
+ # Calls step() on every sparsifier registered in __init__ (one per GRU
+ # that has an entry in config['sparsification']); no-op if there is none.
+ def sparsify(self):
+ for sparsifier in self.sparsifier:
+ sparsifier.step()
+
+ # Computes the frame-wise conditioning vectors from features and periods.
+ #
+ # The embedded periods are flattened over dims 2 and 3 and concatenated
+ # to the features along the last dimension, then passed through two
+ # Conv1d layers and two Linear layers, each followed by tanh.
+ # assumes features is (batch, frames, feature_dimension) and periods is
+ # an integer tensor of shape (batch, frames, k) -- TODO confirm
+ # Both convolutions are created with padding='valid' in __init__.
+ def frame_rate_network(self, features, periods):
+
+ embedded_periods = torch.flatten(self.period_embedding(periods), 2, 3)
+ features = torch.concat((features, embedded_periods), dim=-1)
+
+ # convert to channels first and calculate conditioning vector
+ c = torch.permute(features, [0, 2, 1])
+
+ c = torch.tanh(self.feature_conv1(c))
+ c = torch.tanh(self.feature_conv2(c))
+ # back to channels last
+ c = torch.permute(c, [0, 2, 1])
+ c = torch.tanh(self.feature_dense1(c))
+ c = torch.tanh(self.feature_dense2(c))
+
+ return c
+
+ # signals: 3-dim tensor (batch_size, sequence_length, num_signals)
+ # group_size: number of consecutive time steps merged into one step
+ # signal_idx: positions (last dim) of the signals to keep, in output order
+ #
+ # Returns a tensor of shape
+ # (batch_size, sequence_length // group_size, group_size * len(signal_idx)).
+ # The reshape requires sequence_length to be divisible by group_size.
+ def prepare_signals(self, signals, group_size, signal_idx):
+ """ extracts, delays and groups signals """
+
+ batch_size, sequence_length, num_signals = signals.shape
+
+ # extract signals according to position
+ signals = torch.cat([signals[:, :, i : i + 1] for i in signal_idx],
+ dim=-1)
+
+ # roll back pcm to account for grouping
+ # torch.roll is circular: the last group_size - 1 time steps wrap
+ # around to the start of the sequence
+ signals = torch.roll(signals, group_size - 1, -2)
+
+ # reshape
+ signals = torch.reshape(signals,
+ (batch_size, sequence_length // group_size, group_size * len(signal_idx)))
+
+ return signals
+
+
+ # Runs the sample rate network on a whole sequence.
+ #
+ # signals: integer signal tensor as accepted by prepare_signals
+ # c: conditioning vectors from frame_rate_network
+ # gru_states: pair (state for GRU A, state for GRU B)
+ #
+ # Returns the interleaved dual FC outputs (no softmax applied here) and
+ # the tuple (gru_a_state, gru_b_state) of final GRU states.
+ def sample_rate_network(self, signals, c, gru_states):
+
+ signals_a = self.prepare_signals(signals, self.group_size_gru_a, self.signals_idx)
+ embedded_signals = torch.flatten(self.signal_embedding(signals_a), 2, 3)
+ # features at GRU A rate
+ c_upsampled_a = torch.repeat_interleave(c, self.frame_size // self.gru_a_rate_divider, dim=1)
+ # features at GRU B rate
+ c_upsampled_b = torch.repeat_interleave(c, self.frame_size // self.gru_b_rate_divider, dim=1)
+
+ y = torch.concat((embedded_signals, c_upsampled_a), dim=-1)
+ y, gru_a_state = self.gru_a(y, gru_states[0])
+ # first round of upsampling and subconditioning
+ c_signals_a = self.prepare_signals(signals, self.group_size_subcondition_a, self.signals_idx_a)
+ y = self.subconditioner_a(y, c_signals_a)
+ # presumably subconditioner_a returns one tensor per substep, which
+ # interleave_tensors merges along time -- verify against utils
+ y = interleave_tensors(y)
+
+ y = torch.concat((y, c_upsampled_b), dim=-1)
+ y, gru_b_state = self.gru_b(y, gru_states[1])
+ # second round of subconditioning, conditioned on single samples
+ c_signals_b = self.prepare_signals(signals, 1, self.signals_idx_b)
+ y = self.subconditioner_b(y, c_signals_b)
+
+ # substep i of subconditioner B has its own output layer dual_fc[i]
+ y = [self.dual_fc[i](y[i]) for i in range(self.substeps_b)]
+ y = interleave_tensors(y)
+
+ return y, (gru_a_state, gru_b_state)
+
+ # Single pass through embedding, GRU A, GRU B and output layer without
+ # any subconditioning; returns softmax probabilities and the GRU states.
+ #
+ # NOTE(review): self.dual_fc is a Python list in this class (see
+ # __init__), so the call self.dual_fc(y) below would raise a TypeError.
+ # This method is not called by forward or generate of this class and
+ # looks like a leftover from a single-rate model -- confirm whether it
+ # can be removed or needs to be adapted.
+ def decoder(self, signals, c, gru_states):
+ embedded_signals = torch.flatten(self.signal_embedding(signals), 2, 3)
+
+ y = torch.concat((embedded_signals, c), dim=-1)
+ y, gru_a_state = self.gru_a(y, gru_states[0])
+ y = torch.concat((y, c), dim=-1)
+ y, gru_b_state = self.gru_b(y, gru_states[1])
+
+ y = self.dual_fc(y)
+
+ return torch.softmax(y, dim=-1), (gru_a_state, gru_b_state)
+
+ # Training-time forward pass: computes the conditioning with
+ # frame_rate_network, runs sample_rate_network on the given signals and
+ # returns log-probabilities (log_softmax over the last dimension).
+ # The final GRU states are discarded.
+ def forward(self, features, periods, signals, gru_states):
+
+ c = self.frame_rate_network(features, periods)
+ y, _ = self.sample_rate_network(signals, c, gru_states)
+ log_probs = torch.log_softmax(y, dim=-1)
+
+ return log_probs
+
+ def generate(self, features, periods, lpcs):
+
+ with torch.no_grad():
+ device = self.parameters().__next__().device
+
+ num_frames = features.shape[0] - self.feature_history - self.feature_lookahead
+ lpc_order = lpcs.shape[-1]
+ num_input_signals = len(self.signals)
+ pitch_corr_position = self.input_layout['features']['pitch_corr'][0]
+
+ # signal buffers
+ last_signal = torch.zeros((num_frames * self.frame_size + lpc_order + 1))
+ prediction = torch.zeros((num_frames * self.frame_size + lpc_order + 1))
+ last_error = torch.zeros((num_frames * self.frame_size + lpc_order + 1))
+ output = torch.zeros((num_frames * self.frame_size), dtype=torch.int16)
+ mem = 0
+
+ # state buffers
+ gru_a_state = torch.zeros((1, 1, self.gru_a_units))
+ gru_b_state = torch.zeros((1, 1, self.gru_b_units))
+
+ input_signals = 128 + torch.zeros(self.group_size_gru_a * num_input_signals, dtype=torch.long)
+ # conditioning signals for subconditioner a
+ c_signals_a = 128 + torch.zeros(self.group_size_subcondition_a * len(self.signals_idx_a), dtype=torch.long)
+ # conditioning signals for subconditioner b
+ c_signals_b = 128 + torch.zeros(len(self.signals_idx_b), dtype=torch.long)
+
+ # signal dict
+ signal_dict = {
+ 'prediction' : prediction,
+ 'last_error' : last_error,
+ 'last_signal' : last_signal
+ }
+
+ # push data to device
+ features = features.to(device)
+ periods = periods.to(device)
+ lpcs = lpcs.to(device)
+
+ # run feature encoding
+ c = self.frame_rate_network(features.unsqueeze(0), periods.unsqueeze(0))
+
+ for frame_index in range(num_frames):
+ frame_start = frame_index * self.frame_size
+ pitch_corr = features[frame_index + self.feature_history, pitch_corr_position]
+ a = - torch.flip(lpcs[frame_index + self.feature_history], [0])
+ current_c = c[:, frame_index : frame_index + 1, :]
+
+ for i in range(0, self.frame_size, self.group_size_gru_a):
+ pcm_position = frame_start + i + lpc_order
+ output_position = frame_start + i
+
+ # calculate newest prediction
+ prediction[pcm_position] = torch.sum(last_signal[pcm_position - lpc_order + 1: pcm_position + 1] * a)
+
+ # prepare input
+ for slot in range(self.group_size_gru_a):
+ k = slot - self.group_size_gru_a + 1
+ for idx, name in enumerate(self.signals):
+ input_signals[idx + slot * num_input_signals] = lin2ulawq(
+ signal_dict[name][pcm_position + k]
+ )
+
+
+ # run GRU A
+ embed_signals = self.signal_embedding(input_signals.reshape((1, 1, -1)))
+ embed_signals = torch.flatten(embed_signals, 2)
+ y = torch.cat((embed_signals, current_c), dim=-1)
+ h_a, gru_a_state = self.gru_a(y, gru_a_state)
+
+ # loop over substeps_a
+ for step_a in range(self.substeps_a):
+ # prepare conditioning input
+ for slot in range(self.group_size_subcondition_a):
+ k = slot - self.group_size_subcondition_a + 1
+ for idx, name in enumerate(self.subcondition_signals_a):
+ c_signals_a[idx + slot * num_input_signals] = lin2ulawq(
+ signal_dict[name][pcm_position + k]
+ )
+
+ # subconditioning
+ h_a = self.subconditioner_a.single_step(step_a, h_a, c_signals_a.reshape((1, 1, -1)))
+
+ # run GRU B
+ y = torch.cat((h_a, current_c), dim=-1)
+ h_b, gru_b_state = self.gru_b(y, gru_b_state)
+
+ # loop over substeps b
+ for step_b in range(self.substeps_b):
+ # prepare subconditioning input
+ for idx, name in enumerate(self.subcondition_signals_b):
+ c_signals_b[idx] = lin2ulawq(
+ signal_dict[name][pcm_position]
+ )
+
+ # subcondition
+ h_b = self.subconditioner_b.single_step(step_b, h_b, c_signals_b.reshape((1, 1, -1)))
+
+ # run dual FC
+ probs = torch.softmax(self.dual_fc[step_b](h_b), dim=-1)
+
+ # sample
+ new_exc = ulaw2lin(sample_excitation(probs, pitch_corr))
+
+ # update signals
+ sig = new_exc + prediction[pcm_position]
+ last_error[pcm_position + 1] = new_exc
+ last_signal[pcm_position + 1] = sig
+
+ mem = 0.85 * mem + float(sig)
+ output[output_position] = clip_to_int16(round(mem))
+
+ # increase positions
+ pcm_position += 1
+ output_position += 1
+
+ # calculate next prediction
+ prediction[pcm_position] = torch.sum(last_signal[pcm_position - lpc_order + 1: pcm_position + 1] * a)
+
+ return output
diff --git a/dnn/torch/lpcnet/print_lpcnet_complexity.py b/dnn/torch/lpcnet/print_lpcnet_complexity.py
new file mode 100644
index 00000000..d6072dc5
--- /dev/null
+++ b/dnn/torch/lpcnet/print_lpcnet_complexity.py
@@ -0,0 +1,64 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+
+import yaml
+
+from models import model_dict
+
+
+# Script body: loads a setup yaml file, instantiates the model named in it
+# and prints the per-component complexity estimate returned by the model's
+# get_gflops at a rate of 16000 samples per second.
+
+# set debug to True to bypass the command line and use the fixed
+# arguments below
+debug = False
+if debug:
+ args = type('dummy', (object,),
+ {
+ 'setup' : 'setups/lpcnet_m/setup_1_4_concatenative.yml',
+ 'hierarchical_sampling' : False
+ })()
+else:
+ parser = argparse.ArgumentParser()
+ parser.add_argument('setup', type=str, help='setup yaml file')
+ parser.add_argument('--hierarchical-sampling', action="store_true", help='whether to assume hierarchical sampling (default=False)', default=False)
+
+ args = parser.parse_args()
+
+with open(args.setup, 'r') as f:
+ setup = yaml.load(f.read(), yaml.FullLoader)
+
+# check model
+# setups without a 'model' entry fall back to the 'lpcnet' model
+if not 'model' in setup['lpcnet']:
+ print(f'warning: did not find model entry in setup, using default lpcnet')
+ model_name = 'lpcnet'
+else:
+ model_name = setup['lpcnet']['model']
+
+# create model
+model = model_dict[model_name](setup['lpcnet']['config'])
+
+# verbose=True makes get_gflops print its results; the return value is not
+# used any further in this script
+gflops = model.get_gflops(16000, verbose=True, hierarchical_sampling=args.hierarchical_sampling)
diff --git a/dnn/torch/lpcnet/scripts/collect_multi_run_results.py b/dnn/torch/lpcnet/scripts/collect_multi_run_results.py
new file mode 100644
index 00000000..ae662a4f
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/collect_multi_run_results.py
@@ -0,0 +1,190 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+import os
+from uuid import UUID
+from collections import OrderedDict
+import pickle
+
+
+import torch
+import numpy as np
+
+import utils
+
+
+
+# command line interface: all three arguments are positional and required
+parser = argparse.ArgumentParser()
+parser.add_argument("input", type=str, help="input folder containing multi-run output")
+parser.add_argument("tag", type=str, help="tag for multi-run experiment")
+parser.add_argument("csv", type=str, help="name for output csv")
+
+
+def is_uuid(val):
+ try:
+ UUID(val)
+ return True
+ except:
+ return False
+
+
+# Collects the result metrics of a single run.
+#
+# folder: run folder containing the sub-folders 'training' and 'testing'
+#
+# Returns an OrderedDict with the keys eval_loss, eval_warpq, pesq_mean,
+# warpq_mean, pitch_error_mean and voicing_error_mean.
+def collect_results(folder):
+
+ training_folder = os.path.join(folder, 'training')
+ testing_folder = os.path.join(folder, 'testing')
+
+ # validation loss
+ # read from the fixed checkpoint name checkpoint_finalize_epoch_1.pth
+ # NOTE(review): torch.load unpickles the file, only use on trusted output
+ checkpoint = torch.load(os.path.join(training_folder, 'checkpoints', 'checkpoint_finalize_epoch_1.pth'), map_location='cpu')
+ validation_loss = checkpoint['validation_loss']
+
+ # eval_warpq
+ # last score parsed from the finalization log
+ eval_warpq = utils.data.parse_warpq_scores(os.path.join(training_folder, 'out_finalize.txt'))[-1]
+
+ # testing results
+ testing_results = utils.data.collect_test_stats(os.path.join(testing_folder, 'final'))
+
+ results = OrderedDict()
+ results['eval_loss'] = validation_loss
+ results['eval_warpq'] = eval_warpq
+ # entry 0 of each test statistic is stored as the mean
+ # (presumably collect_test_stats returns (mean, ...) per metric -- verify)
+ results['pesq_mean'] = testing_results['pesq'][0]
+ results['warpq_mean'] = testing_results['warpq'][0]
+ results['pitch_error_mean'] = testing_results['pitch_error'][0]
+ results['voicing_error_mean'] = testing_results['voicing_error'][0]
+
+ return results
+
+# Writes results (and optionally ranks) to a csv file, one row per run.
+#
+# path: output file, opened in write mode (existing content is replaced)
+# results: mapping uuid -> mapping metric name -> float; must not be empty
+# since the column names are taken from its first entry
+# tag: written unchanged as second column of every row
+# ranks: optional mapping uuid -> mapping rank name -> int
+# header: whether to write the column names
+def print_csv(path, results, tag, ranks=None, header=True):
+
+ metrics = next(iter(results.values())).keys()
+ if ranks is not None:
+ rank_keys = next(iter(ranks.values())).keys()
+ else:
+ rank_keys = []
+
+ with open(path, 'w') as f:
+ if header:
+ f.write("uuid, tag")
+
+ for metric in metrics:
+ f.write(f", {metric}")
+
+ for rank in rank_keys:
+ f.write(f", {rank}")
+
+ f.write("\n")
+
+
+ for uuid, values in results.items():
+ f.write(f"{uuid}, {tag}")
+
+ # metric values are written in the iteration order of each row's dict
+ for val in values.values():
+ f.write(f", {val:10.8f}")
+
+ for rank in rank_keys:
+ f.write(f", {ranks[uuid][rank]:4d}")
+
+ f.write("\n")
+
+# Ranks all runs separately for every metric.
+#
+# results: mapping uuid -> mapping metric name -> value; must not be empty
+#
+# Returns an OrderedDict uuid -> OrderedDict 'rank_<metric>' -> rank, where
+# rank 1 is the best run for that metric.
+def get_ranks(results):
+
+ metrics = list(next(iter(results.values())).keys())
+
+ # metrics for which larger values are better; all others are ranked
+ # with the smallest value first
+ positive = {'pesq_mean', 'mix'}
+
+ ranks = OrderedDict()
+ for key in results.keys():
+ ranks[key] = OrderedDict()
+
+ for metric in metrics:
+ sign = -1 if metric in positive else 1
+
+ # sort run keys by (signed) metric value, best run first
+ x = sorted([(key, value[metric]) for key, value in results.items()], key=lambda x: sign * x[1])
+ x = [y[0] for y in x]
+
+ for key in results.keys():
+ ranks[key]['rank_' + metric] = x.index(key) + 1
+
+ return ranks
+
+# Debug helper: prints an array with one row per metric and one column per
+# run for the fixed list of metrics below. Returns nothing.
+def analyse_metrics(results):
+ metrics = ['eval_loss', 'pesq_mean', 'warpq_mean', 'pitch_error_mean', 'voicing_error_mean']
+
+ x = []
+ for metric in metrics:
+ x.append([val[metric] for val in results.values()])
+
+ x = np.array(x)
+
+ print(x)
+
+# Adds a combined score under the key 'mix' to every entry of results
+# (results is modified in place) and prints the covariance matrix of the
+# normalized scores.
+def add_mix_metric(results):
+ metrics = ['eval_loss', 'pesq_mean', 'warpq_mean', 'pitch_error_mean', 'voicing_error_mean']
+
+ x = []
+ for metric in metrics:
+ x.append([val[metric] for val in results.values()])
+
+ # rows are runs, columns are metrics; every metric except pesq_mean is
+ # negated so that larger values are better in all columns
+ x = np.array(x).transpose() * np.array([-1, 1, -1, -1, -1])
+
+ # normalize every metric to zero mean and unit standard deviation
+ # over the runs
+ # NOTE(review): a metric with zero standard deviation (e.g. only one
+ # run) leads to a division by zero here
+ z = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
+
+ print(f"covariance matrix for normalized scores of {metrics}:")
+ print(np.cov(z.transpose()))
+
+ # mix score of a run is the mean of its normalized scores
+ score = np.mean(z, axis=1)
+
+ for i, key in enumerate(results.keys()):
+ results[key]['mix'] = score[i].item()
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+
+ uuids = sorted([x for x in os.listdir(args.input) if os.path.isdir(os.path.join(args.input, x)) and is_uuid(x)])
+
+
+ results = OrderedDict()
+
+ for uuid in uuids:
+ results[uuid] = collect_results(os.path.join(args.input, uuid))
+
+
+ add_mix_metric(results)
+
+ ranks = get_ranks(results)
+
+
+
+ csv = args.csv if args.csv.endswith('.csv') else args.csv + '.csv'
+
+ print_csv(args.csv, results, args.tag, ranks=ranks)
+
+
+ with open(csv[:-4] + '.pickle', 'wb') as f:
+ pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL) \ No newline at end of file
diff --git a/dnn/torch/lpcnet/scripts/loop_run.sh b/dnn/torch/lpcnet/scripts/loop_run.sh
new file mode 100644
index 00000000..7250f639
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/loop_run.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Runs ROUNDS consecutive experiments on a single device. Every round
+# trains a model, finalizes it, creates two test configs and runs the test
+# suite on the regular and on the finalized checkpoint. All output of a
+# round goes to ${OUTDIR}/<uuid>, where <uuid> is freshly generated.
+#
+# The python interpreter can be overridden through the PYTHON environment
+# variable.
+
+case $# in
+    9) SETUP=$1; OUTDIR=$2; NAME=$3; DEVICE=$4; ROUNDS=$5; LPCNEXT=$6; LPCNET=$7; TESTSUITE=$8; TESTITEMS=$9;;
+    *) echo "loop_run.sh setup outdir name device rounds lpcnext_repo lpcnet_repo testsuite_repo testitems" >&2; exit 1;;
+esac
+
+
+PYTHON="${PYTHON:-/home/ubuntu/opt/miniconda3/envs/torch/bin/python}"
+TESTFEATURES="${LPCNEXT}/testitems/features/all_0_orig_features.f32"
+WARPQREFERENCE="${LPCNEXT}/testitems/wav/all_0_orig.wav"
+METRICS="warpq,pesq,pitch_error,voicing_error"
+LPCNETDEMO="${LPCNET}/lpcnet_demo"
+
+for ((round = 1; round <= ROUNDS; round++))
+do
+    echo
+    echo "round $round"
+
+    UUID=$(uuidgen)
+    TRAINOUT="${OUTDIR}/${UUID}/training"
+    TESTOUT="${OUTDIR}/${UUID}/testing"
+    CHECKPOINT="${TRAINOUT}/checkpoints/checkpoint_last.pth"
+    FINALCHECKPOINT="${TRAINOUT}/checkpoints/checkpoint_finalize_last.pth"
+
+    # run training
+    echo "starting training..."
+    "$PYTHON" "$LPCNEXT/train_lpcnet.py" "$SETUP" "$TRAINOUT" --device "$DEVICE" --test-features "$TESTFEATURES" --warpq-reference "$WARPQREFERENCE"
+
+    # run finalization
+    echo "starting finalization..."
+    "$PYTHON" "$LPCNEXT/train_lpcnet.py" "$SETUP" "$TRAINOUT" \
+        --device "$DEVICE" --test-features "$TESTFEATURES" \
+        --warpq-reference "$WARPQREFERENCE" \
+        --finalize --initial-checkpoint "$CHECKPOINT"
+
+    # create test configs
+    "$PYTHON" "$LPCNEXT/make_test_config.py" "${OUTDIR}/${UUID}/testconfig.yml" "$NAME $UUID" "$CHECKPOINT" --lpcnet-demo "$LPCNETDEMO"
+    "$PYTHON" "$LPCNEXT/make_test_config.py" "${OUTDIR}/${UUID}/testconfig_finalize.yml" "$NAME $UUID finalized" "$FINALCHECKPOINT" --lpcnet-demo "$LPCNETDEMO"
+
+    # run tests
+    echo "starting test 1 (no finalization)..."
+    "$PYTHON" "$TESTSUITE/run_test.py" "${OUTDIR}/${UUID}/testconfig.yml" \
+        "$TESTITEMS" "${TESTOUT}/prefinal" --num-workers 8 \
+        --num-testitems 400 --metrics "$METRICS"
+
+    echo "starting test 2 (after finalization)..."
+    "$PYTHON" "$TESTSUITE/run_test.py" "${OUTDIR}/${UUID}/testconfig_finalize.yml" \
+        "$TESTITEMS" "${TESTOUT}/final" --num-workers 8 \
+        --num-testitems 400 --metrics "$METRICS"
+done
diff --git a/dnn/torch/lpcnet/scripts/make_animation.py b/dnn/torch/lpcnet/scripts/make_animation.py
new file mode 100644
index 00000000..57656ef1
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/make_animation.py
@@ -0,0 +1,67 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+""" script for creating animations from debug data
+
+"""
+
+
+import argparse
+
+
+import sys
+sys.path.append('./')
+
+from utils.endoscopy import make_animation, read_data
+
+# command line interface: endoscopy folder and output file are positional,
+# the remaining options select the sample range and the display window
+parser = argparse.ArgumentParser()
+
+parser.add_argument('folder', type=str, help='endoscopy folder with debug output')
+parser.add_argument('output', type=str, help='output file (will be auto-extended with .mp4)')
+
+parser.add_argument('--start-index', type=int, help='index of first sample to be considered', default=0)
+parser.add_argument('--stop-index', type=int, help='index of last sample to be considered', default=-1)
+parser.add_argument('--interval', type=int, help='interval between frames in ms', default=20)
+parser.add_argument('--half-window-length', type=int, help='half size of window for displaying signals', default=80)
+
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+
+ # append the .mp4 extension unless the output name already has it
+ filename = args.output if args.output.endswith('.mp4') else args.output + '.mp4'
+ data = read_data(args.folder)
+
+ # NOTE(review): args.interval is parsed above but not passed on to
+ # make_animation, so --interval currently has no effect -- confirm
+ # whether make_animation takes an interval argument
+ make_animation(
+ data,
+ filename,
+ start_index=args.start_index,
+ stop_index = args.stop_index,
+ half_signal_window_length=args.half_window_length
+ )
diff --git a/dnn/torch/lpcnet/scripts/modify_dataset_target.py b/dnn/torch/lpcnet/scripts/modify_dataset_target.py
new file mode 100644
index 00000000..a70fe169
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/modify_dataset_target.py
@@ -0,0 +1,17 @@
+import argparse
+
+import numpy as np
+
+
+parser = argparse.ArgumentParser(description="sets s_t to augmented_s_t")
+
+parser.add_argument('datafile', type=str, help='data.s16 file path')
+
+args = parser.parse_args()
+
+# the file is memory-mapped in 'readwrite' mode as int16 samples, so the
+# assignment below modifies the given file in place
+data = np.memmap(args.datafile, dtype='int16', mode='readwrite')
+
+# signal is in data[1::2]
+# last augmented signal is in data[0::2]
+
+# overwrite every odd-indexed sample 2k+1 (except a sample at the very last
+# position) with the even-indexed sample 2k+2 that follows it
+# NOTE(review): there is no explicit data.flush(); this relies on the
+# memmap being written back when it is released at interpreter exit
+data[1 : - 1 : 2] = data[2 : : 2]
diff --git a/dnn/torch/lpcnet/scripts/multi_run.sh b/dnn/torch/lpcnet/scripts/multi_run.sh
new file mode 100644
index 00000000..fb0fee14
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/multi_run.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+case $# in
+ 9) SETUP=$1; OUTDIR=$2; NAME=$3; NUMDEVICES=$4; ROUNDS=$5; LPCNEXT=$6; LPCNET=$7; TESTSUITE=$8; TESTITEMS=$9;;
+ *) echo "multi_run.sh setup outdir name num_devices rounds_per_device lpcnext_repo lpcnet_repo testsuite_repo testitems"; exit;;
+esac
+
+
+LOOPRUN=${LPCNEXT}/loop_run.sh
+
+mkdir -p $OUTDIR
+
+for ((i = 0; i < $NUMDEVICES; i++))
+do
+ echo "launching job queue for device $i"
+ nohup bash $LOOPRUN $SETUP $OUTDIR "$NAME" "cuda:$i" $ROUNDS $LPCNEXT $LPCNET $TESTSUITE $TESTITEMS > $OUTDIR/job_${i}_out.txt &
+done
diff --git a/dnn/torch/lpcnet/scripts/run_inference_test.sh b/dnn/torch/lpcnet/scripts/run_inference_test.sh
new file mode 100644
index 00000000..9f22b03d
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/run_inference_test.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Runs test_lpcnet.py on the given features file for every checkpoint*.pth
+# file found below the output folder. The generated wav files are written
+# to <output folder>/inference_test.
+
+case $# in
+    3) FEATURES=$1; FOLDER=$2; PYTHON=$3;;
+    *) echo "run_inference_test.sh <features file> <output folder> <python path>" >&2; exit 1;;
+esac
+
+
+SCRIPTFOLDER=$(dirname "$0")
+
+mkdir -p "$FOLDER/inference_test"
+
+# run inference for every checkpoint; file names are read NUL-separated so
+# that paths containing whitespace are not split
+find "$FOLDER" -type f -name "checkpoint*.pth" -print0 | while IFS= read -r -d '' fn
+do
+    # derive the output name from the checkpoint name: the .pth extension
+    # and, if present, the checkpoint_epoch_ prefix are stripped
+    tmp=$(basename "$fn")
+    tmp=${tmp%.pth}
+    epoch=${tmp#checkpoint_epoch_}
+    echo "running inference with checkpoint $fn..."
+    "$PYTHON" "$SCRIPTFOLDER/../test_lpcnet.py" "$FEATURES" "$fn" "$FOLDER/inference_test/output_epoch_${epoch}.wav"
+done
diff --git a/dnn/torch/lpcnet/scripts/update_checkpoints.py b/dnn/torch/lpcnet/scripts/update_checkpoints.py
new file mode 100644
index 00000000..8f00a7e5
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/update_checkpoints.py
@@ -0,0 +1,54 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" script for updating checkpoints with new setup entries
+
+ Use this script to update older outputs with newly introduced
+ parameters. (Saves us the trouble of backward compatibility)
+"""
+
+
+import argparse
+
+import torch
+
+# command line: checkpoint to rewrite plus the optional new setup entries
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint_file', type=str, help='checkpoint to be updated')
+parser.add_argument('--model', type=str, help='model update', default=None)
+
+args = parser.parse_args()
+
+# NOTE(review): torch.load unpickles the file, only run this on trusted checkpoints
+checkpoint = torch.load(args.checkpoint_file, map_location='cpu')
+
+# update model entry
+if args.model is not None:
+    checkpoint['setup']['lpcnet']['model'] = args.model
+
+# the checkpoint file is overwritten in place
+torch.save(checkpoint, args.checkpoint_file)
\ No newline at end of file
diff --git a/dnn/torch/lpcnet/scripts/update_output_folder.sh b/dnn/torch/lpcnet/scripts/update_output_folder.sh
new file mode 100644
index 00000000..487d4a2d
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/update_output_folder.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Writes a new model entry into <folder>/setup.yml and into every
+# checkpoint*.pth found below <folder>.
+
+case $# in
+    3) FOLDER=$1; MODEL=$2; PYTHON=$3;;
+    # wrong usage is an error: report on stderr and return a non-zero status
+    *) echo "update_output_folder.sh folder model python" >&2; exit 1;;
+esac
+
+
+SCRIPTFOLDER=$(dirname "$0")
+
+
+# update setup (the message now names the file that is actually modified)
+echo "updating $FOLDER/setup.yml..."
+"$PYTHON" "$SCRIPTFOLDER/update_setups.py" "$FOLDER/setup.yml" --model "$MODEL"
+
+# update checkpoints; names are read NUL-delimited from fd 3 so that paths with
+# whitespace stay intact and stdin of the python call is untouched
+while IFS= read -r -d '' -u 3 fn
+do
+    echo "updating $fn..."
+    "$PYTHON" "$SCRIPTFOLDER/update_checkpoints.py" "$fn" --model "$MODEL"
+done 3< <(find "$FOLDER" -type f -name "checkpoint*.pth" -print0)
\ No newline at end of file
diff --git a/dnn/torch/lpcnet/scripts/update_setups.py b/dnn/torch/lpcnet/scripts/update_setups.py
new file mode 100644
index 00000000..52f81cf4
--- /dev/null
+++ b/dnn/torch/lpcnet/scripts/update_setups.py
@@ -0,0 +1,57 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" script for updating setup files with new setup entries
+
+ Use this script to update older outputs with newly introduced
+ parameters. (Saves us the trouble of backward compatibility)
+"""
+
+import argparse
+
+import yaml
+
+# command line: setup file to rewrite plus the optional new setup entries
+parser = argparse.ArgumentParser()
+
+parser.add_argument('setup_file', type=str, help='setup to be updated')
+parser.add_argument('--model', type=str, help='model update', default=None)
+
+args = parser.parse_args()
+
+# load setup
+with open(args.setup_file, 'r') as f:
+    setup = yaml.load(f.read(), yaml.FullLoader)
+
+# update model entry
+if args.model is not None:
+    setup['lpcnet']['model'] = args.model
+
+# dump result (the setup file is overwritten in place)
+with open(args.setup_file, 'w') as f:
+    yaml.dump(setup, f)
diff --git a/dnn/torch/lpcnet/test_lpcnet.py b/dnn/torch/lpcnet/test_lpcnet.py
new file mode 100644
index 00000000..49db8b06
--- /dev/null
+++ b/dnn/torch/lpcnet/test_lpcnet.py
@@ -0,0 +1,89 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+
+import torch
+import numpy as np
+
+
+from models import model_dict
+from utils.data import load_features
+from utils.wav import wavwrite16
+
+debug = False
+if debug:
+    # stand-in for the argparse namespace, handy when running from an IDE
+    args = type('dummy', (object,),
+    {
+        'features' : 'features.f32',
+        'checkpoint' : 'checkpoint.pth',
+        'output' : 'out.wav',
+        'version' : 2
+    })()
+else:
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('features', type=str, help='feature file')
+    parser.add_argument('checkpoint', type=str, help='checkpoint file')
+    parser.add_argument('output', type=str, help='output file')
+    parser.add_argument('--version', type=int, help='feature version', default=2)
+
+    args = parser.parse_args()
+
+
+torch.set_num_threads(2)
+
+version = args.version
+feature_file = args.features
+checkpoint_file = args.checkpoint
+
+# the output is always written with a .wav extension
+output_file = args.output
+if not output_file.endswith('.wav'):
+    output_file += '.wav'
+
+checkpoint = torch.load(checkpoint_file, map_location="cpu")
+
+# check model (older checkpoints carry no model entry in their setup)
+if 'model' not in checkpoint['setup']['lpcnet']:
+    print('warning: did not find model entry in setup, using default lpcnet')
+    model_name = 'lpcnet'
+else:
+    model_name = checkpoint['setup']['lpcnet']['model']
+
+model = model_dict[model_name](checkpoint['setup']['lpcnet']['config'])
+
+model.load_state_dict(checkpoint['state_dict'])
+
+# forward --version: it used to be parsed but ignored, so every feature file
+# was read with the default (version 2) layout
+data = load_features(feature_file, version=version)
+
+output = model.generate(data['features'], data['periods'], data['lpcs'])
+
+wavwrite16(output_file, output.numpy(), 16000)
diff --git a/dnn/torch/lpcnet/train_lpcnet.py b/dnn/torch/lpcnet/train_lpcnet.py
new file mode 100644
index 00000000..c6121c87
--- /dev/null
+++ b/dnn/torch/lpcnet/train_lpcnet.py
@@ -0,0 +1,272 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+
+try:
+ import git
+ has_git = True
+except:
+ has_git = False
+
+import yaml
+
+
+import torch
+from torch.optim.lr_scheduler import LambdaLR
+
+from data import LPCNetDataset
+from models import model_dict
+from engine.lpcnet_engine import train_one_epoch, evaluate
+from utils.data import load_features
+from utils.wav import wavwrite16
+
+
+debug = False
+if debug:
+    # stand-in for the argparse namespace; the attribute names have to match the
+    # argparse destinations read further down (dashes become underscores), so
+    # the redirect switch must be spelled 'no_redirect'
+    args = type('dummy', (object,),
+    {
+        'setup' : 'setup.yml',
+        'output' : 'testout',
+        'device' : None,
+        'test_features' : None,
+        'finalize': False,
+        'initial_checkpoint': None,
+        'no_redirect': False
+    })()
+else:
+    parser = argparse.ArgumentParser("train_lpcnet.py")
+    parser.add_argument('setup', type=str, help='setup yaml file')
+    parser.add_argument('output', type=str, help='output path')
+    parser.add_argument('--device', type=str, help='compute device', default=None)
+    parser.add_argument('--test-features', type=str, help='test feature file in v2 format', default=None)
+    parser.add_argument('--finalize', action='store_true', help='run single training round with lr=1e-5')
+    parser.add_argument('--initial-checkpoint', type=str, help='initial checkpoint', default=None)
+    parser.add_argument('--no-redirect', action='store_true', help='disables re-direction of output')
+
+    args = parser.parse_args()
+
+
+torch.set_num_threads(4)
+
+# read the yaml setup describing dataset, model and training parameters
+with open(args.setup, 'r') as setup_fid:
+    setup = yaml.load(setup_fid.read(), yaml.FullLoader)
+
+if args.finalize:
+    # finalization is a single extra epoch on top of an existing checkpoint
+    if args.initial_checkpoint is None:
+        raise ValueError('finalization requires initial checkpoint')
+
+    # reset start and stop of every sparsification job to 0
+    if 'sparsification' in setup['lpcnet']['config']:
+        for job in setup['lpcnet']['config']['sparsification'].values():
+            job['start'] = 0
+            job['stop'] = 0
+
+    # one epoch at a constant learning rate of 1e-5
+    setup['training']['lr'] = 1.0e-5
+    setup['training']['lr_decay_factor'] = 0.0
+    setup['training']['epochs'] = 1
+
+    run_tag = '_finalize'
+else:
+    run_tag = ''
+
+# names of all outputs carry the run tag
+checkpoint_prefix = 'checkpoint' + run_tag
+output_prefix = 'output' + run_tag
+setup_name = 'setup' + run_tag + '.yml'
+output_file = 'out' + run_tag + '.txt'
+
+
+# check model: fall back to the default model for setups without model entry
+if 'model' in setup['lpcnet']:
+    model_name = setup['lpcnet']['model']
+else:
+    print(f'warning: did not find model entry in setup, using default lpcnet')
+    model_name = 'lpcnet'
+
+# prepare output folder; an existing folder is only reused after confirmation
+# (no confirmation is asked in debug or finalize mode)
+if os.path.exists(args.output) and not debug and not args.finalize:
+    print("warning: output folder exists")
+
+    reply = input('continue? (y/n): ')
+    while reply not in {'y', 'n'}:
+        reply = input('continue? (y/n): ')
+
+    if reply == 'n':
+        # os._exit() requires a status argument and raised TypeError here;
+        # a user-requested abort is a regular exit
+        sys.exit(0)
+else:
+    os.makedirs(args.output, exist_ok=True)
+
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+
+# add repo info (commit hash, remote urls, dirty flag) to setup
+if has_git:
+    working_dir = os.path.split(__file__)[0]
+    try:
+        # the script may live in a sub-folder of the repository, so the
+        # enclosing repository is searched upwards from the script location
+        repo = git.Repo(working_dir, search_parent_directories=True)
+        commit_hash = repo.head.object.hexsha
+        urls = list(repo.remote().urls)
+        is_dirty = repo.is_dirty()
+
+        if is_dirty:
+            print("warning: repo is dirty")
+
+        # only stored once everything was collected, so a failure does not
+        # leave an empty 'repo' entry behind
+        setup['repo'] = {'hash' : commit_hash, 'urls' : urls, 'dirty' : is_dirty}
+    except Exception:
+        # best effort: training proceeds without repo info
+        has_git = False
+
+# dump setup
+with open(os.path.join(args.output, setup_name), 'w') as f:
+    yaml.dump(setup, f)
+
+# prepare inference test if wanted
+run_inference_test = False
+if args.test_features is not None:
+    test_features = load_features(args.test_features)
+    inference_test_dir = os.path.join(args.output, 'inference_test')
+    os.makedirs(inference_test_dir, exist_ok=True)
+    run_inference_test = True
+
+# training parameters
+batch_size = setup['training']['batch_size']
+epochs = setup['training']['epochs']
+lr = setup['training']['lr']
+lr_decay_factor = setup['training']['lr_decay_factor']
+
+# dataset options shared by training and validation set
+lpcnet_config = setup['lpcnet']['config']
+dataset_options = dict(
+    features=lpcnet_config['features'],
+    input_signals=lpcnet_config['signals'],
+    target=lpcnet_config['target'],
+    frames_per_sample=setup['training']['frames_per_sample'],
+    feature_history=lpcnet_config['feature_history'],
+    feature_lookahead=lpcnet_config['feature_lookahead'],
+    lpc_gamma=lpcnet_config.get('lpc_gamma', 1)
+)
+
+# load training dataset
+data = LPCNetDataset(setup['dataset'], **dataset_options)
+
+# load validation dataset if given
+run_validation = 'validation_dataset' in setup
+if run_validation:
+    validation_data = LPCNetDataset(setup['validation_dataset'], **dataset_options)
+
+    validation_dataloader = torch.utils.data.DataLoader(validation_data, batch_size=batch_size, drop_last=True, num_workers=4)
+
+# create model
+model = model_dict[model_name](setup['lpcnet']['config'])
+
+if args.initial_checkpoint is not None:
+    print(f"loading state dict from {args.initial_checkpoint}...")
+    chkpt = torch.load(args.initial_checkpoint, map_location='cpu')
+    model.load_state_dict(chkpt['state_dict'])
+
+# set compute device (cuda if available unless a device was requested)
+if args.device is None:
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+else:
+    device = torch.device(args.device)
+
+# push model to device
+model.to(device)
+
+# dataloader
+dataloader = torch.utils.data.DataLoader(data, batch_size=batch_size, drop_last=True, shuffle=True, num_workers=4)
+
+# optimizer is introduced to trainable parameters
+parameters = [p for p in model.parameters() if p.requires_grad]
+optimizer = torch.optim.Adam(parameters, lr=lr)
+
+# learning rate scheduler: scales the base lr by 1 / (1 + lr_decay_factor * x),
+# where x is the scheduler's step counter. The scheduler is handed to
+# train_one_epoch; how often it is stepped is decided there (not visible here).
+scheduler = LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))
+
+# loss
+criterion = torch.nn.NLLLoss()
+
+# model checkpoint; 'state_dict' and 'loss' are refreshed after every epoch
+checkpoint = {
+ 'setup' : setup,
+ 'state_dict' : model.state_dict(),
+ 'loss' : -1
+}
+
+# from here on print() output goes to the output file unless --no-redirect is
+# given; the file object is never closed explicitly
+if not args.no_redirect:
+ print(f"re-directing output to {os.path.join(args.output, output_file)}")
+ sys.stdout = open(os.path.join(args.output, output_file), "w")
+
+# best validation loss seen so far; only updated when a validation set is used
+best_loss = 1e9
+
+for ep in range(1, epochs + 1):
+ print(f"training epoch {ep}...")
+ new_loss = train_one_epoch(model, criterion, optimizer, dataloader, device, scheduler)
+
+
+ # update checkpoint content with the state after this epoch
+ checkpoint['state_dict'] = model.state_dict()
+ checkpoint['loss'] = new_loss
+
+ if run_validation:
+ print("running validation...")
+ validation_loss = evaluate(model, criterion, validation_dataloader, device)
+ checkpoint['validation_loss'] = validation_loss
+
+ # <prefix>_best.pth tracks the lowest validation loss
+ if validation_loss < best_loss:
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_best.pth'))
+ best_loss = validation_loss
+
+ # one checkpoint per epoch plus <prefix>_last.pth
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_epoch_{ep}.pth'))
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_last.pth'))
+
+ # run inference test: generation is done on the cpu, afterwards the model is
+ # moved back to the training device
+ if run_inference_test:
+ model.to("cpu")
+ print("running inference test...")
+
+ output = model.generate(test_features['features'], test_features['periods'], test_features['lpcs'])
+
+ testfilename = os.path.join(inference_test_dir, output_prefix + f'_epoch_{ep}.wav')
+
+ wavwrite16(testfilename, output.numpy(), 16000)
+
+ model.to(device)
+
+ print()
diff --git a/dnn/torch/lpcnet/utils/__init__.py b/dnn/torch/lpcnet/utils/__init__.py
new file mode 100644
index 00000000..edbbe02c
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/__init__.py
@@ -0,0 +1,4 @@
+from . import sparsification
+from . import data
+from . import pcm
+from . import sample \ No newline at end of file
diff --git a/dnn/torch/lpcnet/utils/data.py b/dnn/torch/lpcnet/utils/data.py
new file mode 100644
index 00000000..5d6659ef
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/data.py
@@ -0,0 +1,141 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+
+import torch
+import numpy as np
+
+def load_features(feature_file, version=2):
+    """ loads features, pitch periods and LPCs from a raw float32 feature file
+
+    Parameters:
+        feature_file: path of the feature file (flat float32 values)
+        version: feature layout, 1 (55 values per frame) or 2 (36 values per frame)
+
+    Returns:
+        dict with entries 'features' (cepstrum and pitch correlation, one row
+        per frame), 'periods' (integer tensor) and 'lpcs'
+
+    Raises:
+        ValueError: if version is neither 1 nor 2
+    """
+
+    # column ranges of the individual fields within one frame
+    if version == 2:
+        frame_length = 36
+        cepstrum, period, pitch_corr, lpc = slice(0, 18), slice(18, 19), slice(19, 20), slice(20, 36)
+    elif version == 1:
+        frame_length = 55
+        cepstrum, period, pitch_corr, lpc = slice(0, 18), slice(36, 37), slice(37, 38), slice(39, 55)
+    else:
+        raise ValueError(f'unknown feature version: {version}')
+
+    # one row per frame
+    frames = torch.from_numpy(np.fromfile(feature_file, dtype='float32'))
+    frames = frames.reshape((-1, frame_length))
+
+    features = torch.cat([frames[:, cepstrum], frames[:, pitch_corr]], dim=1)
+    lpcs = frames[:, lpc]
+
+    # period feature p is mapped to the integer period 0.1 + 50 * p + 100 (truncated)
+    periods = (0.1 + 50 * frames[:, period] + 100).long()
+
+    return {'features' : features, 'periods' : periods, 'lpcs' : lpcs}
+
+
+
+def create_new_data(signal_path, reference_data_path, new_data_path, offset=320, preemph_factor=0.85):
+    """ creates an int16 data file from a signal, using a reference data file for the size
+
+    The signal is pre-emphasized frame by frame (y[n] = x[n] - preemph_factor * x[n-1])
+    and stored next to signal_path with suffix '_preemph.raw'. The new data file
+    has the shape of the reference file, is zero-initialized and receives the
+    pre-emphasized signal (starting at sample offset) at positions 1, 3, 5, ...
+    and again at positions 2, 4, 6, ...
+
+    Parameters:
+        signal_path: int16 signal file, length must be a multiple of 160
+        reference_data_path: int16 data file that defines the output size
+        new_data_path: path of the data file to be created
+        offset: number of leading signal samples to skip
+        preemph_factor: pre-emphasis coefficient
+    """
+
+    frame_size = 160
+
+    # inputs are only read, so they are mapped read-only
+    ref_data = np.memmap(reference_data_path, dtype=np.int16, mode='r')
+    signal = np.memmap(signal_path, dtype=np.int16, mode='r')
+
+    # checked before any output file is created
+    assert len(signal) % frame_size == 0
+    num_frames = len(signal) // frame_size
+
+    signal_preemph_path = os.path.splitext(signal_path)[0] + '_preemph.raw'
+    signal_preemph = np.memmap(signal_preemph_path, dtype=np.int16, mode='w+', shape=signal.shape)
+
+    # mem carries the last sample of the previous frame across frame boundaries
+    mem = np.zeros(1)
+    for fr in range(num_frames):
+        start, stop = fr * frame_size, (fr + 1) * frame_size
+        signal_preemph[start : stop] = np.convolve(np.concatenate((mem, signal[start : stop])), [1, -preemph_factor], mode='valid')
+        mem = signal[stop - 1 : stop]
+
+    new_data = np.memmap(new_data_path, dtype=np.int16, mode='w+', shape=ref_data.shape)
+
+    new_data[:] = 0
+    N = len(signal) - offset
+    new_data[1 : 2*N + 1: 2] = signal_preemph[offset:]
+    new_data[2 : 2*N + 2: 2] = signal_preemph[offset:]
+
+    # make sure everything is on disk before the maps go out of scope
+    signal_preemph.flush()
+    new_data.flush()
+
+
+def parse_warpq_scores(output_file):
+    """ extracts warpq scores from output file
+
+    Every line starting with "WARP-Q score:" contributes the float that follows
+    the marker; scores are returned as a list in file order.
+    """
+
+    marker = "WARP-Q score:"
+    scores = []
+
+    with open(output_file, "r") as f:
+        for line in f:
+            if line.startswith(marker):
+                scores.append(float(line.split(marker)[-1]))
+
+    return scores
+
+
+def parse_stats_file(file):
+    """ reads mean, bt_mean and top_mean from the first three lines of a stats file
+
+    Each value is the float following the last ':' of its line.
+    """
+
+    with open(file, "r") as f:
+        lines = f.readlines()
+
+    mean, bt_mean, top_mean = (float(lines[i].split(":")[-1]) for i in range(3))
+
+    return mean, bt_mean, top_mean
+
+def collect_test_stats(test_folder):
+    """ collects statistics for all discovered metrics from test folder
+
+    Every file named stats_<metric>.txt in test_folder is parsed with
+    parse_stats_file; the result maps metric to [mean, bt_mean, top_mean].
+    A warning is printed for metrics outside the known set, they are
+    collected nevertheless.
+    """
+
+    known_metrics = {'pesq', 'warpq', 'pitch_error', 'voicing_error'}
+    prefix, suffix = "stats_", ".txt"
+
+    results = dict()
+
+    for name in os.listdir(test_folder):
+        if not name.startswith('stats_'):
+            continue
+
+        # metric name is what remains after stripping prefix and suffix
+        metric = name[len(prefix) : -len(suffix)]
+
+        if metric not in known_metrics:
+            print(f"warning: unknown metric {metric}")
+
+        results[metric] = list(parse_stats_file(os.path.join(test_folder, name)))
+
+    return results
diff --git a/dnn/torch/lpcnet/utils/endoscopy.py b/dnn/torch/lpcnet/utils/endoscopy.py
new file mode 100644
index 00000000..141447e2
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/endoscopy.py
@@ -0,0 +1,234 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" module for inspecting models during inference """
+
+import os
+
+import yaml
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+
+import torch
+import numpy as np
+
+# stores entries {key : {'fid' : fid, 'fs' : fs, 'dim' : dim, 'dtype' : dtype}}
+_state = dict()
+_folder = 'endoscopy'
+
+def get_gru_gates(gru, input, state):
+    """ computes reset, update and new gate of the first GRU layer for one input/state pair
+
+    Parameters:
+        gru: GRU module providing weight_ih_l0, weight_hh_l0, bias_ih_l0, bias_hh_l0
+        input: input tensor, squeezed before use
+        state: state tensor, squeezed before use
+
+    Returns:
+        dict with entries 'reset_gate', 'update_gate' and 'new_gate'
+    """
+
+    n = gru.hidden_size
+
+    from_input = torch.matmul(gru.weight_ih_l0, input.squeeze())
+    from_state = torch.matmul(gru.weight_hh_l0, state.squeeze())
+
+    # weights and biases stack the gates in the order reset, update, new
+    r, z, c = slice(0, n), slice(n, 2 * n), slice(2 * n, 3 * n)
+
+    reset_gate = torch.sigmoid(from_input[r] + gru.bias_ih_l0[r] + from_state[r] + gru.bias_hh_l0[r])
+    update_gate = torch.sigmoid(from_input[z] + gru.bias_ih_l0[z] + from_state[z] + gru.bias_hh_l0[z])
+
+    # the reset gate only scales the recurrent contribution
+    new_gate = torch.tanh(from_input[c] + gru.bias_ih_l0[c] + reset_gate * (from_state[c] + gru.bias_hh_l0[c]))
+
+    return {'reset_gate' : reset_gate, 'update_gate' : update_gate, 'new_gate' : new_gate}
+
+
+def init(folder='endoscopy'):
+    """ sets up output folder for endoscopy data
+
+    The folder is remembered for later write_data calls. It is created if it
+    does not exist yet; otherwise a warning is printed and it is used as is.
+    """
+
+    global _folder
+    _folder = folder
+
+    if os.path.exists(folder):
+        print(f"warning: endoscopy folder {folder} exists. Content may be lost or inconsistent results may occur.")
+        return
+
+    os.makedirs(folder)
+
+def write_data(key, data, fs):
+    """ appends data to previous data written under key
+
+    The first call for a key opens <folder>/<key>.bin and writes <key>.yml
+    holding fs, dim and dtype; later calls must use the same fs, dtype and shape.
+
+    Parameters:
+        key: name of the data stream, also used as file name
+        data: torch.Tensor or anything np.asarray accepts
+        fs: sampling rate associated with the stream
+
+    Raises:
+        ValueError: if fs, dtype or shape differ from the first call for key
+    """
+
+    global _state
+
+    # convert to numpy if torch.Tensor is given (moved to the cpu first, since
+    # .numpy() is not available for tensors living on other devices)
+    if isinstance(data, torch.Tensor):
+        data = data.detach().cpu().numpy()
+    else:
+        # plain scalars and lists have neither shape nor dtype
+        data = np.asarray(data)
+
+    if key not in _state:
+        _state[key] = {
+            'fid' : open(os.path.join(_folder, key + '.bin'), 'wb'),
+            'fs' : fs,
+            'dim' : tuple(data.shape),
+            'dtype' : str(data.dtype)
+        }
+
+        with open(os.path.join(_folder, key + '.yml'), 'w') as f:
+            f.write(yaml.dump({'fs' : fs, 'dim' : tuple(data.shape), 'dtype' : str(data.dtype).split('.')[-1]}))
+    else:
+        if _state[key]['fs'] != fs:
+            raise ValueError(f"fs changed for key {key}: {_state[key]['fs']} vs. {fs}")
+        if _state[key]['dtype'] != str(data.dtype):
+            raise ValueError(f"dtype changed for key {key}: {_state[key]['dtype']} vs. {str(data.dtype)}")
+        if _state[key]['dim'] != tuple(data.shape):
+            raise ValueError(f"dim changed for key {key}: {_state[key]['dim']} vs. {tuple(data.shape)}")
+
+    _state[key]['fid'].write(data.tobytes())
+
+def close(folder='endoscopy'):
+    """ clean up: closes all data files opened by write_data
+
+    The folder argument is not used; it is kept for interface compatibility.
+    """
+    for key in _state.keys():
+        _state[key]['fid'].close()
+
+    # forget the closed handles, otherwise a later write_data for a known key
+    # would try to write to a closed file
+    _state.clear()
+
+
+def read_data(folder='endoscopy'):
+    """ retrieves written data as numpy arrays
+
+    For every <key>.yml in folder the description is loaded and the content of
+    <key>.bin is added as entry 'data', reshaped to (-1,) + dim.
+
+    Returns:
+        dict mapping key to its description including 'data'
+    """
+
+    return_dict = dict()
+
+    for name in os.listdir(folder):
+        if not name.endswith('.yml'):
+            continue
+
+        key = name[:-4]
+
+        with open(os.path.join(folder, key + '.yml'), 'r') as f:
+            value = yaml.load(f.read(), yaml.FullLoader)
+
+        with open(os.path.join(folder, key + '.bin'), 'rb') as f:
+            raw = np.frombuffer(f.read(), dtype=value['dtype'])
+
+        # first axis enumerates the individual write_data calls
+        value['data'] = raw.reshape((-1,) + value['dim'])
+
+        return_dict[key] = value
+
+    return return_dict
+
+def get_best_reshape(shape, target_ratio=1):
+    """ calculates the best 2d reshape of shape given the target ratio (rows/cols)
+
+    Parameters:
+        shape: sequence of dimensions
+        target_ratio: desired ratio rows / cols
+
+    Returns:
+        (1,) if shape holds a single element, otherwise (num_rows, num_columns)
+        with num_rows * num_columns equal to the number of elements
+    """
+
+    pixel_count = 1
+    for s in shape:
+        pixel_count *= s
+
+    if pixel_count == 1:
+        return (1,)
+
+    # start from the column count matching the target ratio; at least one
+    # column, since the estimate truncates to 0 for large target ratios and
+    # would cause a modulo by zero below
+    num_columns = max(1, int((pixel_count / target_ratio)**.5))
+
+    # decrease until the column count divides the number of elements
+    while (pixel_count % num_columns):
+        num_columns -= 1
+
+    num_rows = pixel_count // num_columns
+
+    return (num_rows, num_columns)
+
+def get_type_and_shape(shape):
+    """ selects display type and display shape for data of the given shape
+
+    Returns:
+        ('plot', (1,)) for single-element data, otherwise ('image', shape2d),
+        where shape2d is shape itself for proper 2-d data and the result of
+        get_best_reshape in all other cases
+    """
+
+    # can happen if data is one dimensional
+    if len(shape) == 0:
+        shape = (1,)
+
+    # calculate pixel count
+    pixel_count = 1
+    for s in shape:
+        pixel_count *= s
+
+    if pixel_count == 1:
+        return 'plot', (1, )
+
+    # stay with shape if already 2-dimensional, i.e. if no dimension holds all
+    # elements on its own. The previous `or` condition was true for every 2-d
+    # shape, so 1 x N and N x 1 data was never re-arranged.
+    if len(shape) == 2:
+        if (shape[0] != pixel_count) and (shape[1] != pixel_count):
+            return 'image', shape
+
+    return 'image', get_best_reshape(shape)
+
+def make_animation(data, filename, start_index=80, stop_index=-80, interval=20, half_signal_window_length=80):
+    """ renders the data returned by read_data as an animation, one subplot per key
+
+    Parameters:
+        data: dict as returned by read_data (entries with 'fs', 'dim' and 'data')
+        filename: output file, '.mp4' is appended if missing
+        start_index: first sample index to animate
+        stop_index: end sample index (exclusive), negative values count from the end
+        interval: delay between frames, passed on to ArtistAnimation
+        half_signal_window_length: number of samples shown on each side of the
+            current index for 'plot' type data
+
+    Raises:
+        ValueError: if data is empty
+    """
+
+    # determine plot setup
+    num_keys = len(data.keys())
+
+    if num_keys == 0:
+        raise ValueError("no data given")
+
+    # at least one row: the estimate truncates to 0 for a single key
+    num_rows = max(1, int((num_keys * 3/4) ** .5))
+
+    num_cols = (num_keys + num_rows - 1) // num_rows
+
+    # squeeze=False keeps axs two-dimensional even for grids with one row or column
+    fig, axs = plt.subplots(num_rows, num_cols, squeeze=False)
+    fig.set_size_inches(num_cols * 5, num_rows * 5)
+
+    display = dict()
+
+    fs_max = max([val['fs'] for val in data.values()])
+
+    num_samples = max([val['data'].shape[0] for val in data.values()])
+
+    keys = sorted(data.keys())
+
+    # inspect data
+    for i, key in enumerate(keys):
+        axs[i // num_cols, i % num_cols].title.set_text(key)
+
+        display[key] = dict()
+
+        display[key]['type'], display[key]['shape'] = get_type_and_shape(data[key]['dim'])
+        # ratio between the rate of this key and the highest rate
+        display[key]['down_factor'] = data[key]['fs'] / fs_max
+
+    start_index = max(start_index, half_signal_window_length)
+    # negative stop index counts from the end
+    while stop_index < 0 and num_samples > 0:
+        stop_index += num_samples
+
+    stop_index = min(stop_index, num_samples - half_signal_window_length)
+
+    # actual plotting
+    frames = []
+    for index in range(start_index, stop_index):
+        ims = []
+        for i, key in enumerate(keys):
+            ax = axs[i // num_cols, i % num_cols]
+            values = data[key]['data']
+
+            if display[key]['type'] == 'plot':
+                # NOTE(review): the window is taken at the common sample index,
+                # as before; confirm whether plot data can have a lower rate
+                ims.append(ax.plot(values[index - half_signal_window_length : index + half_signal_window_length], marker='P', markevery=[half_signal_window_length], animated=True, color='blue')[0])
+
+            elif display[key]['type'] == 'image':
+                # index into lower-rate data at its own rate (the feature index
+                # used to be computed but ignored), clamped to the last entry
+                feature_index = int(round(index * display[key]['down_factor']))
+                feature_index = min(feature_index, values.shape[0] - 1)
+                ims.append(ax.imshow(values[feature_index].reshape(display[key]['shape']), animated=True))
+
+        frames.append(ims)
+
+    ani = animation.ArtistAnimation(fig, frames, interval=interval, blit=True, repeat_delay=1000)
+
+    if not filename.endswith('.mp4'):
+        filename += '.mp4'
+
+    ani.save(filename)
\ No newline at end of file
diff --git a/dnn/torch/lpcnet/utils/layers/__init__.py b/dnn/torch/lpcnet/utils/layers/__init__.py
new file mode 100644
index 00000000..4a58f221
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/layers/__init__.py
@@ -0,0 +1,3 @@
+from .dual_fc import DualFC
+from .subconditioner import AdditiveSubconditioner, ModulativeSubconditioner, ConcatenativeSubconditioner
+from .pcm_embeddings import PCMEmbedding, DifferentiablePCMEmbedding \ No newline at end of file
diff --git a/dnn/torch/lpcnet/utils/layers/dual_fc.py b/dnn/torch/lpcnet/utils/layers/dual_fc.py
new file mode 100644
index 00000000..25d9a5f9
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/layers/dual_fc.py
@@ -0,0 +1,44 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+
+class DualFC(nn.Module):
+    """ weighted sum of two tanh-activated linear layers sharing one input
+
+    forward(x) = alpha * tanh(dense1(x)) + beta * tanh(dense2(x)), with alpha
+    and beta being trainable one-element parameters initialized to 0.5.
+    """
+
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+
+        # two parallel branches with identical dimensions
+        self.dense1 = nn.Linear(input_dim, output_dim)
+        self.dense2 = nn.Linear(input_dim, output_dim)
+
+        # trainable mixing weights
+        self.alpha = nn.Parameter(torch.tensor([0.5]), requires_grad=True)
+        self.beta = nn.Parameter(torch.tensor([0.5]), requires_grad=True)
+
+    def forward(self, x):
+        first = self.alpha * torch.tanh(self.dense1(x))
+        second = self.beta * torch.tanh(self.dense2(x))
+        return first + second
diff --git a/dnn/torch/lpcnet/utils/layers/pcm_embeddings.py b/dnn/torch/lpcnet/utils/layers/pcm_embeddings.py
new file mode 100644
index 00000000..603a17ab
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/layers/pcm_embeddings.py
@@ -0,0 +1,71 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" module implementing PCM embeddings for LPCNet """
+
+import math as m
+
+import torch
+from torch import nn
+
+
class PCMEmbedding(nn.Module):
    """ embedding layer for PCM levels with level-dependent initialization

    Parameters:
    -----------
    embed_dim : int
        size of each embedding vector, defaults to 128
    num_levels : int
        number of PCM levels, i.e. rows of the embedding table, defaults to 256
    """

    def __init__(self, embed_dim=128, num_levels=256):
        super(PCMEmbedding, self).__init__()

        self.embed_dim = embed_dim
        self.num_levels = num_levels

        # one row per level, embed_dim columns
        self.embedding = nn.Embedding(self.num_levels, self.embed_dim)

        # initialize: row i is uniform noise of width sqrt(12) shifted by
        # sqrt(12) * (i - num_levels / 2); the whole table is scaled by 0.1
        with torch.no_grad():
            num_rows, num_cols = self.num_levels, self.embed_dim
            a = m.sqrt(12) * (torch.rand(num_rows, num_cols) - 0.5)
            for i in range(num_rows):
                a[i, :] += m.sqrt(12) * (i - num_rows / 2)
            self.embedding.weight[:, :] = 0.1 * a

    def forward(self, x):
        """ returns the embedding vectors for the integer level indices in x """
        return self.embedding(x)
+
+
class DifferentiablePCMEmbedding(PCMEmbedding):
    """ PCM embedding for fractional levels

    The embedding of a non-integer level x is the linear interpolation between
    the table rows floor(x) and floor(x) + 1, which keeps the output
    differentiable with respect to x.
    """

    def __init__(self, embed_dim, num_levels=256):
        super(DifferentiablePCMEmbedding, self).__init__(embed_dim, num_levels)

    def forward(self, x):
        """ interpolated embedding lookup

        Parameters:
        -----------
        x : torch.tensor
            floating point levels; floor(x) is used as a table index, so it
            has to be a valid row of the embedding table

        Returns:
        --------
        torch.tensor
            tensor with the shape of x plus a trailing embedding axis
        """
        # lower neighbour: integer part of x, detached since indices carry no gradient
        x_int = torch.floor(x).detach().long()
        # interpolation weight; trailing axis added to broadcast over the embedding axis
        x_frac = (x - x_int).unsqueeze(-1)
        # upper neighbour, clamped to the last valid table row
        x_next = torch.clamp(x_int + 1, max=self.num_levels - 1)

        embed_0 = self.embedding(x_int)
        embed_1 = self.embedding(x_next)

        return (1 - x_frac) * embed_0 + x_frac * embed_1
diff --git a/dnn/torch/lpcnet/utils/layers/subconditioner.py b/dnn/torch/lpcnet/utils/layers/subconditioner.py
new file mode 100644
index 00000000..691eb449
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/layers/subconditioner.py
@@ -0,0 +1,497 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+from re import sub
+import torch
+from torch import nn
+
+
+
+
def get_subconditioner( method,
                        number_of_subsamples,
                        pcm_embedding_size,
                        state_size,
                        pcm_levels,
                        number_of_signals,
                        **kwargs):
    """ creates a subconditioner by name

    Parameters:
    -----------
    method : str
        one of 'additive', 'concatenative' or 'modulative'; any other value
        raises a KeyError
    number_of_subsamples, pcm_embedding_size, state_size, pcm_levels, number_of_signals
        forwarded positionally, in this order, to the selected class
    **kwargs
        forwarded unchanged to the selected class

    Returns:
    --------
    instance of the selected subconditioner class
    """

    registry = dict(
        additive=AdditiveSubconditioner,
        concatenative=ConcatenativeSubconditioner,
        modulative=ModulativeSubconditioner,
    )

    subconditioner_class = registry[method]

    return subconditioner_class(
        number_of_subsamples,
        pcm_embedding_size,
        state_size,
        pcm_levels,
        number_of_signals,
        **kwargs)
+
+
class Subconditioner(nn.Module):
    """ abstract base class for subconditioners

    Subclasses override forward, single_step and get_output_dim. The
    implementations here only raise NotImplementedError.
    """

    def __init__(self):
        """ upsampling by subconditioning

            Upsamples a sequence of states conditioning on pcm signals and
            optionally a feature vector.
        """
        super(Subconditioner, self).__init__()

    def forward(self, states, signals, features=None):
        """ to be overridden: creates the list of subconditioned states """
        raise NotImplementedError("Base class should not be called")

    def single_step(self, index, state, signals, features):
        """ to be overridden: subconditions a single state at position index """
        raise NotImplementedError("Base class should not be called")

    def get_output_dim(self, index):
        """ to be overridden: size of the subconditioned state at position index """
        raise NotImplementedError("Base class should not be called")
+
+
class AdditiveSubconditioner(Subconditioner):
    """ subconditioner that adds pcm embeddings onto the state

    For every position i > 0 the embeddings of the signals at that position
    are summed over the signal axis and accumulated onto the state. Position 0
    passes the state through unchanged.
    """

    def __init__(self,
                 number_of_subsamples,
                 pcm_embedding_size,
                 state_size,
                 pcm_levels,
                 number_of_signals,
                 **kwargs):
        """ subconditioning by addition

        Parameters:
        -----------
        number_of_subsamples : int
            number of positions s per state
        pcm_embedding_size : int
            embedding dimension, has to equal state_size
        state_size : int
            dimension of the states
        pcm_levels : int
            number of rows of each embedding table
        number_of_signals : int
            number of signals per step, only used for the flop estimate in this class
        **kwargs
            accepted and ignored

        Raises:
        -------
        ValueError
            if pcm_embedding_size and state_size differ
        """

        super().__init__()

        self.number_of_subsamples = number_of_subsamples
        self.pcm_embedding_size = pcm_embedding_size
        self.state_size = state_size
        self.pcm_levels = pcm_levels
        self.number_of_signals = number_of_signals

        # embeddings are added to the state, so both need the same size
        if self.pcm_embedding_size != self.state_size:
            raise ValueError('For additive subconditioning state and embedding '
            + f'sizes must match but but got {self.state_size} and {self.pcm_embedding_size}')

        # position 0 has no embedding, the list is indexed by position
        self.embeddings = [None]
        for position in range(1, self.number_of_subsamples):
            table = nn.Embedding(self.pcm_levels, self.pcm_embedding_size)
            self.add_module(f'pcm_embedding_{position}', table)
            self.embeddings.append(table)

    def forward(self, states, signals):
        """ creates list of subconditioned states

            Parameters:
            -----------
            states : torch.tensor
                states of shape (batch, seq_length // s, state_size)
            signals : torch.tensor
                signals of shape (batch, seq_length, number_of_signals)

            Returns:
            --------
            c_states : list of torch.tensor
                list of s subconditioned states
        """

        stride = self.number_of_subsamples

        c_states = [states]
        accumulated = states
        for position in range(1, stride):
            # signals at this position within every group of s steps,
            # embedded and summed over the signal dimension
            embed = self.embeddings[position](signals[:, position::stride]).sum(dim=2)

            accumulated = accumulated + embed
            c_states.append(accumulated)

        return c_states

    def single_step(self, index, state, signals):
        """ carry out single step for inference

            Parameters:
            -----------
            index : int
                position in subconditioning batch

            state : torch.tensor
                state to sub-condition

            signals : torch.tensor
                signals for subconditioning, all but the last dimensions
                must match those of state

            Returns:
            c_state : torch.tensor
                subconditioned state
        """

        # position 0 is the unconditioned state
        if index == 0:
            return state

        embed_signals = self.embeddings[index](signals)

        return state + torch.sum(embed_signals, dim=-2)

    def get_output_dim(self, index):
        """ size of the subconditioned state, independent of index """
        return self.state_size

    def get_average_flops_per_step(self):
        """ flop estimate per step, averaged over the s positions (position 0 is free) """
        s = self.number_of_subsamples
        active_fraction = (s - 1) / s
        return active_fraction * self.number_of_signals * self.pcm_embedding_size
+
+
class ConcatenativeSubconditioner(Subconditioner):
    """ subconditioner that concatenates pcm embeddings to the state

    In recurrent mode position 0 returns the plain state and every later
    position appends its embeddings to the previous result. In non-recurrent
    mode every position (including 0) appends its own embeddings to the
    original state.
    """

    def __init__(self,
                 number_of_subsamples,
                 pcm_embedding_size,
                 state_size,
                 pcm_levels,
                 number_of_signals,
                 recurrent=True,
                 **kwargs):
        """ subconditioning by concatenation

        Parameters:
        -----------
        number_of_subsamples : int
            number of positions s per state
        pcm_embedding_size : int
            embedding dimension per signal
        state_size : int
            dimension of the states
        pcm_levels : int
            number of rows of each embedding table
        number_of_signals : int
            number of signals per step
        recurrent : bool
            if true, embeddings accumulate over positions and position 0 has
            no embedding; defaults to True
        **kwargs
            accepted and ignored
        """

        super().__init__()

        self.number_of_subsamples = number_of_subsamples
        self.pcm_embedding_size = pcm_embedding_size
        self.state_size = state_size
        self.pcm_levels = pcm_levels
        self.number_of_signals = number_of_signals
        self.recurrent = recurrent

        # list is indexed by position; in recurrent mode slot 0 stays empty
        first_position = 1 if self.recurrent else 0
        self.embeddings = [None] * first_position

        for position in range(first_position, self.number_of_subsamples):
            table = nn.Embedding(self.pcm_levels, self.pcm_embedding_size)
            self.add_module(f'pcm_embedding_{position}', table)
            self.embeddings.append(table)

    def forward(self, states, signals):
        """ creates list of subconditioned states

            Parameters:
            -----------
            states : torch.tensor
                states of shape (batch, seq_length // s, state_size)
            signals : torch.tensor
                signals of shape (batch, seq_length, number_of_signals)

            Returns:
            --------
            c_states : list of torch.tensor
                list of s subconditioned states
        """
        stride = self.number_of_subsamples

        first_position = 1 if self.recurrent else 0
        c_states = [states] if self.recurrent else []

        previous = states
        for position in range(first_position, stride):
            # embed signals of this position and merge signal and embedding axes
            embed = torch.flatten(self.embeddings[position](signals[:, position::stride]), -2)

            # recurrent mode extends the previous result, otherwise the original states
            base = previous if self.recurrent else states
            previous = torch.cat((base, embed), dim=-1)

            c_states.append(previous)

        return c_states

    def single_step(self, index, state, signals):
        """ carry out single step for inference

            Parameters:
            -----------
            index : int
                position in subconditioning batch

            state : torch.tensor
                state to sub-condition

            signals : torch.tensor
                signals for subconditioning, all but the last dimensions
                must match those of state

            Returns:
            c_state : torch.tensor
                subconditioned state
        """

        # in recurrent mode position 0 is the unconditioned state
        if index == 0 and self.recurrent:
            return state

        embed_signals = self.embeddings[index](signals)
        c = torch.flatten(embed_signals, -2)

        if index > 0 and not self.recurrent:
            # overwrite previous conditioning vector
            kept = state[..., :self.state_size]
        else:
            kept = state

        return torch.cat((kept, c), dim=-1)

    def get_average_flops_per_step(self):
        """ concatenation is counted as free """
        return 0

    def get_output_dim(self, index):
        """ size of the subconditioned state at position index """
        # recurrent mode has accumulated index embedding blocks, otherwise exactly one
        number_of_blocks = index if self.recurrent else 1
        return self.state_size + number_of_blocks * self.pcm_embedding_size * self.number_of_signals
+
class ModulativeSubconditioner(Subconditioner):
    """ subconditioner that modulates the state with scale and shift terms

    For every position i > 0 the state is replaced by
    tanh((1 + alpha) * state + beta), where alpha and beta are tanh-activated
    linear functions of the signal embeddings (and, if state_recurrent is set,
    of a linear transform of the state itself).
    """

    def __init__(self,
                 number_of_subsamples,
                 pcm_embedding_size,
                 state_size,
                 pcm_levels,
                 number_of_signals,
                 state_recurrent=False,
                 **kwargs):
        """ subconditioning by modulation

        Parameters:
        -----------
        number_of_subsamples : int
            number of positions s per state
        pcm_embedding_size : int
            embedding dimension per signal
        state_size : int
            dimension of the states
        pcm_levels : int
            number of rows of each embedding table
        number_of_signals : int
            number of signals per step
        state_recurrent : bool
            if true, a linear transform of the current state is appended to
            the input of the alpha and beta layers; defaults to False
        **kwargs
            accepted and ignored
        """

        super().__init__()

        self.number_of_subsamples = number_of_subsamples
        self.pcm_embedding_size = pcm_embedding_size
        self.state_size = state_size
        self.pcm_levels = pcm_levels
        self.number_of_signals = number_of_signals
        self.state_recurrent = state_recurrent

        # input size of the alpha and beta layers: one embedding per signal ...
        self.hidden_size = self.pcm_embedding_size * self.number_of_signals

        # ... plus the transformed state in state-recurrent mode
        if self.state_recurrent:
            self.hidden_size += self.pcm_embedding_size
            self.state_transform = nn.Linear(self.state_size, self.pcm_embedding_size)

        # lists are indexed by position, position 0 needs no layers
        self.embeddings = [None]
        self.alphas = [None]
        self.betas = [None]

        for position in range(1, self.number_of_subsamples):
            table = nn.Embedding(self.pcm_levels, self.pcm_embedding_size)
            self.add_module(f'pcm_embedding_{position}', table)
            self.embeddings.append(table)

            scale_layer = nn.Linear(self.hidden_size, self.state_size)
            self.alphas.append(scale_layer)
            self.add_module(f'alpha_dense_{position}', scale_layer)

            shift_layer = nn.Linear(self.hidden_size, self.state_size)
            self.betas.append(shift_layer)
            self.add_module(f'beta_dense_{position}', shift_layer)

    def _modulate(self, index, state, signals):
        """ applies the modulation of position index (> 0) to state """
        # embed signals and merge signal and embedding axes
        c = torch.flatten(self.embeddings[index](signals), -2)

        if self.state_recurrent:
            c = torch.cat((c, self.state_transform(state)), dim=-1)

        alpha = torch.tanh(self.alphas[index](c))
        beta = torch.tanh(self.betas[index](c))

        # new state obtained by modulating previous state
        return torch.tanh((1 + alpha) * state + beta)

    def forward(self, states, signals):
        """ creates list of subconditioned states

            Parameters:
            -----------
            states : torch.tensor
                states of shape (batch, seq_length // s, state_size)
            signals : torch.tensor
                signals of shape (batch, seq_length, number_of_signals)

            Returns:
            --------
            c_states : list of torch.tensor
                list of s subconditioned states
        """
        stride = self.number_of_subsamples

        c_states = [states]
        current = states
        for position in range(1, stride):
            current = self._modulate(position, current, signals[:, position::stride])
            c_states.append(current)

        return c_states

    def single_step(self, index, state, signals):
        """ carry out single step for inference

            Parameters:
            -----------
            index : int
                position in subconditioning batch

            state : torch.tensor
                state to sub-condition

            signals : torch.tensor
                signals for subconditioning, all but the last dimensions
                must match those of state

            Returns:
            c_state : torch.tensor
                subconditioned state
        """

        # position 0 is the unconditioned state
        if index == 0:
            return state

        return self._modulate(index, state, signals)

    def get_output_dim(self, index):
        """ size of the subconditioned state, independent of index """
        return self.state_size

    def get_average_flops_per_step(self):
        """ flop estimate per step, averaged over the s positions (position 0 is free) """
        s = self.number_of_subsamples

        # estimate activation by 10 flops
        # c_state = torch.tanh((1 + alpha) * state + beta)
        flops = 13 * self.state_size

        # hidden size
        hidden_size = self.number_of_signals * self.pcm_embedding_size
        if self.state_recurrent:
            hidden_size += self.pcm_embedding_size

        # counting 2 * A * B flops for Linear(A, B)
        # alpha = torch.tanh(self.alphas[index](c))
        # beta = torch.tanh(self.betas[index](c))
        flops += 4 * hidden_size * self.state_size + 20 * self.state_size

        # r_state = self.state_transform(state)
        if self.state_recurrent:
            flops += 2 * self.state_size * self.pcm_embedding_size

        # average over steps
        flops *= (s - 1) / s

        return flops
+
class ComparitiveSubconditioner(Subconditioner):
    """ subconditioner that modulates a transformed state

    NOTE(review): this class looks unfinished. forward only uses the shared
    alpha_dense / beta_dense layers; the per-position alpha_denses and
    beta_denses, gate_dense, error_position and normalize are created or
    stored but never read in this class. forward also combines tensors of
    size state_size and comparison_size elementwise and feeds the result back
    into a Linear(state_size, ...), so it presumably requires
    state_size == pcm_embedding_size -- confirm before use.
    """

    def __init__(self,
                 number_of_subsamples,
                 pcm_embedding_size,
                 state_size,
                 pcm_levels,
                 number_of_signals,
                 error_index=-1,
                 apply_gate=True,
                 normalize=False):
        """ subconditioning by comparison

        Parameters:
        -----------
        number_of_subsamples : int
            number of positions s per state
        pcm_embedding_size : int
            embedding dimension per signal, also used as comparison size
        state_size : int
            dimension of the states
        pcm_levels : int
            number of rows of each embedding table
        number_of_signals : int
            number of signals per step
        error_index : int
            stored as error_position, defaults to -1
        apply_gate : bool
            if true, a gate layer is created, defaults to True
        normalize : bool
            stored as normalize, defaults to False
        """

        super(ComparitiveSubconditioner, self).__init__()

        # constructor arguments have to be stored before the layers below read them
        self.number_of_subsamples = number_of_subsamples
        self.pcm_embedding_size = pcm_embedding_size
        self.state_size = state_size
        self.pcm_levels = pcm_levels
        self.number_of_signals = number_of_signals

        self.comparison_size = self.pcm_embedding_size
        self.error_position = error_index
        self.apply_gate = apply_gate
        self.normalize = normalize

        self.state_transform = nn.Linear(self.state_size, self.comparison_size)

        # shared layers mapping the flattened signal embeddings to scale and shift
        self.alpha_dense = nn.Linear(self.number_of_signals * self.pcm_embedding_size, self.state_size)
        self.beta_dense = nn.Linear(self.number_of_signals * self.pcm_embedding_size, self.state_size)

        if self.apply_gate:
            self.gate_dense = nn.Linear(self.pcm_embedding_size, self.state_size)

        # embeddings and state transforms
        # lists are indexed by position; only state_transforms has an entry for position 0
        self.embeddings = [None]
        self.alpha_denses = [None]
        self.beta_denses = [None]
        self.state_transforms = [nn.Linear(self.state_size, self.comparison_size)]
        self.add_module('state_transform_0', self.state_transforms[0])

        for i in range(1, self.number_of_subsamples):
            embedding = nn.Embedding(self.pcm_levels, self.pcm_embedding_size)
            self.add_module('pcm_embedding_' + str(i), embedding)
            self.embeddings.append(embedding)

            state_transform = nn.Linear(self.state_size, self.comparison_size)
            self.add_module('state_transform_' + str(i), state_transform)
            self.state_transforms.append(state_transform)

            self.alpha_denses.append(nn.Linear(self.number_of_signals * self.pcm_embedding_size, self.state_size))
            self.add_module('alpha_dense_' + str(i), self.alpha_denses[-1])

            self.beta_denses.append(nn.Linear(self.number_of_signals * self.pcm_embedding_size, self.state_size))
            self.add_module('beta_dense_' + str(i), self.beta_denses[-1])

    def forward(self, states, signals):
        """ creates list of subconditioned states

            Parameters:
            -----------
            states : torch.tensor
                states, last dimension of size state_size
            signals : torch.tensor
                integer signals; position i of every group of s steps along
                dimension 1 is used for the i-th subconditioned state

            Returns:
            --------
            c_states : list of torch.tensor
                list of s subconditioned states
        """
        s = self.number_of_subsamples

        c_states = [states]
        new_states = states
        for i in range(1, self.number_of_subsamples):
            embed = self.embeddings[i](signals[:, i::s])
            # reduce signal dimension
            embed = torch.flatten(embed, -2)

            comp_states = self.state_transforms[i](new_states)

            alpha = torch.tanh(self.alpha_dense(embed))
            beta = torch.tanh(self.beta_dense(embed))

            # new state obtained by modulating the transformed previous state
            new_states = torch.tanh((1 + alpha) * comp_states + beta)

            c_states.append(new_states)

        return c_states
diff --git a/dnn/torch/lpcnet/utils/misc.py b/dnn/torch/lpcnet/utils/misc.py
new file mode 100644
index 00000000..b295d6c4
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/misc.py
@@ -0,0 +1,65 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+
def find(a, v):
    """ returns the index of the first occurrence of v in a, or -1 if v is not in a

    a has to provide an index method (e.g. list, tuple or str).
    """
    try:
        idx = a.index(v)
    except ValueError:
        # index raises ValueError for a missing element; other errors are
        # real bugs (or interrupts) and are no longer swallowed
        idx = -1
    return idx
+
def interleave_tensors(tensors, dim=-2):
    """ interleave list of tensors along sequence dimension

    The tensors are stacked on a new axis at position dim, which is then
    merged with the axis in front of it. With the default dim=-2 and inputs
    of shape (batch, seq, channels), the result has shape
    (batch, len(tensors) * seq, channels) with the inputs alternating along
    the sequence axis.
    """

    stacked = torch.stack(tensors, dim=dim)
    interleaved = torch.flatten(stacked, dim - 1, dim)

    return interleaved
+
+def _interleave(x, pcm_levels=256):
+
+ repeats = pcm_levels // (2*x.size(-1))
+ x = x.unsqueeze(-1)
+ p = torch.flatten(torch.repeat_interleave(torch.cat((x, 1 - x), dim=-1), repeats, dim=-1), -2)
+
+ return p
+
def get_pdf_from_tree(x):
    """ converts binary-tree branch values into a distribution over levels

    Parameters:
    -----------
    x : torch.tensor
        tensor whose last axis has pcm_levels entries. Entry 1 belongs to the
        root, entries n//2 ... n-1 to the tree level with n//2 nodes; entry 0
        is not used. Each entry is the weight given to the first half of the
        levels below its node, the second half gets one minus that value.
        pcm_levels is presumably a power of two -- confirm against callers.

    Returns:
    --------
    p : torch.tensor
        product of the expanded branch values over all tree levels, last axis
        of size pcm_levels
    """
    pcm_levels = x.size(-1)

    # pass the actual number of levels on, so that sizes other than the
    # default of _interleave (256) are expanded to the right length
    p = _interleave(x[..., 1:2], pcm_levels)
    n = 4
    while n <= pcm_levels:
        p = p * _interleave(x[..., n//2:n], pcm_levels)
        n *= 2

    return p
diff --git a/dnn/torch/lpcnet/utils/pcm.py b/dnn/torch/lpcnet/utils/pcm.py
new file mode 100644
index 00000000..7e985b84
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/pcm.py
@@ -0,0 +1,35 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
def clip_to_int16(x):
    """ limits the scalar x to the int16 range [-32768, 32767] """
    lowest = -(1 << 15)
    highest = (1 << 15) - 1

    upper_bounded = min(x, highest)

    return max(lowest, upper_bounded)
diff --git a/dnn/torch/lpcnet/utils/sample.py b/dnn/torch/lpcnet/utils/sample.py
new file mode 100644
index 00000000..63f9e529
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/sample.py
@@ -0,0 +1,44 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+
def sample_excitation(probs, pitch_corr):
    """ samples an excitation index from a sharpened and truncated distribution

    Parameters:
    -----------
    probs : torch.tensor
        probabilities; after squeezing they are passed to torch.multinomial
    pitch_corr : float or torch.tensor
        pitch correlation, larger values sharpen the distribution more

    Returns:
    --------
    exc : torch.tensor
        sampled index as returned by torch.multinomial with one sample
    """

    # renormalize, the small constant guards against division by zero
    norm = lambda x : x / (x.sum() + 1e-18)

    # lowering the temperature
    probs = norm(probs ** (1 + max(0, 1.5 * pitch_corr - 0.5)))
    # cut-off tails; clamp keeps device and dtype of probs, a separately
    # created CPU tensor would fail for probs on another device
    probs = norm(torch.clamp(probs - 0.002, min=0))
    # sample
    exc = torch.multinomial(probs.squeeze(), 1)

    return exc
diff --git a/dnn/torch/lpcnet/utils/sparsification/__init__.py b/dnn/torch/lpcnet/utils/sparsification/__init__.py
new file mode 100644
index 00000000..ebfa9d9a
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/sparsification/__init__.py
@@ -0,0 +1,2 @@
+from .gru_sparsifier import GRUSparsifier
+from .common import sparsify_matrix, calculate_gru_flops_per_step \ No newline at end of file
diff --git a/dnn/torch/lpcnet/utils/sparsification/common.py b/dnn/torch/lpcnet/utils/sparsification/common.py
new file mode 100644
index 00000000..2600cd01
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/sparsification/common.py
@@ -0,0 +1,121 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
def sparsify_matrix(matrix : torch.tensor, density : float, block_size, keep_diagonal : bool=False, return_mask : bool=False):
    """ sparsifies matrix with specified block size

        Blocks are ranked by their energy (sum of squared entries) and only
        the round(number_of_blocks * density) strongest blocks are kept.
        Blocks whose energy ties with the weakest kept block are kept as well.

        Parameters:
        -----------
        matrix : torch.tensor
            matrix to sparsify
        density : float
            target density, fraction of blocks to keep
        block_size : [int, int]
            block size dimensions, have to divide the matrix dimensions
        keep_diagonal : bool
            If true, the diagonal is excluded from the energy calculation and
            always kept. This option requires a square matrix and defaults to False
        return_mask : bool
            If true, the element-wise block mask is returned as well, defaults to False

        Returns:
        --------
        masked_matrix : torch.tensor
            sparsified matrix
        mask : torch.tensor
            mask of the matrix shape with ones in kept blocks; only returned
            if return_mask is true. Diagonal entries preserved by
            keep_diagonal are not marked in the mask.

        Raises:
        -------
        ValueError
            if block_size does not divide the matrix shape or if
            keep_diagonal is requested for a non-square matrix
    """

    m, n = matrix.shape
    m1, n1 = block_size

    if m % m1 or n % n1:
        raise ValueError(f"block size {(m1, n1)} does not divide matrix size {(m, n)}")

    # extract diagonal if keep_diagonal = True
    if keep_diagonal:
        if m != n:
            raise ValueError("Attempting to sparsify non-square matrix with keep_diagonal=True")

        to_spare = torch.diag(torch.diag(matrix))
        matrix  = matrix - to_spare
    else:
        to_spare = torch.zeros_like(matrix)

    # calculate energy in sub-blocks
    x = torch.reshape(matrix, (m // m1, m1, n // n1, n1))
    x = x ** 2
    block_energies = torch.sum(torch.sum(x, dim=3), dim=1)

    number_of_blocks = (m * n) // (m1 * n1)
    number_of_survivors = round(number_of_blocks * density)

    # create mask
    if number_of_survivors <= 0:
        # no block survives; a threshold of 0 would keep every block
        # because energies are never negative
        mask = torch.zeros_like(block_energies)
    else:
        # masking threshold: energy of the weakest surviving block
        threshold = torch.sort(torch.flatten(block_energies)).values[-number_of_survivors]
        mask = torch.ones_like(block_energies)
        mask[block_energies < threshold] = 0

    # expand block mask to element resolution
    mask = torch.repeat_interleave(mask, m1, dim=0)
    mask = torch.repeat_interleave(mask, n1, dim=1)

    # perform masking
    masked_matrix = mask * matrix + to_spare

    if return_mask:
        return masked_matrix, mask
    else:
        return masked_matrix
+
def calculate_gru_flops_per_step(gru, sparsification_dict=dict(), drop_input=False):
    """ estimates the number of flops per step of a (sparsified) GRU

        Parameters:
        -----------
        gru : object with attributes input_size and hidden_size
            GRU to analyse
        sparsification_dict : dict
            maps weight keys ('W_ir', 'W_in', 'W_iz', 'W_hr', 'W_hn', 'W_hz') to
            sequences whose first entry is the density of that weight; missing
            keys count as density 1
        drop_input : bool
            if true, the input matrix-vector products are not counted

        Returns:
        --------
        flops : estimated flops per step
    """
    input_size = gru.input_size
    hidden_size = gru.hidden_size

    def mean_density(keys):
        # average density over the three gates, dense (1) if not listed
        return sum(sparsification_dict.get(key, [1])[0] for key in keys) / 3

    input_density = mean_density(('W_ir', 'W_in', 'W_iz'))
    recurrent_density = mean_density(('W_hr', 'W_hn', 'W_hz'))

    # input matrix vector multiplications
    input_flops = 0 if drop_input else 2 * 3 * input_size * hidden_size * input_density

    # recurrent matrix vector multiplications
    recurrent_flops = 2 * 3 * hidden_size * hidden_size * recurrent_density

    # biases
    bias_flops = 6 * hidden_size

    # activations estimated by 10 flops per activation
    activation_flops = 30 * hidden_size

    return input_flops + recurrent_flops + bias_flops + activation_flops
diff --git a/dnn/torch/lpcnet/utils/sparsification/gru_sparsifier.py b/dnn/torch/lpcnet/utils/sparsification/gru_sparsifier.py
new file mode 100644
index 00000000..4dfdaf0a
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/sparsification/gru_sparsifier.py
@@ -0,0 +1,187 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+
+from .common import sparsify_matrix
+
+
class GRUSparsifier:
    def __init__(self, task_list, start, stop, interval, exponent=3):
        """ Sparsifier for torch.nn.GRUs

            Parameters:
            -----------
            task_list : list
                task_list contains a list of tuples (gru, sparsify_dict), where gru is an instance
                of torch.nn.GRU and sparsify_dic is a dictionary with keys in {'W_ir', 'W_iz', 'W_in',
                'W_hr', 'W_hz', 'W_hn'} corresponding to the input and recurrent weights for the reset,
                update, and new gate. The values of sparsify_dict are tuples (density, [m, n], keep_diagonal),
                where density is the target density in [0, 1], [m, n] is the shape sub-blocks to which
                sparsification is applied and keep_diagonal is a bool variable indicating whether the diagonal
                should be kept.

            start : int
                training step after which sparsification will be started.

            stop : int
                training step after which sparsification will be completed.

            interval : int
                sparsification interval for steps between start and stop. After stop sparsification will be
                carried out after every call to GRUSparsifier.step()

            exponent : float
                Interpolation exponent for sparsification interval. In step i sparsification will be carried out
                with density (alpha + target_density * (1 - alpha)), where
                alpha = ((stop - i) / (stop - start)) ** exponent

            Example:
            --------
            >>> import torch
            >>> gru = torch.nn.GRU(10, 20)
            >>> sparsify_dict = {
            ...         'W_ir' : (0.5, [2, 2], False),
            ...         'W_iz' : (0.6, [2, 2], False),
            ...         'W_in' : (0.7, [2, 2], False),
            ...         'W_hr' : (0.1, [4, 4], True),
            ...         'W_hz' : (0.2, [4, 4], True),
            ...         'W_hn' : (0.3, [4, 4], True),
            ...     }
            >>> sparsifier = GRUSparsifier([(gru, sparsify_dict)], 0, 100, 50)
            >>> for i in range(100):
            ...         sparsifier.step()
        """
        # just copying parameters...
        self.start      = start
        self.stop       = stop
        self.interval   = interval
        self.exponent   = exponent
        self.task_list  = task_list

        # ... and setting counter to 0
        self.step_counter = 0

        # most recent mask per weight key (of whichever GRU was processed last)
        self.last_masks = {key : None for key in ['W_ir', 'W_in', 'W_iz', 'W_hr', 'W_hn', 'W_hz']}

        # most recent mask per (position in task_list, weight key); used for the
        # change check so that masks of different GRUs are never compared
        self._masks_by_task = dict()

    def _sparsify_gates(self, task_index, gru, weight, keys, params, alpha, verbose):
        """ sparsifies the gate sub-matrices of one stacked GRU weight in place

            weight holds the gate matrices stacked along dimension 0 in the
            order given by keys, hidden_size rows each. Gates whose key is
            missing in params are left untouched.
        """
        hidden_size = gru.hidden_size

        for i, key in enumerate(keys):
            if key not in params:
                continue

            # interpolate between dense (alpha = 1) and the target density (alpha = 0)
            density = alpha + (1 - alpha) * params[key][0]
            if verbose:
                print(f"[{self.step_counter}]: {key} density: {density}")

            rows = slice(i * hidden_size, (i + 1) * hidden_size)
            weight[rows, :], new_mask = sparsify_matrix(
                weight[rows, :],
                density,
                params[key][1], # block_size
                params[key][2], # keep_diagonal (might want to set this to False)
                return_mask=True
            )

            # report masks that still change after the final density is reached
            previous_mask = self._masks_by_task.get((task_index, key))
            if previous_mask is not None:
                if not torch.all(previous_mask == new_mask) and self.step_counter > self.stop:
                    print(f"sparsification mask {key} changed for gru {gru}")

            self._masks_by_task[(task_index, key)] = new_mask
            self.last_masks[key] = new_mask

    def step(self, verbose=False):
        """ carries out sparsification step

            Call this function after optimizer.step in your
            training loop.

            Parameters:
            ----------
            verbose : bool
                if true, densities are printed out

            Returns:
            --------
            None

        """
        # compute current interpolation factor
        self.step_counter += 1

        if self.step_counter < self.start:
            return
        elif self.step_counter < self.stop:
            # update only every self.interval-th interval
            if self.step_counter % self.interval:
                return

            alpha = ((self.stop - self.step_counter) / (self.stop - self.start)) ** self.exponent
        else:
            alpha = 0

        with torch.no_grad():
            for task_index, (gru, params) in enumerate(self.task_list):
                # input weights
                self._sparsify_gates(task_index, gru, gru.weight_ih_l0,
                                     ['W_ir', 'W_iz', 'W_in'], params, alpha, verbose)

                # recurrent weights
                self._sparsify_gates(task_index, gru, gru.weight_hh_l0,
                                     ['W_hr', 'W_hz', 'W_hn'], params, alpha, verbose)
+
+
+
if __name__ == "__main__":
    # smoke test: run 100 sparsification steps on a small GRU and print the densities
    print("Testing sparsifier")

    gru = torch.nn.GRU(10, 20)

    sparsify_dict = dict()
    # input weights: 2x2 blocks, diagonal not preserved
    for key, density in (('W_ir', 0.5), ('W_iz', 0.6), ('W_in', 0.7)):
        sparsify_dict[key] = (density, [2, 2], False)
    # recurrent weights: 4x4 blocks, diagonal preserved
    for key, density in (('W_hr', 0.1), ('W_hz', 0.2), ('W_hn', 0.3)):
        sparsify_dict[key] = (density, [4, 4], True)

    sparsifier = GRUSparsifier([(gru, sparsify_dict)], 0, 100, 10)

    for i in range(100):
        sparsifier.step(verbose=True)
diff --git a/dnn/torch/lpcnet/utils/templates.py b/dnn/torch/lpcnet/utils/templates.py
new file mode 100644
index 00000000..89068562
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/templates.py
@@ -0,0 +1,157 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+from models import multi_rate_lpcnet
+import copy
+
+# Placeholder for the registry of named setups; it is rebound to the populated
+# dictionary by the last statement of this module.
+setup_dict = dict()
+
+# Dataset description, version 2: float32 feature frames of 36 values and int16
+# signal frames of 2 values. The [start, stop] pairs in 'feature_frame_layout'
+# are presumably half-open column ranges (TODO confirm against the dataset
+# loader); read that way they cover all 36 feature columns.
+dataset_template_v2 = {
+ 'version' : 2,
+ 'feature_file' : 'features.f32',
+ 'signal_file' : 'data.s16',
+ 'frame_length' : 160,
+ 'feature_frame_length' : 36,
+ 'signal_frame_length' : 2,
+ 'feature_dtype' : 'float32',
+ 'signal_dtype' : 'int16',
+ 'feature_frame_layout' : {'cepstrum': [0,18], 'periods': [18, 19], 'pitch_corr': [19, 20], 'lpc': [20, 36]},
+ 'signal_frame_layout' : {'last_signal' : 0, 'signal': 1} # signal, last_signal, error, prediction
+}
+
+# Dataset description, version 1: float32 feature frames of 55 values and uint8
+# signal frames of 4 values. Read as half-open ranges (TODO confirm), the named
+# feature entries leave columns 18-35 and 38 unassigned.
+dataset_template_v1 = {
+ 'version' : 1,
+ 'feature_file' : 'features.f32',
+ 'signal_file' : 'data.u8',
+ 'frame_length' : 160,
+ 'feature_frame_length' : 55,
+ 'signal_frame_length' : 4,
+ 'feature_dtype' : 'float32',
+ 'signal_dtype' : 'uint8',
+ 'feature_frame_layout' : {'cepstrum': [0,18], 'periods': [36, 37], 'pitch_corr': [37, 38], 'lpc': [39, 55]},
+ 'signal_frame_layout' : {'last_signal' : 0, 'prediction' : 1, 'last_error': 2, 'error': 3} # signal, last_signal, error, prediction
+}
+
+# lpcnet
+
+# Default LPCNet model configuration.
+# 'sparsification' holds one schedule per GRU ('gru_a', 'gru_b'). Each entry of
+# 'params' is a tuple that the GRU sparsifier indexes as
+# (density, block_size, keep_diagonal). The 'start' / 'stop' / 'interval' /
+# 'exponent' values are presumably forwarded to the sparsifier's schedule --
+# TODO confirm against the model code.
+lpcnet_config = {
+ 'frame_size' : 160,
+ 'gru_a_units' : 384,
+ 'gru_b_units' : 64,
+ 'feature_conditioning_dim' : 128,
+ 'feature_conv_kernel_size' : 3,
+ 'period_levels' : 257,
+ 'period_embedding_dim' : 64,
+ 'signal_embedding_dim' : 128,
+ 'signal_levels' : 256,
+ 'feature_dimension' : 19,
+ 'output_levels' : 256,
+ 'lpc_gamma' : 0.9,
+ 'features' : ['cepstrum', 'periods', 'pitch_corr'],
+ 'signals' : ['last_signal', 'prediction', 'last_error'],
+ # NOTE(review): 'periods' is listed in 'features' above but has no entry in
+ # the 'features' part of 'input_layout' -- confirm this is intentional.
+ 'input_layout' : { 'signals' : {'last_signal' : 0, 'prediction' : 1, 'last_error' : 2},
+ 'features' : {'cepstrum' : [0, 18], 'pitch_corr' : [18, 19]} },
+ 'target' : 'error',
+ 'feature_history' : 2,
+ 'feature_lookahead' : 2,
+ 'sparsification' : {
+ # gru_a: only the recurrent matrices (W_h*) are listed.
+ 'gru_a' : {
+ 'start' : 10000,
+ 'stop' : 30000,
+ 'interval' : 100,
+ 'exponent' : 3,
+ 'params' : {
+ 'W_hr' : (0.05, [4, 8], True),
+ 'W_hz' : (0.05, [4, 8], True),
+ 'W_hn' : (0.2, [4, 8], True)
+ },
+ },
+ # gru_b: only the input matrices (W_i*) are listed.
+ 'gru_b' : {
+ 'start' : 10000,
+ 'stop' : 30000,
+ 'interval' : 100,
+ 'exponent' : 3,
+ 'params' : {
+ 'W_ir' : (0.5, [4, 8], False),
+ 'W_iz' : (0.5, [4, 8], False),
+ 'W_in' : (0.5, [4, 8], False)
+ },
+ }
+ },
+ 'add_reference_phase' : False,
+ 'reference_phase_dim' : 0
+}
+
+
+
+# multi rate
+# Sub-conditioning settings attached to the multi-rate configuration under the
+# 'subconditioning' key. The two entries currently hold identical values but
+# are separate dict objects, each with its own empty 'kwargs' dict.
+subconditioning = {
+ 'subconditioning_a' : {
+ 'number_of_subsamples' : 2,
+ 'method' : 'modulative',
+ 'signals' : ['last_signal', 'prediction', 'last_error'],
+ 'pcm_embedding_size' : 64,
+ 'kwargs' : dict()
+
+ },
+ 'subconditioning_b' : {
+ 'number_of_subsamples' : 2,
+ 'method' : 'modulative',
+ 'signals' : ['last_signal', 'prediction', 'last_error'],
+ 'pcm_embedding_size' : 64,
+ 'kwargs' : dict()
+ }
+}
+
+# Multi-rate configuration: the base LPCNet configuration plus a
+# 'subconditioning' entry. A deep copy is taken (as is done for the setup
+# dictionaries) so the nested 'sparsification' and 'input_layout' containers are
+# not shared with lpcnet_config; with a shallow .copy(), an in-place edit of one
+# configuration would silently change the other.
+multi_rate_lpcnet_config = copy.deepcopy(lpcnet_config)
+multi_rate_lpcnet_config['subconditioning'] = subconditioning
+
+# Default optimisation settings shared by the setups below.
+training_default = {
+ 'batch_size' : 256,
+ 'epochs' : 20,
+ 'lr' : 1e-3,
+ 'lr_decay_factor' : 2.5e-5,
+ 'adam_betas' : [0.9, 0.99],
+ 'frames_per_sample' : 15
+}
+
+# Complete setup for the plain LPCNet model: dataset location, model
+# configuration and training settings.
+lpcnet_setup = {
+ 'dataset' : '/local/datasets/lpcnet_training',
+ 'lpcnet' : {'config' : lpcnet_config, 'model': 'lpcnet'},
+ 'training' : training_default
+}
+
+# The multi-rate setup starts from an independent deep copy of the plain setup,
+# then swaps in the multi-rate configuration and model name.
+multi_rate_lpcnet_setup = copy.deepcopy(lpcnet_setup)
+multi_rate_lpcnet_setup['lpcnet']['config'] = multi_rate_lpcnet_config
+multi_rate_lpcnet_setup['lpcnet']['model'] = 'multi_rate'
+
+# Registry mapping a setup name to its setup dictionary.
+setup_dict = {
+ 'lpcnet' : lpcnet_setup,
+ 'multi_rate' : multi_rate_lpcnet_setup
+}
diff --git a/dnn/torch/lpcnet/utils/ulaw.py b/dnn/torch/lpcnet/utils/ulaw.py
new file mode 100644
index 00000000..05c395e9
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/ulaw.py
@@ -0,0 +1,58 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import math as m
+
+import torch
+
+
+
+def ulaw2lin(u):
+    """Expand u-law codes in the tensor `u` to linear amplitudes.
+
+    The codes are re-centred by subtracting 128; the magnitude is expanded as
+    (32768 / 255) * (256 ** (|u - 128| / 128) - 1) and the sign of the centred
+    code is re-applied.
+    """
+    centred = u - 128
+    expanded = torch.exp(torch.abs(centred) / 128. * m.log(256)) - 1
+    return torch.sign(centred) * (32768.0 / 255.0) * expanded
+
+
+def lin2ulawq(x):
+    """Compress linear amplitudes in the tensor `x` to quantised u-law codes.
+
+    The magnitude is mapped through 128 * log(1 + 255 * |x| / 32768) / log(256),
+    the sign of `x` is re-applied, and the result is rounded, offset by 128 and
+    clipped to the range [0, 255].
+    """
+    compressed = 128 * torch.log(1 + (255.0 / 32768.0) * torch.abs(x)) / m.log(256)
+    return torch.clip(128 + torch.round(torch.sign(x) * compressed), 0, 255)
+
+def lin2ulaw(x):
+    """Compress linear amplitudes in the tensor `x` to unquantised u-law values.
+
+    Same mapping as lin2ulawq but without rounding: the magnitude is mapped
+    through 128 * log(1 + 255 * |x| / 32768) / log(256), the sign of `x` is
+    re-applied, and the result is offset by 128 and clipped to [0, 255].
+    """
+    scale = 255.0 / 32768.0
+    s = torch.sign(x)
+    x = torch.abs(x)
+    # The normaliser is a plain Python constant: torch.log() only accepts
+    # tensors, so torch.log(256) raised a TypeError on every call.
+    u = s * (128 * torch.log(1 + scale * x) / m.log(256))
+    u = torch.clip(128 + u, 0, 255)
+    return u
\ No newline at end of file
diff --git a/dnn/torch/lpcnet/utils/wav.py b/dnn/torch/lpcnet/utils/wav.py
new file mode 100644
index 00000000..d955c328
--- /dev/null
+++ b/dnn/torch/lpcnet/utils/wav.py
@@ -0,0 +1,43 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import wave
+
+def wavwrite16(filename, x, fs):
+    """Write `x` to `filename` as a mono 16-bit WAV file with sample rate `fs`.
+
+    An int16 array is written unchanged. Any other dtype is multiplied by
+    2**15 - 1 and cast to int16 first.
+    """
+    samples = x if x.dtype == 'int16' else ((2**15 - 1) * x).astype('int16')
+
+    with wave.open(filename, 'wb') as f:
+        # (channels, bytes per sample, sample rate, frame count, compression type, compression name)
+        f.setparams((1, 2, fs, len(samples), 'NONE', ""))
+        f.writeframes(samples.tobytes())
\ No newline at end of file
diff --git a/dnn/torch/neural-pitch/README.md b/dnn/torch/neural-pitch/README.md
new file mode 100644
index 00000000..6323ead5
--- /dev/null
+++ b/dnn/torch/neural-pitch/README.md
@@ -0,0 +1,18 @@
+## Neural Pitch Estimation
+
+- Dataset Installation
+ 1. Download and unzip PTDB Dataset:
+ wget https://www2.spsc.tugraz.at/databases/PTDB-TUG/SPEECH_DATA_ZIPPED.zip
+ unzip SPEECH_DATA_ZIPPED.zip
+
+ 2. Inside the "SPEECH DATA" directory extracted above, run ptdb_process.sh to combine the male and female data
+
+ 3. To download and combine the DEMAND noise dataset, run download_demand.sh
+
+- LPCNet preparation
+ 1. To extract xcorr features, add lpcnet_extractor.c, add the relevant functions to lpcnet_enc.c, add the new header/C source files to Makefile.am, and compile to generate the ./lpcnet_xcorr_extractor executable
+
+- Dataset Augmentation and training (check out arguments to each of the following)
+ 1. Run data_augmentation.py
+ 2. Run training.py using augmented data
+ 3. Run experiments.py
diff --git a/dnn/torch/neural-pitch/data_augmentation.py b/dnn/torch/neural-pitch/data_augmentation.py
new file mode 100644
index 00000000..ee7a3cab
--- /dev/null
+++ b/dnn/torch/neural-pitch/data_augmentation.py
@@ -0,0 +1,149 @@
+"""
+Perform Data Augmentation (Gain, Additive Noise, Random Filtering) on Input TTS Data
+1. Read in chunks and compute clean pitch first
+2. Then add in augmentation (Noise/Level/Response)
+ - Adds filtered noise from the "Demand" dataset, https://zenodo.org/record/1227121#.XRKKxYhKiUk
+ - When using the Demand Dataset, consider each channel as a possible noise input, and keep the first 4 minutes of noise for training
+3. Use this "augmented" audio for feature computation, and compute pitch using CREPE on the clean input
+
+Notes: To ensure consistency with the discovered CREPE offset, we do the following
+- We pad the input audio to the zero-centered CREPE estimator with 80 zeros
+- We pad the input audio to our feature computation with 160 zeros to center them
+"""
+
+import argparse
+parser = argparse.ArgumentParser()
+
+
+def _str2bool(value):
+    """Convert a command-line string to a bool.
+
+    argparse applies `type` to the raw string, and bool("False") is True, so
+    `type=bool` cannot express a false value other than the empty string.
+    Accepts true/false, yes/no, y/n and 1/0 in any letter case; the empty
+    string is still treated as False. Anything else is rejected with an
+    argparse error.
+    """
+    if isinstance(value, bool):
+        return value
+    text = value.strip().lower()
+    if text in ('true', 'yes', 'y', '1'):
+        return True
+    if text in ('false', 'no', 'n', '0', ''):
+        return False
+    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))
+
+
+# Positional arguments: input audio, output prefix, feature extractor and noise corpus.
+parser.add_argument('data', type=str, help='input raw audio data')
+parser.add_argument('output', type=str, help='output directory')
+parser.add_argument('path_lpcnet_extractor', type=str, help='path to LPCNet extractor object file (generated on compilation)')
+parser.add_argument('noise_dataset', type=str, help='Location of the Demand Datset')
+# Optional arguments.
+parser.add_argument('--flag_xcorr', type=_str2bool, help='Flag to additionally dump xcorr features',choices=[True,False],default = False,required = False)
+parser.add_argument('--fraction_input_use', type=float, help='Fraction of input data to consider',default = 0.3,required = False)
+parser.add_argument('--gpu_index', type=int, help='GPU index to use if multiple GPUs',default = 0,required = False)
+parser.add_argument('--choice_augment', type=str, help='Choice of noise augmentation, either use additive synthetic noise or add noise from the demand dataset',choices = ['demand','synthetic'],default = "demand",required = False)
+parser.add_argument('--fraction_clean', type=float, help='Fraction of data to keep clean (that is not augment with anything)',default = 0.2,required = False)
+parser.add_argument('--chunk_size', type=int, help='Number of samples to augment with for each iteration',default = 80000,required = False)
+parser.add_argument('--N', type=int, help='STFT window size',default = 320,required = False)
+parser.add_argument('--H', type=int, help='STFT Hop size',default = 160,required = False)
+parser.add_argument('--freq_keep', type=int, help='Number of Frequencies to keep',default = 30,required = False)
+
+args = parser.parse_args()
+
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_index)
+
+from utils import stft, random_filter
+
+import numpy as np
+import tqdm
+import crepe
+import random
+import glob
+import subprocess
+
+# Memory-map the raw int16 input and keep only the leading fraction requested
+# on the command line.
+data_full = np.memmap(args.data, dtype=np.int16, mode='r')
+data = data_full[:int(args.fraction_input_use * data_full.shape[0])]
+
+# Per-chunk pitch targets (in cents) and confidences collected by the chunk loop.
+list_cents, list_confidences = [], []
+
+N, H, freq_keep = args.N, args.H, args.freq_keep
+# Minimum/Maximum periods, decided by LPCNet
+min_period, max_period = 32, 256
+f_ref = 16000 / max_period
+chunk_size = args.chunk_size
+num_frames_chunk = chunk_size // H
+
+# Select the first `freq_keep` columns from each of three consecutive groups of
+# (N//2 + 1) feature columns.
+list_indices_keep = np.concatenate([group * (N // 2 + 1) + np.arange(freq_keep) for group in range(3)])
+
+# Output memmaps hold num_frames_chunk rows for every processed chunk.
+output_IF = np.memmap(args.output + '_iffeat.f32', dtype=np.float32, mode='w+',
+                      shape=((data.shape[0] // chunk_size - 1) * num_frames_chunk, list_indices_keep.shape[0]))
+if args.flag_xcorr:
+    output_xcorr = np.memmap(args.output + '_xcorr.f32', dtype=np.float32, mode='w+',
+                             shape=((data.shape[0] // chunk_size - 1) * num_frames_chunk, 257))
+
+fraction_clean = args.fraction_clean
+noise_dataset = args.noise_dataset
+
+# Main loop: process the input one chunk of `chunk_size` samples at a time.
+# The range stops one short of data.shape[0]//chunk_size, so the final full
+# chunk is not processed.
+for i in tqdm.trange((data.shape[0]//chunk_size - 1)//1):
+ # Scale the int16 samples of this chunk by 1/(2**15 - 1).
+ chunk = data[i*chunk_size:(i + 1)*chunk_size]/(2**15 - 1)
+
+ # Clean Pitch/Confidence Estimate
+ # Padding input to CREPE by 80 samples to ensure it aligns
+ _, pitch, confidence, _ = crepe.predict(np.concatenate([np.zeros(80),chunk]), 16000, center=True, viterbi=True,verbose=0)
+ # Pitch in cents relative to f_ref; entries where pitch == 0 are divided to 0
+ # and therefore map to 1200*log2(1e-8).
+ cent = 1200*np.log2(np.divide(pitch, f_ref, out=np.zeros_like(pitch), where=pitch!=0) + 1.0e-8)
+
+ # Filter out of range pitches/confidences
+ confidence[pitch < 16000/max_period] = 0
+ confidence[pitch > 16000/min_period] = 0
+
+ # Keep fraction of data clean, augment only 1 minus the fraction
+ if (np.random.rand() > fraction_clean):
+ # Response, generate controlled/random 2nd order IIR filter and filter chunk
+ chunk = random_filter(chunk)
+
+ # Level/Gain response {scale by random gain between 1.0e-3 and 10}
+ # Generate random gain in dB and then convert to scale
+ g_dB = np.random.uniform(low = -60, high = 20, size = 1)
+ # g_dB = 0
+ g = 10**(g_dB/20)
+
+ # Noise Addition {Add random SNR 2nd order randomly colored noise}
+ # Generate noise SNR value and add corresponding noise
+ snr_dB = np.random.uniform(low = -20, high = 30, size = 1)
+
+ if args.choice_augment == 'synthetic':
+ n = np.random.randn(chunk_size)
+ else:
+ # The noise directory string is concatenated directly with '*.wav', so it
+ # needs to end with a path separator.
+ list_noisefiles = noise_dataset + '*.wav'
+ noise_file = random.choice(glob.glob(list_noisefiles))
+ # NOTE(review): the .wav file is memory-mapped as raw int16 from byte 0, so
+ # any file header bytes are read as samples -- confirm this is acceptable.
+ n = np.memmap(noise_file, dtype=np.int16,mode = 'r')/(2**15 - 1)
+ rand_range = np.random.randint(low = 0, high = (n.shape[0] - 16000*60 - chunk.shape[0])) # 16000 is subtracted because we will use the last 1 minutes of noise for testing
+ n = n[rand_range:rand_range + chunk.shape[0]]
+
+ # Randomly filter the sampled noise as well
+ n = random_filter(n)
+ # generate random prime number between 0,500 and make those samples of noise 0 (to prevent GRU from picking up temporal patterns)
+ # (the candidate list below actually runs up to 541; the last Nprime samples of the noise are zeroed)
+ Nprime = random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541])
+ n[chunk_size - Nprime:] = np.zeros(Nprime)
+ # Scale the noise so that the chunk-to-noise energy ratio equals snr_dB.
+ snr_multiplier = np.sqrt((np.sum(np.abs(chunk)**2)/np.sum(np.abs(n)**2))*10**(-snr_dB/10))
+
+ chunk = g*(chunk + snr_multiplier*n)
+
+ # Zero pad input audio by 160 to center the frames
+ spec = stft(x = np.concatenate([np.zeros(160),chunk]), w = 'boxcar', N = N, H = H).T
+ # Unit-magnitude product of each spectrum entry with the conjugate of its
+ # neighbour along the last axis (np.roll wraps around at the edge).
+ phase_diff = spec*np.conj(np.roll(spec,1,axis = -1))
+ phase_diff = phase_diff/(np.abs(phase_diff) + 1.0e-8)
+ # Stack log-magnitude, real and imaginary parts, then keep the selected columns.
+ feature = np.concatenate([np.log(np.abs(spec) + 1.0e-8),np.real(phase_diff),np.imag(phase_diff)],axis = 0).T
+ feature = feature[:,list_indices_keep]
+
+ if args.flag_xcorr:
+ # Dump noisy audio into temp file
+ data_temp = np.memmap('./temp_augment.raw', dtype=np.int16, shape=(chunk.shape[0]), mode='w+')
+ # data_temp[:chunk.shape[0]] = (chunk/(np.max(np.abs(chunk)))*(2**15 - 1)).astype(np.int16)
+ data_temp[:chunk.shape[0]] = ((chunk)*(2**15 - 1)).astype(np.int16)
+
+ # Run the external extractor on the temp file and read back rows of 256
+ # float32 values, reversed along the second axis.
+ subprocess.run([args.path_lpcnet_extractor, './temp_augment.raw', './temp_augment_xcorr.f32'])
+ feature_xcorr = np.flip(np.fromfile('./temp_augment_xcorr.f32', dtype='float32').reshape((-1,256),order = 'C'),axis = 1)
+ # Prepend a column of ones, giving 257 columns per row.
+ ones_zero_lag = np.expand_dims(np.ones(feature_xcorr.shape[0]),-1)
+ feature_xcorr = np.concatenate([ones_zero_lag,feature_xcorr],axis = -1)
+
+ os.remove('./temp_augment.raw')
+ os.remove('./temp_augment_xcorr.f32')
+ # NOTE(review): feature_xcorr and output_xcorr only exist when --flag_xcorr is
+ # set. The original nesting is not visible in this listing; if the lines below
+ # sit outside the `if args.flag_xcorr:` block, a run without the flag fails
+ # with NameError -- confirm and guard the xcorr uses if so.
+ num_frames = min(cent.shape[0],feature.shape[0],feature_xcorr.shape[0],num_frames_chunk)
+ feature = feature[:num_frames,:]
+ cent = cent[:num_frames]
+ confidence = confidence[:num_frames]
+ feature_xcorr = feature_xcorr[:num_frames]
+ # NOTE(review): the destination slices always span num_frames_chunk rows, so
+ # these assignments need num_frames == num_frames_chunk -- TODO confirm this
+ # always holds.
+ output_IF[i*num_frames_chunk:(i + 1)*num_frames_chunk,:] = feature
+ output_xcorr[i*num_frames_chunk:(i + 1)*num_frames_chunk,:] = feature_xcorr
+ list_cents.append(cent)
+ list_confidences.append(confidence)
+
+# Concatenate the per-chunk targets and save them as a 2-row array
+# (row 0: cents, row 1: confidences).
+list_cents = np.hstack(list_cents)
+list_confidences = np.hstack(list_confidences)
+
+np.save(args.output + '_pitches',np.vstack([list_cents,list_confidences]))
diff --git a/dnn/torch/neural-pitch/download_demand.sh b/dnn/torch/neural-pitch/download_demand.sh
new file mode 100644
index 00000000..0cff06af
--- /dev/null
+++ b/dnn/torch/neural-pitch/download_demand.sh
@@ -0,0 +1,43 @@
+# Download the *_16k.zip DEMAND archives from Zenodo (one per environment),
+# unpack them, and copy every .wav file into one flat directory, prefixing each
+# file name with the name of its parent directory.
+for env in DKITCHEN DLIVING DWASHING NFIELD NPARK NRIVER OHALLWAY OMEETING \
+           OOFFICE PCAFETER PRESTO PSTATION TMETRO TCAR TBUS STRAFFIC SPSQUARE; do
+    wget "https://zenodo.org/record/1227121/files/${env}_16k.zip"
+done
+
+unzip '*.zip'
+
+mkdir -p ./combined_demand_channels/
+for file in */*.wav; do
+    # When nothing matches, the pattern is left unexpanded; skip it.
+    [ -e "$file" ] || continue
+    parentdir="$(dirname "$file")"
+    echo "$parentdir"
+    fname="$(basename "$file")"
+    # Quote every expansion so that paths containing spaces or glob characters
+    # are copied intact.
+    cp "$file" "./combined_demand_channels/$parentdir+$fname"
+done
diff --git a/dnn/torch/neural-pitch/evaluation.py b/dnn/torch/neural-pitch/evaluation.py
new file mode 100644
index 00000000..38ba5765
--- /dev/null
+++ b/dnn/torch/neural-pitch/evaluation.py
@@ -0,0 +1,349 @@
+"""
+Evaluation script to compute the Raw Pitch Accuracy
+Procedure:
+ - Look at all voiced frames in file
+ - Compute number of pitches in those frames that lie within a 50 cent threshold
+ RPA = (Total number of pitches within threshold summed across all files)/(Total number of voiced frames summed across all files)
+"""
+
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+from prettytable import PrettyTable
+import numpy as np
+import glob
+import random
+import tqdm
+import torch
+import librosa
+import json
+from utils import stft, random_filter, feature_xform
+import subprocess
+import crepe
+
+from models import PitchDNN, PitchDNNIF, PitchDNNXcorr
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def rca(reference,input,voicing,thresh = 25):
+    """Count the voiced frames whose estimate is within `thresh` of the reference.
+
+    A frame counts as voiced when its `voicing` entry is non-zero. Returns the
+    number of voiced frames for which |reference - input| < thresh.
+    """
+    voiced_frames = np.nonzero(voicing != 0)[0]
+    deviation = np.abs(reference - input)[voiced_frames]
+    return int(np.count_nonzero(deviation < thresh))
+
+def sweep_rca(reference,input,voicing,thresh = 25,ind_arr = np.arange(-10,10)):
+    """Return the best rca() count over circular shifts of `input`.
+
+    `input` is rolled by every offset in `ind_arr` (default -10..9) and scored
+    against `reference` with rca(); the maximum score is returned.
+    """
+    scores = np.array([rca(reference, np.roll(input, shift), voicing, thresh) for shift in ind_arr])
+
+    return np.max(scores)
+
+def rpa(model,device = 'cpu',data_format = 'if'):
+ # Evaluate `model` on up to 1000 randomly chosen files from the hard-coded
+ # dataset directory below and print a table with the fraction of voiced
+ # frames whose estimate is within 50 cents of the reference, for all files
+ # and split by male/female file names.
+ #
+ # model: callable applied to a float tensor of features with a leading batch
+ # axis of size 1; its output is arg-maxed over dim 1.
+ # device: device the feature tensor is moved to before calling the model.
+ # data_format: 'if' or 'xcorr' selects one feature set; any other value
+ # concatenates xcorr and IF features.
+ # Returns None; the result is only printed.
+ list_files = glob.glob('/home/ubuntu/Code/Datasets/SPEECH DATA/combined_mic_16k_raw/*.raw')
+ dir_f0 = '/home/ubuntu/Code/Datasets/SPEECH DATA/combine_f0_ptdb/'
+ # random_shuffle = list(np.random.permutation(len(list_files)))
+ random.shuffle(list_files)
+ list_files = list_files[:1000]
+
+ # Voiced-frame totals (all / male / female) and per-file correct counts.
+ C_all = 0
+ C_all_m = 0
+ C_all_f = 0
+ list_rca_model_all = []
+ list_rca_male_all = []
+ list_rca_female_all = []
+
+ thresh = 50
+ N = 320
+ H = 160
+ freq_keep = 30
+
+ for idx in tqdm.trange(len(list_files)):
+ audio_file = list_files[idx]
+ file_name = os.path.basename(list_files[idx])[:-4]
+
+ audio = np.memmap(list_files[idx], dtype=np.int16)/(2**15 - 1)
+ # Drop the first 432 samples (presumably to align the audio with the
+ # reference pitch track -- TODO confirm).
+ offset = 432
+ audio = audio[offset:]
+ rmse = np.squeeze(librosa.feature.rms(y = audio,frame_length = 320,hop_length = 160))
+
+ # Instantaneous-frequency style features: log-magnitude plus real and
+ # imaginary parts of the normalised phase-difference term.
+ spec = stft(x = np.concatenate([np.zeros(160),audio]), w = 'boxcar', N = N, H = H).T
+ phase_diff = spec*np.conj(np.roll(spec,1,axis = -1))
+ phase_diff = phase_diff/(np.abs(phase_diff) + 1.0e-8)
+ idx_save = np.concatenate([np.arange(freq_keep),(N//2 + 1) + np.arange(freq_keep),2*(N//2 + 1) + np.arange(freq_keep)])
+ feature = np.concatenate([np.log(np.abs(spec) + 1.0e-8),np.real(phase_diff),np.imag(phase_diff)],axis = 0).T
+ feature_if = feature[:,idx_save]
+
+ # The audio is peak-normalised before being written for the external
+ # extractor, which is run for every file regardless of data_format.
+ data_temp = np.memmap('./temp.raw', dtype=np.int16, shape=(audio.shape[0]), mode='w+')
+ data_temp[:audio.shape[0]] = (audio/(np.max(np.abs(audio)))*(2**15 - 1)).astype(np.int16)
+
+ subprocess.run(["../../../lpcnet_xcorr_extractor", './temp.raw', './temp_xcorr.f32'])
+ feature_xcorr = np.flip(np.fromfile('./temp_xcorr.f32', dtype='float32').reshape((-1,256),order = 'C'),axis = 1)
+ ones_zero_lag = np.expand_dims(np.ones(feature_xcorr.shape[0]),-1)
+ feature_xcorr = np.concatenate([ones_zero_lag,feature_xcorr],axis = -1)
+ # feature_xcorr = feature_xform(feature_xcorr)
+
+ os.remove('./temp.raw')
+ os.remove('./temp_xcorr.f32')
+
+ if data_format == 'if':
+ feature = feature_if
+ elif data_format == 'xcorr':
+ feature = feature_xcorr
+ else:
+ indmin = min(feature_if.shape[0],feature_xcorr.shape[0])
+ feature = np.concatenate([feature_xcorr[:indmin,:],feature_if[:indmin,:]],-1)
+
+
+ # Reference file name: "ref" + the audio file name without its first three
+ # characters and extension + ".f0". Column 0 is read as pitch and column 1 as
+ # voicing (the file is parsed twice).
+ pitch_file_name = dir_f0 + "ref" + os.path.basename(list_files[idx])[3:-4] + ".f0"
+ pitch = np.loadtxt(pitch_file_name)[:,0]
+ voicing = np.loadtxt(pitch_file_name)[:,1]
+ indmin = min(voicing.shape[0],rmse.shape[0],pitch.shape[0])
+ pitch = pitch[:indmin]
+ voicing = voicing[:indmin]
+ rmse = rmse[:indmin]
+ # Frames whose RMS is at most 5% of the file maximum are treated as unvoiced.
+ voicing = voicing*(rmse > 0.05*np.max(rmse))
+ # For files with "mic_F" in the path, reference pitches below 125 are
+ # treated as unvoiced.
+ if "mic_F" in audio_file:
+ idx_correct = np.where(pitch < 125)
+ voicing[idx_correct] = 0
+
+ # Reference pitch in integer cents relative to 16000/256.
+ cent = np.rint(1200*np.log2(np.divide(pitch, (16000/256), out=np.zeros_like(pitch), where=pitch!=0) + 1.0e-8)).astype('int')
+
+
+ # Model estimate: arg-max over dim 1, multiplied by 20 to compare against
+ # the reference cents.
+ model_cents = model(torch.from_numpy(np.copy(np.expand_dims(feature,0))).float().to(device))
+ model_cents = 20*model_cents.argmax(dim=1).cpu().detach().squeeze().numpy()
+
+ num_frames = min(cent.shape[0],model_cents.shape[0])
+ pitch = pitch[:num_frames]
+ cent = cent[:num_frames]
+ voicing = voicing[:num_frames]
+ model_cents = model_cents[:num_frames]
+
+ voicing_all = np.copy(voicing)
+ # Forcefully make regions where pitch is <65 or greater than 500 unvoiced for relevant accurate pitch comparisons for our model
+ force_out_of_pitch = np.where(np.logical_or(pitch < 65,pitch > 500)==True)
+ voicing_all[force_out_of_pitch] = 0
+ C_all = C_all + np.where(voicing_all != 0)[0].shape[0]
+
+ list_rca_model_all.append(rca(cent,model_cents,voicing_all,thresh))
+
+ # Every file without "mic_M" in its path is counted in the female group.
+ if "mic_M" in audio_file:
+ list_rca_male_all.append(rca(cent,model_cents,voicing_all,thresh))
+ C_all_m = C_all_m + np.where(voicing_all != 0)[0].shape[0]
+ else:
+ list_rca_female_all.append(rca(cent,model_cents,voicing_all,thresh))
+ C_all_f = C_all_f + np.where(voicing_all != 0)[0].shape[0]
+
+ list_rca_model_all = np.array(list_rca_model_all)
+ list_rca_male_all = np.array(list_rca_male_all)
+ list_rca_female_all = np.array(list_rca_female_all)
+
+
+ # Each row is (sum of correct frames) / (sum of voiced frames) for its group.
+ x = PrettyTable()
+
+ x.field_names = ["Experiment", "Mean RPA"]
+ x.add_row(["Both all pitches", np.sum(list_rca_model_all)/C_all])
+
+ x.add_row(["Male all pitches", np.sum(list_rca_male_all)/C_all_m])
+
+ x.add_row(["Female all pitches", np.sum(list_rca_female_all)/C_all_f])
+
+ print(x)
+
+ return None
+
+def cycle_eval(checkpoint_list, noise_type = 'synthetic', noise_dataset = None, list_snr = [-20,-15,-10,-5,0,5,10,15,20], ptdb_dataset_path = None,fraction = 0.1,thresh = 50):
+    """
+    Cycle through SNR evaluation for list of checkpoints
+
+    checkpoint_list: paths of model checkpoints, and/or the baseline names
+        'crepe' and 'lpcnet'.
+    noise_type: 'synthetic' adds randomly filtered Gaussian noise; any other
+        value draws noise from the .wav files matched by noise_dataset + '*.wav'.
+    list_snr: SNR values in dB to evaluate. The list is not modified; a
+        noise-free (infinite SNR) condition is always evaluated in addition.
+    ptdb_dataset_path: dataset root (string prefix) that contains
+        'combined_mic_16k/' and 'combined_reference_f0/'.
+    fraction: fraction of the (shuffled) files to evaluate.
+    thresh: threshold in cents passed to rca().
+
+    Returns a dict keyed by checkpoint path / baseline name; each value holds
+    'list_SNR' (accuracy per entry of list_snr, in order) and 'inf' (accuracy
+    without added noise).
+    """
+    list_files = glob.glob(ptdb_dataset_path + 'combined_mic_16k/*.raw')
+    dir_f0 = ptdb_dataset_path + 'combined_reference_f0/'
+    random.shuffle(list_files)
+    list_files = list_files[:(int)(fraction*len(list_files))]
+
+    dict_models = {}
+    # Work on a private copy: appending to the argument would grow the shared
+    # default list (and the caller's list) by one np.inf entry on every call.
+    list_snr = list(list_snr) + [np.inf]
+
+    for f in checkpoint_list:
+        # Results are keyed by the checkpoint path or baseline name. Previously
+        # the key was only assigned on the baseline branch, so evaluating a
+        # checkpoint failed with NameError (or reused the previous key).
+        fname = f
+        if (f!='crepe') and (f!='lpcnet'):
+
+            checkpoint = torch.load(f, map_location='cpu')
+            dict_params = checkpoint['config']
+            # NOTE(review): the `model` aliases imported below are not used in
+            # this function -- confirm whether these imports can be dropped.
+            if dict_params['data_format'] == 'if':
+                from models import large_if_ccode as model
+                pitch_nn = PitchDNNIF(dict_params['freq_keep']*3,dict_params['gru_dim'],dict_params['output_dim'])
+            elif dict_params['data_format'] == 'xcorr':
+                from models import large_xcorr as model
+                pitch_nn = PitchDNNXcorr(dict_params['xcorr_dim'],dict_params['gru_dim'],dict_params['output_dim'])
+            else:
+                from models import large_joint as model
+                pitch_nn = PitchDNN(dict_params['freq_keep']*3,dict_params['xcorr_dim'],dict_params['gru_dim'],dict_params['output_dim'])
+
+            pitch_nn.load_state_dict(checkpoint['state_dict'])
+            # The features are moved to `device` before the forward pass, so the
+            # model has to live on the same device.
+            pitch_nn = pitch_nn.to(device)
+
+            N = dict_params['window_size']
+            H = dict_params['hop_factor']
+            freq_keep = dict_params['freq_keep']
+
+            list_mean = []
+            list_std = []
+            for snr_dB in list_snr:
+                C_all = 0
+                C_correct = 0
+                for idx in tqdm.trange(len(list_files)):
+                    audio_file = list_files[idx]
+                    file_name = os.path.basename(list_files[idx])[:-4]
+
+                    audio = np.memmap(list_files[idx], dtype=np.int16)/(2**15 - 1)
+                    offset = 432
+                    audio = audio[offset:]
+                    rmse = np.squeeze(librosa.feature.rms(y = audio,frame_length = N,hop_length = H))
+
+                    if noise_type != 'synthetic':
+                        list_noisefiles = noise_dataset + '*.wav'
+                        noise_file = random.choice(glob.glob(list_noisefiles))
+                        n = np.memmap(noise_file, dtype=np.int16,mode = 'r')/(2**15 - 1)
+                        rand_range = np.random.randint(low = 0, high = (16000*60*5 - audio.shape[0])) # Last 1 minute of noise used for testing
+                        n = n[rand_range:rand_range + audio.shape[0]]
+                    else:
+                        n = np.random.randn(audio.shape[0])
+                        # NOTE(review): nesting reconstructed from a flattened
+                        # listing; the filter is assumed to apply to synthetic
+                        # noise only -- confirm.
+                        n = random_filter(n)
+
+                    # For snr_dB == inf the multiplier is 0, i.e. no noise is added.
+                    snr_multiplier = np.sqrt((np.sum(np.abs(audio)**2)/np.sum(np.abs(n)**2))*10**(-snr_dB/10))
+                    audio = audio + snr_multiplier*n
+
+                    spec = stft(x = np.concatenate([np.zeros(160),audio]), w = 'boxcar', N = N, H = H).T
+                    phase_diff = spec*np.conj(np.roll(spec,1,axis = -1))
+                    phase_diff = phase_diff/(np.abs(phase_diff) + 1.0e-8)
+                    idx_save = np.concatenate([np.arange(freq_keep),(N//2 + 1) + np.arange(freq_keep),2*(N//2 + 1) + np.arange(freq_keep)])
+                    feature = np.concatenate([np.log(np.abs(spec) + 1.0e-8),np.real(phase_diff),np.imag(phase_diff)],axis = 0).T
+                    feature_if = feature[:,idx_save]
+
+                    data_temp = np.memmap('./temp.raw', dtype=np.int16, shape=(audio.shape[0]), mode='w+')
+                    # data_temp[:audio.shape[0]] = (audio/(np.max(np.abs(audio)))*(2**15 - 1)).astype(np.int16)
+                    data_temp[:audio.shape[0]] = ((audio)*(2**15 - 1)).astype(np.int16)
+
+                    subprocess.run(["../../../lpcnet_xcorr_extractor", './temp.raw', './temp_xcorr.f32'])
+                    feature_xcorr = np.flip(np.fromfile('./temp_xcorr.f32', dtype='float32').reshape((-1,256),order = 'C'),axis = 1)
+                    ones_zero_lag = np.expand_dims(np.ones(feature_xcorr.shape[0]),-1)
+                    feature_xcorr = np.concatenate([ones_zero_lag,feature_xcorr],axis = -1)
+
+                    os.remove('./temp.raw')
+                    os.remove('./temp_xcorr.f32')
+
+                    if dict_params['data_format'] == 'if':
+                        feature = feature_if
+                    elif dict_params['data_format'] == 'xcorr':
+                        feature = feature_xcorr
+                    else:
+                        indmin = min(feature_if.shape[0],feature_xcorr.shape[0])
+                        feature = np.concatenate([feature_xcorr[:indmin,:],feature_if[:indmin,:]],-1)
+
+                    pitch_file_name = dir_f0 + "ref" + os.path.basename(list_files[idx])[3:-4] + ".f0"
+                    pitch = np.loadtxt(pitch_file_name)[:,0]
+                    voicing = np.loadtxt(pitch_file_name)[:,1]
+                    indmin = min(voicing.shape[0],rmse.shape[0],pitch.shape[0])
+                    pitch = pitch[:indmin]
+                    voicing = voicing[:indmin]
+                    rmse = rmse[:indmin]
+                    voicing = voicing*(rmse > 0.05*np.max(rmse))
+                    if "mic_F" in audio_file:
+                        idx_correct = np.where(pitch < 125)
+                        voicing[idx_correct] = 0
+
+                    cent = np.rint(1200*np.log2(np.divide(pitch, (16000/256), out=np.zeros_like(pitch), where=pitch!=0) + 1.0e-8)).astype('int')
+
+                    model_cents = pitch_nn(torch.from_numpy(np.copy(np.expand_dims(feature,0))).float().to(device))
+                    model_cents = 20*model_cents.argmax(dim=1).cpu().detach().squeeze().numpy()
+
+                    num_frames = min(cent.shape[0],model_cents.shape[0])
+                    pitch = pitch[:num_frames]
+                    cent = cent[:num_frames]
+                    voicing = voicing[:num_frames]
+                    model_cents = model_cents[:num_frames]
+
+                    voicing_all = np.copy(voicing)
+                    # Forcefully make regions where pitch is <65 or greater than 500 unvoiced for relevant accurate pitch comparisons for our model
+                    force_out_of_pitch = np.where(np.logical_or(pitch < 65,pitch > 500)==True)
+                    voicing_all[force_out_of_pitch] = 0
+                    C_all = C_all + np.where(voicing_all != 0)[0].shape[0]
+
+                    C_correct = C_correct + rca(cent,model_cents,voicing_all,thresh)
+                list_mean.append(C_correct/C_all)
+        else:
+            list_mean = []
+            list_std = []
+            for snr_dB in list_snr:
+                C_all = 0
+                C_correct = 0
+                for idx in tqdm.trange(len(list_files)):
+                    audio_file = list_files[idx]
+                    file_name = os.path.basename(list_files[idx])[:-4]
+
+                    audio = np.memmap(list_files[idx], dtype=np.int16)/(2**15 - 1)
+                    offset = 432
+                    audio = audio[offset:]
+                    rmse = np.squeeze(librosa.feature.rms(y = audio,frame_length = 320,hop_length = 160))
+
+                    if noise_type != 'synthetic':
+                        list_noisefiles = noise_dataset + '*.wav'
+                        noise_file = random.choice(glob.glob(list_noisefiles))
+                        n = np.memmap(noise_file, dtype=np.int16,mode = 'r')/(2**15 - 1)
+                        rand_range = np.random.randint(low = 0, high = (16000*60*5 - audio.shape[0])) # Last 1 minute of noise used for testing
+                        n = n[rand_range:rand_range + audio.shape[0]]
+                    else:
+                        n = np.random.randn(audio.shape[0])
+                        # NOTE(review): nesting reconstructed from a flattened
+                        # listing; the filter is assumed to apply to synthetic
+                        # noise only -- confirm.
+                        n = random_filter(n)
+
+                    # For snr_dB == inf the multiplier is 0, i.e. no noise is added.
+                    snr_multiplier = np.sqrt((np.sum(np.abs(audio)**2)/np.sum(np.abs(n)**2))*10**(-snr_dB/10))
+                    audio = audio + snr_multiplier*n
+
+                    if (f == 'crepe'):
+                        _, model_frequency, _, _ = crepe.predict(np.concatenate([np.zeros(80),audio]), 16000, viterbi=True,center=True,verbose=0)
+                        model_cents = 1200*np.log2(model_frequency/(16000/256) + 1.0e-8)
+                    else:
+                        # 'lpcnet' baseline: the external extractor also writes a
+                        # per-frame period file, which is converted to cents.
+                        data_temp = np.memmap('./temp.raw', dtype=np.int16, shape=(audio.shape[0]), mode='w+')
+                        # data_temp[:audio.shape[0]] = (audio/(np.max(np.abs(audio)))*(2**15 - 1)).astype(np.int16)
+                        data_temp[:audio.shape[0]] = ((audio)*(2**15 - 1)).astype(np.int16)
+
+                        subprocess.run(["../../../lpcnet_xcorr_extractor", './temp.raw', './temp_xcorr.f32', './temp_period.f32'])
+                        feature_xcorr = np.fromfile('./temp_period.f32', dtype='float32')
+                        model_cents = 1200*np.log2((256/feature_xcorr + 1.0e-8) + 1.0e-8)
+
+                        os.remove('./temp.raw')
+                        os.remove('./temp_xcorr.f32')
+                        os.remove('./temp_period.f32')
+
+
+                    pitch_file_name = dir_f0 + "ref" + os.path.basename(list_files[idx])[3:-4] + ".f0"
+                    pitch = np.loadtxt(pitch_file_name)[:,0]
+                    voicing = np.loadtxt(pitch_file_name)[:,1]
+                    indmin = min(voicing.shape[0],rmse.shape[0],pitch.shape[0])
+                    pitch = pitch[:indmin]
+                    voicing = voicing[:indmin]
+                    rmse = rmse[:indmin]
+                    voicing = voicing*(rmse > 0.05*np.max(rmse))
+                    if "mic_F" in audio_file:
+                        idx_correct = np.where(pitch < 125)
+                        voicing[idx_correct] = 0
+
+                    cent = np.rint(1200*np.log2(np.divide(pitch, (16000/256), out=np.zeros_like(pitch), where=pitch!=0) + 1.0e-8)).astype('int')
+                    num_frames = min(cent.shape[0],model_cents.shape[0])
+                    pitch = pitch[:num_frames]
+                    cent = cent[:num_frames]
+                    voicing = voicing[:num_frames]
+                    model_cents = model_cents[:num_frames]
+
+                    voicing_all = np.copy(voicing)
+                    # Forcefully make regions where pitch is <65 or greater than 500 unvoiced for relevant accurate pitch comparisons for our model
+                    force_out_of_pitch = np.where(np.logical_or(pitch < 65,pitch > 500)==True)
+                    voicing_all[force_out_of_pitch] = 0
+                    C_all = C_all + np.where(voicing_all != 0)[0].shape[0]
+
+                    C_correct = C_correct + rca(cent,model_cents,voicing_all,thresh)
+                list_mean.append(C_correct/C_all)
+        # The last entry of list_mean belongs to the appended infinite-SNR condition.
+        dict_models[fname] = {}
+        dict_models[fname]['list_SNR'] = list_mean[:-1]
+        dict_models[fname]['inf'] = list_mean[-1]
+
+    return dict_models
diff --git a/dnn/torch/neural-pitch/experiments.py b/dnn/torch/neural-pitch/experiments.py
new file mode 100644
index 00000000..bc8ea7e3
--- /dev/null
+++ b/dnn/torch/neural-pitch/experiments.py
@@ -0,0 +1,38 @@
+"""
+Running the experiments;
+ 1. RCA vs SNR for our models, CREPE, LPCNet
+"""
+
+import argparse
+parser = argparse.ArgumentParser()
+
+parser.add_argument('ptdb_root', type=str, help='Root Directory for PTDB generated by running ptdb_process.sh ')
+parser.add_argument('output', type=str, help='Output dump file name')
+parser.add_argument('method', type=str, help='Output Directory to save experiment dumps',choices=['model','lpcnet','crepe'])
+parser.add_argument('--noise_dataset', type=str, help='Location of the Demand Datset',default = './',required=False)
+parser.add_argument('--noise_type', type=str, help='Type of additive noise',default = 'synthetic',choices=['synthetic','demand'],required=False)
+parser.add_argument('--pth_file', type=str, help='.pth file to analyze',default = './',required = False)
+parser.add_argument('--fraction_files_analyze', type=float, help='Fraction of PTDB dataset to test on',default = 1,required = False)
+parser.add_argument('--threshold_rca', type=float, help='Cent threshold when computing RCA',default = 50,required = False)
+parser.add_argument('--gpu_index', type=int, help='GPU index to use if multiple GPUs',default = 0,required = False)
+
+args = parser.parse_args()
+
+import os
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_index)
+
+import json
+from evaluation import cycle_eval
+
+if args.method == 'model':
+ dict_store = cycle_eval([args.pth_file], noise_type = args.noise_type, noise_dataset = args.noise_dataset, list_snr = [-20,-15,-10,-5,0,5,10,15,20], ptdb_dataset_path = args.ptdb_root,fraction = args.fraction_files_analyze,thresh = args.threshold_rca)
+else:
+ dict_store = cycle_eval([args.method], noise_type = args.noise_type, noise_dataset = args.noise_dataset, list_snr = [-20,-15,-10,-5,0,5,10,15,20], ptdb_dataset_path = args.ptdb_root,fraction = args.fraction_files_analyze,thresh = args.threshold_rca)
+
+dict_store["method"] = args.method
+if args.method == 'model':
+ dict_store['pth'] = args.pth_file
+
+with open(args.output, 'w') as fp:
+ json.dump(dict_store, fp)
diff --git a/dnn/torch/neural-pitch/export_neuralpitch_weights.py b/dnn/torch/neural-pitch/export_neuralpitch_weights.py
new file mode 100644
index 00000000..577ec882
--- /dev/null
+++ b/dnn/torch/neural-pitch/export_neuralpitch_weights.py
@@ -0,0 +1,109 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint', type=str, help='model checkpoint')
+parser.add_argument('output_dir', type=str, help='output folder')
+
+args = parser.parse_args()
+
+import torch
+import numpy as np
+
+from models import PitchDNN
+from wexchange.torch import dump_torch_weights
+from wexchange.c_export import CWriter, print_vector
+
+def c_export(args, model):
+
+ message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
+
+ writer = CWriter(os.path.join(args.output_dir, "pitchdnn_data"), message=message, model_struct_name='PitchDNN')
+ writer.header.write(
+f"""
+#include "opus_types.h"
+"""
+ )
+
+ dense_layers = [
+ ('if_upsample.0', "dense_if_upsampler_1"),
+ ('if_upsample.2', "dense_if_upsampler_2"),
+ ('downsample.0', "dense_downsampler"),
+ ("upsample.0", "dense_final_upsampler")
+ ]
+
+
+ for name, export_name in dense_layers:
+ layer = model.get_submodule(name)
+ dump_torch_weights(writer, layer, name=export_name, verbose=True, quantize=True, scale=None)
+
+ conv_layers = [
+ ('conv.1', "conv2d_1"),
+ ('conv.4', "conv2d_2")
+ ]
+
+
+ for name, export_name in conv_layers:
+ layer = model.get_submodule(name)
+ dump_torch_weights(writer, layer, name=export_name, verbose=True)
+
+
+ gru_layers = [
+ ("GRU", "gru_1"),
+ ]
+
+ max_rnn_units = max([dump_torch_weights(writer, model.get_submodule(name), export_name, verbose=True, input_sparse=False, quantize=True, scale=None, recurrent_scale=None)
+ for name, export_name in gru_layers])
+
+ writer.header.write(
+f"""
+
+#define PITCH_DNN_MAX_RNN_UNITS {max_rnn_units}
+
+"""
+ )
+
+ writer.close()
+
+
+if __name__ == "__main__":
+
+ os.makedirs(args.output_dir, exist_ok=True)
+ model = PitchDNN()
+ checkpoint = torch.load(args.checkpoint, map_location='cpu')
+ model.load_state_dict(checkpoint['state_dict'])
+ c_export(args, model)
diff --git a/dnn/torch/neural-pitch/models.py b/dnn/torch/neural-pitch/models.py
new file mode 100644
index 00000000..ce4977fd
--- /dev/null
+++ b/dnn/torch/neural-pitch/models.py
@@ -0,0 +1,178 @@
+"""
+Pitch Estimation Models and dataloaders
+ - Classification Based (Input features, output logits)
+"""
+
+import torch
+import numpy as np
+
+class PitchDNNIF(torch.nn.Module):
+
+ def __init__(self, input_dim=88, gru_dim=64, output_dim=192):
+ super().__init__()
+
+ self.activation = torch.nn.Tanh()
+ self.initial = torch.nn.Linear(input_dim, gru_dim)
+ self.hidden = torch.nn.Linear(gru_dim, gru_dim)
+ self.gru = torch.nn.GRU(input_size=gru_dim, hidden_size=gru_dim, batch_first=True)
+ self.upsample = torch.nn.Linear(gru_dim, output_dim)
+
+ def forward(self, x):
+
+ x = self.initial(x)
+ x = self.activation(x)
+ x = self.hidden(x)
+ x = self.activation(x)
+ x,_ = self.gru(x)
+ x = self.upsample(x)
+ x = self.activation(x)
+ x = x.permute(0,2,1)
+
+ return x
+
+class PitchDNNXcorr(torch.nn.Module):
+
+ def __init__(self, input_dim=90, gru_dim=64, output_dim=192):
+ super().__init__()
+
+ self.activation = torch.nn.Tanh()
+
+ self.conv = torch.nn.Sequential(
+ torch.nn.ZeroPad2d((2, 0, 1, 1)),
+ torch.nn.Conv2d(1, 8, 3, bias=True),
+ self.activation,
+ torch.nn.ZeroPad2d((2,0,1,1)),
+ torch.nn.Conv2d(8, 8, 3, bias=True),
+ self.activation,
+ torch.nn.ZeroPad2d((2,0,1,1)),
+ torch.nn.Conv2d(8, 1, 3, bias=True),
+ self.activation,
+ )
+
+ self.downsample = torch.nn.Sequential(
+ torch.nn.Linear(input_dim, gru_dim),
+ self.activation
+ )
+ self.GRU = torch.nn.GRU(input_size=gru_dim, hidden_size=gru_dim, num_layers=1, batch_first=True)
+ self.upsample = torch.nn.Sequential(
+ torch.nn.Linear(gru_dim,output_dim),
+ self.activation
+ )
+
+ def forward(self, x):
+ x = self.conv(x.unsqueeze(-1).permute(0,3,2,1)).squeeze(1)
+ x,_ = self.GRU(self.downsample(x.permute(0,2,1)))
+ x = self.upsample(x).permute(0,2,1)
+
+ return x
+
+class PitchDNN(torch.nn.Module):
+ """
+ Joint IF-xcorr
+ 1D CNN on IF, merge with xcorr, 2D CNN on merged + GRU
+ """
+
+ def __init__(self,input_IF_dim=88, input_xcorr_dim=224, gru_dim=64, output_dim=192):
+ super().__init__()
+
+ self.activation = torch.nn.Tanh()
+
+ self.if_upsample = torch.nn.Sequential(
+ torch.nn.Linear(input_IF_dim,64),
+ self.activation,
+ torch.nn.Linear(64,64),
+ self.activation,
+ )
+
+ self.conv = torch.nn.Sequential(
+ torch.nn.ZeroPad2d((2,0,1,1)),
+ torch.nn.Conv2d(1, 4, 3, bias=True),
+ self.activation,
+ torch.nn.ZeroPad2d((2,0,1,1)),
+ torch.nn.Conv2d(4, 1, 3, bias=True),
+ self.activation,
+ )
+
+ self.downsample = torch.nn.Sequential(
+ torch.nn.Linear(64 + input_xcorr_dim, gru_dim),
+ self.activation
+ )
+ self.GRU = torch.nn.GRU(input_size=gru_dim, hidden_size=gru_dim, num_layers=1, batch_first=True)
+ self.upsample = torch.nn.Sequential(
+ torch.nn.Linear(gru_dim, output_dim)
+ )
+
+ def forward(self, x):
+ xcorr_feat = x[:,:,:224]
+ if_feat = x[:,:,224:]
+ xcorr_feat = self.conv(xcorr_feat.unsqueeze(-1).permute(0,3,2,1)).squeeze(1).permute(0,2,1)
+ if_feat = self.if_upsample(if_feat)
+ x = torch.cat([xcorr_feat,if_feat],axis = - 1)
+ x,_ = self.GRU(self.downsample(x))
+ x = self.upsample(x).permute(0,2,1)
+
+ return x
+
+
+# Dataloaders
+class Loader(torch.utils.data.Dataset):
+ def __init__(self, features_if, file_pitch, confidence_threshold=0.4, dimension_if=30, context=100):
+ self.if_feat = np.memmap(features_if, dtype=np.float32).reshape(-1,3*dimension_if)
+
+ # Resolution of 20 cents
+ self.cents = np.rint(np.load(file_pitch)[0,:]/20)
+ self.cents = np.clip(self.cents,0,179)
+ self.confidence = np.load(file_pitch)[1,:]
+
+ # Filter confidence for CREPE
+ self.confidence[self.confidence < confidence_threshold] = 0
+ self.context = context
+ # Clip both to same size
+ size_common = min(self.if_feat.shape[0], self.cents.shape[0])
+ self.if_feat = self.if_feat[:size_common,:]
+ self.cents = self.cents[:size_common]
+ self.confidence = self.confidence[:size_common]
+
+ frame_max = self.if_feat.shape[0]//context
+ self.if_feat = np.reshape(self.if_feat[:frame_max*context, :],(frame_max, context,3*dimension_if))
+ self.cents = np.reshape(self.cents[:frame_max * context],(frame_max, context))
+ self.confidence = np.reshape(self.confidence[:frame_max*context],(frame_max, context))
+
+ def __len__(self):
+ return self.if_feat.shape[0]
+
+ def __getitem__(self, index):
+ return torch.from_numpy(self.if_feat[index,:,:]), torch.from_numpy(self.cents[index]), torch.from_numpy(self.confidence[index])
+
+class PitchDNNDataloader(torch.utils.data.Dataset):
+ def __init__(self, features, file_pitch, confidence_threshold=0.4, context=100, choice_data='both'):
+ self.feat = np.memmap(features, mode='r', dtype=np.int8).reshape(-1,312)
+ self.xcorr = self.feat[:,:224]
+ self.if_feat = self.feat[:,224:]
+ ground_truth = np.memmap(file_pitch, mode='r', dtype=np.float32).reshape(-1,2)
+ self.cents = np.rint(60*np.log2(ground_truth[:,0]/62.5))
+ mask = (self.cents>=0).astype('float32') * (self.cents<=180).astype('float32')
+ self.cents = np.clip(self.cents,0,179)
+ self.confidence = ground_truth[:,1] * mask
+ # Filter confidence for CREPE
+ self.confidence[self.confidence < confidence_threshold] = 0
+ self.context = context
+
+ self.choice_data = choice_data
+
+ frame_max = self.if_feat.shape[0]//context
+ self.if_feat = np.reshape(self.if_feat[:frame_max*context,:], (frame_max, context, 88))
+ self.cents = np.reshape(self.cents[:frame_max*context], (frame_max,context))
+ self.xcorr = np.reshape(self.xcorr[:frame_max*context,:], (frame_max,context, 224))
+ self.confidence = np.reshape(self.confidence[:frame_max*context], (frame_max, context))
+
+ def __len__(self):
+ return self.if_feat.shape[0]
+
+ def __getitem__(self, index):
+ if self.choice_data == 'both':
+ return torch.cat([torch.from_numpy((1./127)*self.xcorr[index,:,:]), torch.from_numpy((1./127)*self.if_feat[index,:,:])], dim=-1), torch.from_numpy(self.cents[index]), torch.from_numpy(self.confidence[index])
+ elif self.choice_data == 'if':
+ return torch.from_numpy((1./127)*self.if_feat[index,:,:]),torch.from_numpy(self.cents[index]),torch.from_numpy(self.confidence[index])
+ else:
+ return torch.from_numpy((1./127)*self.xcorr[index,:,:]),torch.from_numpy(self.cents[index]),torch.from_numpy(self.confidence[index])
diff --git a/dnn/torch/neural-pitch/neural_pitch_update.py b/dnn/torch/neural-pitch/neural_pitch_update.py
new file mode 100644
index 00000000..aa2caf99
--- /dev/null
+++ b/dnn/torch/neural-pitch/neural_pitch_update.py
@@ -0,0 +1,179 @@
+import argparse
+parser = argparse.ArgumentParser()
+
+parser.add_argument('features', type=str, help='Features generated from dump_data')
+parser.add_argument('data', type=str, help='Data generated from dump_data (offset by 5ms)')
+parser.add_argument('output', type=str, help='output .f32 feature file with replaced neural pitch')
+parser.add_argument('checkpoint', type=str, help='model checkpoint file')
+parser.add_argument('path_lpcnet_extractor', type=str, help='path to LPCNet extractor object file (generated on compilation)')
+parser.add_argument('--device', type=str, help='compute device',default = None,required = False)
+parser.add_argument('--replace_xcorr', type = bool, default = False, help='Replace LPCNet xcorr with updated one')
+
+args = parser.parse_args()
+
+import os
+
+from utils import stft, random_filter
+import subprocess
+import numpy as np
+import json
+import torch
+import tqdm
+
+from models import PitchDNNIF, PitchDNNXcorr, PitchDNN
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+if device is not None:
+ device = torch.device(args.device)
+
+# Loading the appropriate model
+checkpoint = torch.load(args.checkpoint, map_location='cpu')
+dict_params = checkpoint['config']
+
+if dict_params['data_format'] == 'if':
+ pitch_nn = PitchDNNIF(dict_params['freq_keep']*3, dict_params['gru_dim'], dict_params['output_dim'])
+elif dict_params['data_format'] == 'xcorr':
+ pitch_nn = PitchDNNXcorr(dict_params['xcorr_dim'], dict_params['gru_dim'], dict_params['output_dim'])
+else:
+ pitch_nn = PitchDNN(dict_params['freq_keep']*3, dict_params['xcorr_dim'], dict_params['gru_dim'], dict_params['output_dim'])
+
+pitch_nn.load_state_dict(checkpoint['state_dict'])
+pitch_nn = pitch_nn.to(device)
+
+N = dict_params['window_size']
+H = dict_params['hop_factor']
+freq_keep = dict_params['freq_keep']
+
+os.environ["OMP_NUM_THREADS"] = "16"
+
+
+def run_lpc(signal, lpcs, frame_length=160):
+ num_frames, lpc_order = lpcs.shape
+
+ prediction = np.concatenate(
+ [- np.convolve(signal[i * frame_length : (i + 1) * frame_length + lpc_order - 1], lpcs[i], mode='valid') for i in range(num_frames)]
+ )
+ error = signal[lpc_order :] - prediction
+
+ return prediction, error
+
+
+# Recompute the pitch (column 18) and pitch correlation (column 19) of a feature file
+# chunk by chunk with the neural pitch model; all other columns are copied from the input.
+if __name__ == "__main__":
+ # NOTE(review): the arguments were already parsed at module level; this second call is redundant.
+ args = parser.parse_args()
+
+ # features: rows of 36 float32 values, data: rows of 2 int16 values (read-only maps)
+ features = np.memmap(args.features, dtype=np.float32,mode = 'r').reshape((-1, 36))
+ data = np.memmap(args.data, dtype=np.int16,mode = 'r').reshape((-1, 2))
+
+ num_frames = features.shape[0]
+ feature_dim = features.shape[1]
+
+ assert feature_dim == 36
+
+ # the output starts as a copy of the input features; selected columns are overwritten below
+ output = np.memmap(args.output, dtype=np.float32, shape=(num_frames, feature_dim), mode='w+')
+ output[:, :36] = features
+
+ # lpc coefficients and signal
+ lpcs = features[:, 20:36]
+ sig = data[:, 1]
+
+ # constants
+ pitch_min = 32
+ pitch_max = 256
+ lpc_order = 16
+ fs = 16000
+ frame_length = 160
+ overlap_frames = 100
+ chunk_size = 10000
+ # samples of past signal prepended to every chunk
+ history_length = frame_length * overlap_frames
+ history = np.zeros(history_length, dtype=np.int16)
+ # columns of the feature matrix that are overwritten
+ pitch_position=18
+ xcorr_position=19
+ # NOTE(review): conf_position is never used below
+ conf_position=36
+
+ # from here on num_frames is derived from the signal length, not from the feature file
+ num_frames = len(sig) // 160 - 1
+
+ frame_start = 0
+ frame_stop = min(frame_start + chunk_size, num_frames)
+ signal_start = 0
+ signal_stop = frame_stop * frame_length
+
+ # NOTE(review): with this iteration count the frames after the last full chunk
+ # (at least one frame, and everything when num_frames <= chunk_size) are never visited
+ # and keep the values copied from `features` -- confirm that this is intended.
+ niters = (num_frames - 1)//chunk_size
+ for i in tqdm.trange(niters):
+ if (frame_start > num_frames - 1):
+ break
+ # chunk_la additionally holds 80 samples of look-ahead
+ chunk = np.concatenate((history, sig[signal_start:signal_stop]))
+ chunk_la = np.concatenate((history, sig[signal_start:signal_stop + 80]))
+
+ # Feature computation
+ # IF features: log magnitude plus real/imaginary part of the normalized product of each
+ # spectrum with the conjugate of np.roll(spec, 1, axis=-1); the first freq_keep bins of each are kept
+ spec = stft(x = np.concatenate([np.zeros(80),chunk_la/(2**15 - 1)]), w = 'boxcar', N = N, H = H).T
+ phase_diff = spec*np.conj(np.roll(spec,1,axis = -1))
+ phase_diff = phase_diff/(np.abs(phase_diff) + 1.0e-8)
+ idx_save = np.concatenate([np.arange(freq_keep),(N//2 + 1) + np.arange(freq_keep),2*(N//2 + 1) + np.arange(freq_keep)])
+ feature = np.concatenate([np.log(np.abs(spec) + 1.0e-8),np.real(phase_diff),np.imag(phase_diff)],axis = 0).T
+ feature_if = feature[:,idx_save]
+
+ # the chunk is written to a temporary raw file that the external extractor reads
+ data_temp = np.memmap('./temp_featcompute_' + dict_params['data_format'] + '_.raw', dtype=np.int16, shape=(chunk.shape[0]), mode='w+')
+ data_temp[:chunk.shape[0]] = chunk_la[80:].astype(np.int16)
+
+ # NOTE(review): the return code of the extractor is not checked
+ subprocess.run([args.path_lpcnet_extractor, './temp_featcompute_' + dict_params['data_format'] + '_.raw', './temp_featcompute_xcorr_' + dict_params['data_format'] + '_.raw'])
+ # extractor output: rows of 256 float32 values, flipped along the row and prefixed with a column of ones
+ feature_xcorr = np.flip(np.fromfile('./temp_featcompute_xcorr_' + dict_params['data_format'] + '_.raw', dtype='float32').reshape((-1,256),order = 'C'),axis = 1)
+ ones_zero_lag = np.expand_dims(np.ones(feature_xcorr.shape[0]),-1)
+ feature_xcorr = np.concatenate([ones_zero_lag,feature_xcorr],axis = -1)
+
+ os.remove('./temp_featcompute_' + dict_params['data_format'] + '_.raw')
+ os.remove('./temp_featcompute_xcorr_' + dict_params['data_format'] + '_.raw')
+
+ # select the model input that matches the checkpoint's data format
+ if dict_params['data_format'] == 'if':
+ feature = feature_if
+ elif dict_params['data_format'] == 'xcorr':
+ feature = feature_xcorr
+ else:
+ indmin = min(feature_if.shape[0],feature_xcorr.shape[0])
+ feature = np.concatenate([feature_xcorr[:indmin,:],feature_if[:indmin,:]],-1)
+
+ # Compute pitch with my model
+ # the arg-max class index times 20 gives cents relative to 62.5 Hz
+ model_cents = pitch_nn(torch.from_numpy(np.copy(np.expand_dims(feature,0))).float().to(device))
+ model_cents = 20*model_cents.argmax(dim=1).cpu().detach().squeeze().numpy()
+ frequency = 62.5*2**(model_cents/1200)
+
+ # drop the frames that belong to the prepended history
+ frequency = frequency[overlap_frames : overlap_frames + frame_stop - frame_start]
+
+ # convert frequencies to periods
+ periods = np.round(fs / frequency)
+
+ periods = np.clip(periods, pitch_min, pitch_max)
+
+ output[frame_start:frame_stop, pitch_position] = (periods - 100) / 50
+
+ # number of whole frames needed to cover the maximum pitch lag
+ frame_offset = (pitch_max + frame_length - 1) // frame_length
+ offset = frame_offset * frame_length
+ padding = lpc_order
+
+
+ # LPC coefficients for the frames preceding the chunk; zero-padded at the start of the file
+ if frame_start < frame_offset:
+ lpc_coeffs = np.concatenate((np.zeros((frame_offset - frame_start, lpc_order), dtype=np.float32), lpcs[:frame_stop]))
+ else:
+ lpc_coeffs = lpcs[frame_start - frame_offset : frame_stop]
+
+ pred, error = run_lpc(chunk[history_length - offset - padding :], lpc_coeffs, frame_length=frame_length)
+
+ # normalized correlation between each residual frame and the frame one pitch period earlier
+ # NOTE(review): the loop variable below reuses the name `i` of the outer loop
+ xcorr = np.zeros(frame_stop - frame_start)
+ for i, p in enumerate(periods.astype(np.int16)):
+ if p > 0:
+ f1 = error[offset + i * frame_length : offset + (i + 1) * frame_length]
+ f2 = error[offset + i * frame_length - p : offset + (i + 1) * frame_length - p]
+ xcorr[i] = np.dot(f1, f2) / np.sqrt(np.dot(f1, f1) * np.dot(f2, f2) + 1e-6)
+
+ output[frame_start:frame_stop, xcorr_position] = xcorr - 0.5
+
+ # update buffers and indices
+ history = chunk[-history_length :]
+
+ frame_start += chunk_size
+ frame_stop += chunk_size
+ frame_stop = min(frame_stop, num_frames)
+
+ signal_start = frame_start * frame_length
+ signal_stop = frame_stop * frame_length
diff --git a/dnn/torch/neural-pitch/ptdb_process.sh b/dnn/torch/neural-pitch/ptdb_process.sh
new file mode 100644
index 00000000..f4df5465
--- /dev/null
+++ b/dnn/torch/neural-pitch/ptdb_process.sh
@@ -0,0 +1,34 @@
+# Copy into PTDB root directory and run to combine all the male/female raw audio/references into below directories
+
+# Make folder for combined audio
+mkdir -p './combined_mic_16k/'
+# Make folder for combined pitch reference
+mkdir -p './combined_reference_f0/'
+
+# Resample male and female audio to 16 kHz, 16-bit signed raw files
+for gender in MALE FEMALE; do
+    for i in ./"$gender"/MIC/**/*.wav; do
+        # an unmatched pattern is passed through literally; skip it
+        [ -e "$i" ] || continue
+        j="$(basename "$i" .wav)"
+        echo "$j"
+        sox -r 48000 -b 16 -e signed-integer "$i" -r 16000 -b 16 -e signed-integer "./combined_mic_16k/$j.raw"
+    done
+done
+
+# Copy male and female reference pitch files
+for gender in MALE FEMALE; do
+    for i in ./"$gender"/REF/**/*.f0; do
+        [ -e "$i" ] || continue
+        # strip the .f0 suffix (not .wav) so the printed name matches the audio naming
+        j="$(basename "$i" .f0)"
+        echo "$j"
+        cp "$i" ./combined_reference_f0/
+    done
+done \ No newline at end of file
diff --git a/dnn/torch/neural-pitch/run_crepe.py b/dnn/torch/neural-pitch/run_crepe.py
new file mode 100644
index 00000000..25d65241
--- /dev/null
+++ b/dnn/torch/neural-pitch/run_crepe.py
@@ -0,0 +1,72 @@
+"""
+Perform Data Augmentation (Gain, Additive Noise, Random Filtering) on Input TTS Data
+1. Read in chunks and compute clean pitch first
+2. Then add in augmentation (Noise/Level/Response)
+ - Adds filtered noise from the "Demand" dataset, https://zenodo.org/record/1227121#.XRKKxYhKiUk
+ - When using the Demand Dataset, consider each channel as a possible noise input, and keep the first 4 minutes of noise for training
+3. Use this "augmented" audio for feature computation, and compute pitch using CREPE on the clean input
+
+Notes: To ensure consistency with the discovered CREPE offset, we do the following
+- We pad the input audio to the zero-centered CREPE estimator with 80 zeros
+- We pad the input audio to our feature computation with 160 zeros to center them
+"""
+
+import argparse
+parser = argparse.ArgumentParser()
+
+parser.add_argument('data', type=str, help='input raw audio data')
+parser.add_argument('output', type=str, help='output directory')
+parser.add_argument('--gpu-index', type=int, help='GPU index to use if multiple GPUs',default = 0,required = False)
+parser.add_argument('--chunk-size-frames', type=int, help='Number of frames to process at a time',default = 100000,required = False)
+
+args = parser.parse_args()
+
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_index)
+
+import numpy as np
+import tqdm
+import crepe
+
+data = np.memmap(args.data, dtype=np.int16,mode = 'r')
+
+# list_features = []
+list_cents = []
+list_confidences = []
+
+min_period = 32
+max_period = 256
+f_ref = 16000/max_period
+chunk_size_frames = args.chunk_size_frames
+chunk_size = chunk_size_frames*160
+
+nb_chunks = (data.shape[0]+79)//chunk_size+1
+
+output_data = np.zeros((0,2),dtype='float32')
+
+for i in tqdm.trange(nb_chunks):
+ if i==0:
+ chunk = np.concatenate([np.zeros(80),data[:chunk_size-80]])
+ elif i==nb_chunks-1:
+ chunk = data[i*chunk_size-80:]
+ else:
+ chunk = data[i*chunk_size-80:(i+1)*chunk_size-80]
+ chunk = chunk/np.array(32767.,dtype='float32')
+
+ # Clean Pitch/Confidence Estimate
+ # Padding input to CREPE by 80 samples to ensure it aligns
+ _, pitch, confidence, _ = crepe.predict(chunk, 16000, center=True, viterbi=True,verbose=0)
+ pitch = pitch[:chunk_size_frames]
+ confidence = confidence[:chunk_size_frames]
+
+
+ # Filter out of range pitches/confidences
+ confidence[pitch < 16000/max_period] = 0
+ confidence[pitch > 16000/min_period] = 0
+ pitch = np.reshape(pitch, (-1, 1))
+ confidence = np.reshape(confidence, (-1, 1))
+ out = np.concatenate([pitch, confidence], axis=-1, dtype='float32')
+ output_data = np.concatenate([output_data, out], axis=0)
+
+
+output_data.tofile(args.output)
diff --git a/dnn/torch/neural-pitch/training.py b/dnn/torch/neural-pitch/training.py
new file mode 100644
index 00000000..62da1351
--- /dev/null
+++ b/dnn/torch/neural-pitch/training.py
@@ -0,0 +1,162 @@
+"""
+Training the neural pitch estimator
+
+"""
+
+import os
+import argparse
+parser = argparse.ArgumentParser()
+
+parser.add_argument('features', type=str, help='.f32 IF Features for training (generated by augmentation script)')
+parser.add_argument('features_pitch', type=str, help='.npy Pitch file for training (generated by augmentation script)')
+parser.add_argument('output_folder', type=str, help='Output directory to store the model weights and config')
+parser.add_argument('data_format', type=str, help='Choice of Input Data',choices=['if','xcorr','both'])
+parser.add_argument('--gpu_index', type=int, help='GPU index to use if multiple GPUs',default = 0,required = False)
+parser.add_argument('--confidence_threshold', type=float, help='Confidence value below which pitch will be neglected during training',default = 0.4,required = False)
+parser.add_argument('--context', type=int, help='Sequence length during training',default = 100,required = False)
+parser.add_argument('--N', type=int, help='STFT window size',default = 320,required = False)
+parser.add_argument('--H', type=int, help='STFT Hop size',default = 160,required = False)
+parser.add_argument('--xcorr_dimension', type=int, help='Dimension of Input cross-correlation',default = 257,required = False)
+parser.add_argument('--freq_keep', type=int, help='Number of Frequencies to keep',default = 30,required = False)
+parser.add_argument('--gru_dim', type=int, help='GRU Dimension',default = 64,required = False)
+parser.add_argument('--output_dim', type=int, help='Output dimension',default = 192,required = False)
+parser.add_argument('--learning_rate', type=float, help='Learning Rate',default = 1.0e-3,required = False)
+parser.add_argument('--epochs', type=int, help='Number of training epochs',default = 50,required = False)
+parser.add_argument('--choice_cel', type=str, help='Choice of Cross Entropy Loss (default or robust)',choices=['default','robust'],default = 'default',required = False)
+parser.add_argument('--prefix', type=str, help="prefix for model export, default: model", default='model')
+parser.add_argument('--initial-checkpoint', type=str, help='initial checkpoint to start training from, default: None', default=None)
+
+
+args = parser.parse_args()
+
+# import os
+# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+# os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_index)
+
+# Fixing the seeds for reproducability
+import time
+np_seed = int(time.time())
+torch_seed = int(time.time())
+
+import torch
+torch.manual_seed(torch_seed)
+import numpy as np
+np.random.seed(np_seed)
+from utils import count_parameters
+import tqdm
+from models import PitchDNN, PitchDNNIF, PitchDNNXcorr, PitchDNNDataloader
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+if args.data_format == 'if':
+ pitch_nn = PitchDNNIF(3 * args.freq_keep - 2, args.gru_dim, args.output_dim)
+elif args.data_format == 'xcorr':
+ pitch_nn = PitchDNNXcorr(args.xcorr_dimension, args.gru_dim, args.output_dim)
+else:
+ pitch_nn = PitchDNN(3 * args.freq_keep - 2, 224, args.gru_dim, args.output_dim)
+
+if type(args.initial_checkpoint) != type(None):
+ checkpoint = torch.load(args.initial_checkpoint, map_location='cpu')
+ pitch_nn.load_state_dict(checkpoint['state_dict'], strict=False)
+
+
+dataset_training = PitchDNNDataloader(args.features,args.features_pitch,args.confidence_threshold,args.context,args.data_format)
+
+def loss_custom(logits,labels,confidence,choice = 'default',nmax = 192,q = 0.7):
+ logits_softmax = torch.nn.Softmax(dim = 1)(logits).permute(0,2,1)
+ labels_one_hot = torch.nn.functional.one_hot(labels.long(),nmax)
+
+ if choice == 'default':
+ # Categorical Cross Entropy
+ CE = -torch.sum(torch.log(logits_softmax*labels_one_hot + 1.0e-6)*labels_one_hot,dim=-1)
+ CE = torch.mean(confidence*CE)
+
+ else:
+ # Robust Cross Entropy
+ CE = (1.0/q)*(1 - torch.sum(torch.pow(logits_softmax*labels_one_hot + 1.0e-7,q),dim=-1) )
+ CE = torch.sum(confidence*CE)
+
+ return CE
+
+def accuracy(logits,labels,confidence,choice = 'default',nmax = 192,q = 0.7):
+ logits_softmax = torch.nn.Softmax(dim = 1)(logits).permute(0,2,1)
+ pred_pitch = torch.argmax(logits_softmax, 2)
+ accuracy = (pred_pitch != labels.long())*1.
+ return 1.-torch.mean(confidence*accuracy)
+
+train_dataset, test_dataset = torch.utils.data.random_split(dataset_training, [0.95,0.05], generator=torch.Generator().manual_seed(torch_seed))
+
+batch_size = 256
+train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)
+test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)
+
+pitch_nn = pitch_nn.to(device)
+num_params = count_parameters(pitch_nn)
+learning_rate = args.learning_rate
+model_opt = torch.optim.Adam(pitch_nn.parameters(), lr = learning_rate)
+
+num_epochs = args.epochs
+
+for epoch in range(num_epochs):
+ losses = []
+ accs = []
+ pitch_nn.train()
+ with tqdm.tqdm(train_dataloader) as train_epoch:
+ for i, (xi, yi, ci) in enumerate(train_epoch):
+ yi, xi, ci = yi.to(device, non_blocking=True), xi.to(device, non_blocking=True), ci.to(device, non_blocking=True)
+ pi = pitch_nn(xi.float())
+ loss = loss_custom(logits = pi,labels = yi,confidence = ci,choice = args.choice_cel,nmax = args.output_dim)
+ acc = accuracy(logits = pi,labels = yi,confidence = ci,choice = args.choice_cel,nmax = args.output_dim)
+ acc = acc.detach()
+
+ model_opt.zero_grad()
+ loss.backward()
+ model_opt.step()
+
+ losses.append(loss.item())
+ accs.append(acc.item())
+ avg_loss = np.mean(losses)
+ avg_acc = np.mean(accs)
+ train_epoch.set_postfix({"Train Epoch" : epoch, "Train Loss":avg_loss, "acc" : avg_acc.item()})
+
+ if epoch % 5 == 0:
+ pitch_nn.eval()
+ losses = []
+ with tqdm.tqdm(test_dataloader) as test_epoch:
+ for i, (xi, yi, ci) in enumerate(test_epoch):
+ yi, xi, ci = yi.to(device, non_blocking=True), xi.to(device, non_blocking=True), ci.to(device, non_blocking=True)
+ pi = pitch_nn(xi.float())
+ loss = loss_custom(logits = pi,labels = yi,confidence = ci,choice = args.choice_cel,nmax = args.output_dim)
+ losses.append(loss.item())
+ avg_loss = np.mean(losses)
+ test_epoch.set_postfix({"Epoch" : epoch, "Test Loss":avg_loss})
+
+pitch_nn.eval()
+
+config = dict(
+ data_format=args.data_format,
+ epochs=num_epochs,
+ window_size= args.N,
+ hop_factor= args.H,
+ freq_keep=args.freq_keep,
+ batch_size=batch_size,
+ learning_rate=learning_rate,
+ confidence_threshold=args.confidence_threshold,
+ model_parameters=num_params,
+ np_seed=np_seed,
+ torch_seed=torch_seed,
+ xcorr_dim=args.xcorr_dimension,
+ dim_input=3*args.freq_keep - 2,
+ gru_dim=args.gru_dim,
+ output_dim=args.output_dim,
+ choice_cel=args.choice_cel,
+ context=args.context,
+)
+
+model_save_path = os.path.join(args.output_folder, f"{args.prefix}_{args.data_format}.pth")
+checkpoint = {
+ 'state_dict': pitch_nn.state_dict(),
+ 'config': config
+}
+torch.save(checkpoint, model_save_path)
diff --git a/dnn/torch/neural-pitch/utils.py b/dnn/torch/neural-pitch/utils.py
new file mode 100644
index 00000000..8930ad19
--- /dev/null
+++ b/dnn/torch/neural-pitch/utils.py
@@ -0,0 +1,59 @@
+"""
+Utility functions that are commonly used
+"""
+
+import numpy as np
+from scipy.signal import windows, lfilter
+from prettytable import PrettyTable
+
+
+# Source: https://gist.github.com/thongonary/026210fc186eb5056f2b6f1ca362d912
+def count_parameters(model):
+ table = PrettyTable(["Modules", "Parameters"])
+ total_params = 0
+ for name, parameter in model.named_parameters():
+ if not parameter.requires_grad: continue
+ param = parameter.numel()
+ table.add_row([name, param])
+ total_params+=param
+ print(table)
+ print(f"Total Trainable Params: {total_params}")
+ return total_params
+
+def stft(x, w = 'boxcar', N = 320, H = 160):
+ x = np.concatenate([x,np.zeros(N)])
+ # win_custom = np.concatenate([windows.hann(80)[:40],np.ones(240),windows.hann(80)[40:]])
+ return np.stack([np.fft.rfft(x[i:i + N]*windows.get_window(w,N)) for i in np.arange(0,x.shape[0]-N,H)])
+
+def random_filter(x):
+ # Randomly filter x with second order IIR filter with coefficients in between -3/8,3/8
+ filter_coeff = np.random.uniform(low = -3.0/8, high = 3.0/8, size = 4)
+ b = [1,filter_coeff[0],filter_coeff[1]]
+ a = [1,filter_coeff[2],filter_coeff[3]]
+ return lfilter(b,a,x)
+
+def feature_xform(feature):
+ """
+ Take as input the (N * 256) xcorr features output by LPCNet and stack three versions of
+ them along a new last axis:
+ 1. the features downsampled by 2 along axis 1, smoothed and zero-padded back to the input width
+ 2. the unchanged features
+ 3. the features upsampled by 2 along axis 1, smoothed and truncated to the input width
+ """
+
+ from scipy.signal import resample_poly, lfilter
+
+
+ # upsampled branch: keep only the first feature.shape[1] columns
+ feature_US = lfilter([0.25,0.5,0.25],[1],resample_poly(feature,2,1,axis = 1),axis = 1)[:,:feature.shape[1]]
+ # downsampled branch: pad with zeros up to the input width
+ feature_DS = lfilter([0.5,0.5],[1],resample_poly(feature,1,2,axis = 1),axis = 1)
+ Z_append = np.zeros((feature.shape[0],feature.shape[1] - feature_DS.shape[1]))
+ feature_DS = np.concatenate([feature_DS,Z_append],axis = -1)
+
+ feature = np.stack((feature_DS,feature,feature_US),axis = -1)
+
+ return feature
diff --git a/dnn/torch/osce/README.md b/dnn/torch/osce/README.md
new file mode 100644
index 00000000..74d1f505
--- /dev/null
+++ b/dnn/torch/osce/README.md
@@ -0,0 +1,65 @@
+# Opus Speech Coding Enhancement
+
+This folder hosts models for enhancing Opus SILK.
+
+## Environment setup
+The code is tested with python 3.11. Conda setup is done via
+
+
+`conda create -n osce python=3.11`
+
+`conda activate osce`
+
+`python -m pip install -r requirements.txt`
+
+
+## Generating training data
+The first step is to convert all training items to 16 kHz, 16-bit PCM and then concatenate them. A convenient way to do this is to create a file list and then run
+
+`python scripts/concatenator.py filelist 16000 dataset/clean.s16 --db_min -40 --db_max 0`
+
+which additionally applies some random scaling.
+
+The second step is to run a patched version of opus_demo in the dataset folder, which will produce the coded output and add feature files. To build the patched opus_demo binary, check out the exp-neural-silk-enhancement branch and build opus_demo the usual way. Then run
+
+`cd dataset && <path_to_patched_opus_demo>/opus_demo voip 16000 1 9000 -silk_random_switching 249 clean.s16 coded.s16 `
+
+The argument to -silk_random_switching specifies the number of frames after which parameters are switched randomly.
+
+## Regression loss based training
+Create a default setup for LACE or NoLACE via
+
+`python make_default_setup.py model.yml --model lace/nolace --path2dataset <path2dataset>`
+
+Then run
+
+`python train_model.py model.yml <output folder> --no-redirect`
+
+for running the training script in foreground or
+
+`nohup python train_model.py model.yml <output folder> &`
+
+to run it in background. In the latter case the output is written to `<output folder>/out.txt`.
+
+## Adversarial training (NoLACE only)
+Create a default setup for NoLACE via
+
+`python make_default_setup.py nolace_adv.yml --model nolace --adversarial --path2dataset <path2dataset>`
+
+Then run
+
+`python adv_train_model.py nolace_adv.yml <output folder> --no-redirect`
+
+for running the training script in foreground or
+
+`nohup python adv_train_model.py nolace_adv.yml <output folder> &`
+
+to run it in background. In the latter case the output is written to `<output folder>/out.txt`.
+
+## Inference
+Generating inference data is analogous to generating training data. Given an item 'item1.wav' run
+`mkdir item1.se && sox item1.wav -r 16000 -e signed-integer -b 16 item1.raw && cd item1.se && <path_to_patched_opus_demo>/opus_demo voip 16000 1 <bitrate> ../item1.raw noisy.s16`
+
+The folder item1.se then serves as input for the test_model.py script or for the --testdata argument of train_model.py or adv_train_model.py.
+
+Checkpoints of pre-trained models are located here: https://media.xiph.org/lpcnet/models/lace-20231019.tar.gz \ No newline at end of file
diff --git a/dnn/torch/osce/adv_train_model.py b/dnn/torch/osce/adv_train_model.py
new file mode 100644
index 00000000..7db859e4
--- /dev/null
+++ b/dnn/torch/osce/adv_train_model.py
@@ -0,0 +1,462 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+import math as m
+import random
+
+import yaml
+
+from tqdm import tqdm
+
+try:
+ import git
+ has_git = True
+except:
+ has_git = False
+
+import torch
+from torch.optim.lr_scheduler import LambdaLR
+import torch.nn.functional as F
+
+from scipy.io import wavfile
+import numpy as np
+import pesq
+
+from data import SilkEnhancementSet
+from models import model_dict
+
+
+from utils.silk_features import load_inference_data
+from utils.misc import count_parameters, retain_grads, get_grad_norm, create_weights
+
+from losses.stft_loss import MRSTFTLoss, MRLogMelLoss
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('setup', type=str, help='setup yaml file')
+parser.add_argument('output', type=str, help='output path')
+parser.add_argument('--device', type=str, help='compute device', default=None)
+parser.add_argument('--initial-checkpoint', type=str, help='initial checkpoint', default=None)
+parser.add_argument('--testdata', type=str, help='path to features and signal for testing', default=None)
+parser.add_argument('--no-redirect', action='store_true', help='disables re-direction of stdout')
+
+args = parser.parse_args()
+
+
+torch.set_num_threads(4)
+
+with open(args.setup, 'r') as f:
+ setup = yaml.load(f.read(), yaml.FullLoader)
+
+checkpoint_prefix = 'checkpoint'
+output_prefix = 'output'
+setup_name = 'setup.yml'
+output_file='out.txt'
+
+
+# check model
+if not 'name' in setup['model']:
+ print(f'warning: did not find model entry in setup, using default PitchPostFilter')
+ model_name = 'pitchpostfilter'
+else:
+ model_name = setup['model']['name']
+
# prepare output folder: ask for confirmation before re-using an existing one
if os.path.exists(args.output):
    print("warning: output folder exists")

    reply = input('continue? (y/n): ')
    while reply not in {'y', 'n'}:
        reply = input('continue? (y/n): ')

    if reply == 'n':
        # os._exit() requires a status argument and raised a TypeError when
        # called without one; terminate explicitly instead. The non-zero
        # status signals that no training was run.
        sys.exit(1)
else:
    os.makedirs(args.output, exist_ok=True)
+
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+# add repo info to setup
+if has_git:
+ working_dir = os.path.split(__file__)[0]
+ try:
+ repo = git.Repo(working_dir, search_parent_directories=True)
+ setup['repo'] = dict()
+ hash = repo.head.object.hexsha
+ urls = list(repo.remote().urls)
+ is_dirty = repo.is_dirty()
+
+ if is_dirty:
+ print("warning: repo is dirty")
+
+ setup['repo']['hash'] = hash
+ setup['repo']['urls'] = urls
+ setup['repo']['dirty'] = is_dirty
+ except:
+ has_git = False
+
+# dump setup
+with open(os.path.join(args.output, setup_name), 'w') as f:
+ yaml.dump(setup, f)
+
+
+ref = None
+if args.testdata is not None:
+
+ testsignal, features, periods, numbits = load_inference_data(args.testdata, **setup['data'])
+
+ inference_test = True
+ inference_folder = os.path.join(args.output, 'inference_test')
+ os.makedirs(os.path.join(args.output, 'inference_test'), exist_ok=True)
+
+ try:
+ ref = np.fromfile(os.path.join(args.testdata, 'clean.s16'), dtype=np.int16)
+ except:
+ pass
+else:
+ inference_test = False
+
+# training parameters
+batch_size = setup['training']['batch_size']
+epochs = setup['training']['epochs']
+lr = setup['training']['lr']
+lr_decay_factor = setup['training']['lr_decay_factor']
+lr_gen = lr * setup['training']['gen_lr_reduction']
+lambda_feat = setup['training']['lambda_feat']
+lambda_reg = setup['training']['lambda_reg']
+adv_target = setup['training'].get('adv_target', 'target')
+
+# load training dataset
+data_config = setup['data']
+data = SilkEnhancementSet(setup['dataset'], **data_config)
+
+# load validation dataset if given
+if 'validation_dataset' in setup:
+ validation_data = SilkEnhancementSet(setup['validation_dataset'], **data_config)
+
+ validation_dataloader = torch.utils.data.DataLoader(validation_data, batch_size=batch_size, drop_last=True, num_workers=4)
+
+ run_validation = True
+else:
+ run_validation = False
+
+# create model
+model = model_dict[model_name](*setup['model']['args'], **setup['model']['kwargs'])
+
+# create discriminator
+disc_name = setup['discriminator']['name']
+disc = model_dict[disc_name](
+ *setup['discriminator']['args'], **setup['discriminator']['kwargs']
+)
+
# set compute device: honour --device when given, otherwise prefer CUDA and
# fall back to the CPU
if args.device is None:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device(args.device)
+
+# dataloader
+dataloader = torch.utils.data.DataLoader(data, batch_size=batch_size, drop_last=True, shuffle=True, num_workers=4)
+
+# optimizer is introduced to trainable parameters
+parameters = [p for p in model.parameters() if p.requires_grad]
+optimizer = torch.optim.Adam(parameters, lr=lr_gen)
+
+# disc optimizer
+parameters = [p for p in disc.parameters() if p.requires_grad]
+optimizer_disc = torch.optim.Adam(parameters, lr=lr, betas=[0.5, 0.9])
+
+# learning rate scheduler
+scheduler = LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))
+
# Optionally resume from a checkpoint. Only the generator weights are
# mandatory; every other entry is restored when it is present.
if args.initial_checkpoint is not None:
    print(f"loading state dict from {args.initial_checkpoint}...")
    chkpt = torch.load(args.initial_checkpoint, map_location=device)
    model.load_state_dict(chkpt['state_dict'])

    if 'disc_state_dict' in chkpt:
        print(f"loading discriminator state dict from {args.initial_checkpoint}...")
        disc.load_state_dict(chkpt['disc_state_dict'])

    if 'optimizer_state_dict' in chkpt:
        print(f"loading optimizer state dict from {args.initial_checkpoint}...")
        optimizer.load_state_dict(chkpt['optimizer_state_dict'])

    if 'disc_optimizer_state_dict' in chkpt:
        print(f"loading discriminator optimizer state dict from {args.initial_checkpoint}...")
        optimizer_disc.load_state_dict(chkpt['disc_optimizer_state_dict'])

    # Checkpoints store the scheduler under 'scheduler_state_dict'; the
    # previous test for 'scheduler_state_disc' never matched, so the scheduler
    # state was silently dropped on resume.
    if 'scheduler_state_dict' in chkpt:
        print(f"loading scheduler state dict from {args.initial_checkpoint}...")
        scheduler.load_state_dict(chkpt['scheduler_state_dict'])

    # restoring the torch RNG state is currently disabled
    # if 'torch_rng_state' in chkpt:
    #     print(f"setting torch RNG state from {args.initial_checkpoint}...")
    #     torch.set_rng_state(chkpt['torch_rng_state'])

    if 'numpy_rng_state' in chkpt:
        print(f"setting numpy RNG state from {args.initial_checkpoint}...")
        np.random.set_state(chkpt['numpy_rng_state'])

    if 'python_rng_state' in chkpt:
        print(f"setting Python RNG state from {args.initial_checkpoint}...")
        random.setstate(chkpt['python_rng_state'])
+
+# loss
+w_l1 = setup['training']['loss']['w_l1']
+w_lm = setup['training']['loss']['w_lm']
+w_slm = setup['training']['loss']['w_slm']
+w_sc = setup['training']['loss']['w_sc']
+w_logmel = setup['training']['loss']['w_logmel']
+w_wsc = setup['training']['loss']['w_wsc']
+w_xcorr = setup['training']['loss']['w_xcorr']
+w_sxcorr = setup['training']['loss']['w_sxcorr']
+w_l2 = setup['training']['loss']['w_l2']
+
+w_sum = w_l1 + w_lm + w_sc + w_logmel + w_wsc + w_slm + w_xcorr + w_sxcorr + w_l2
+
+stftloss = MRSTFTLoss(sc_weight=w_sc, log_mag_weight=w_lm, wsc_weight=w_wsc, smooth_log_mag_weight=w_slm, sxcorr_weight=w_sxcorr).to(device)
+logmelloss = MRLogMelLoss().to(device)
+
def xcorr_loss(y_true, y_pred):
    """
    One minus the normalized cross-correlation, averaged over the batch.

    All dimensions except the first are reduced per item; 1e-9 is added under
    the square root to avoid a division by zero for silent signals.
    """
    reduce_dims = list(range(1, y_true.dim()))

    inner_product = torch.sum(y_true * y_pred, dim=reduce_dims)
    energy_true = torch.sum(y_true ** 2, dim=reduce_dims)
    energy_pred = torch.sum(y_pred ** 2, dim=reduce_dims)

    per_item = 1 - inner_product / torch.sqrt(energy_true * energy_pred + 1e-9)

    return torch.mean(per_item)
+
def td_l2_norm(y_true, y_pred):
    """
    Time-domain mean squared error divided by the RMS of y_pred.

    Both statistics are taken over all dimensions except the first; the
    per-item ratios are then averaged over the batch. 1e-6 is added to the
    RMS to keep the division well defined.
    """
    reduce_dims = list(range(1, y_true.dim()))

    mse = torch.mean((y_true - y_pred) ** 2, dim=reduce_dims)
    rms_pred = torch.mean(y_pred ** 2, dim=reduce_dims) ** .5

    per_item = mse / (rms_pred + 1e-6)

    return per_item.mean()
+
def td_l1(y_true, y_pred, pow=0):
    """
    Time-domain mean absolute error with optional level normalization.

    The per-item error is divided by (mean(|y_pred|) + 1e-9) ** pow, so the
    default pow=0 leaves the error unnormalized. Statistics are taken over
    all dimensions except the first and the result is averaged over the batch.
    """
    reduce_dims = list(range(1, y_true.dim()))

    abs_error = torch.mean(torch.abs(y_true - y_pred), dim=reduce_dims)
    level = torch.mean(torch.abs(y_pred), dim=reduce_dims) + 1e-9

    return torch.mean(abs_error / (level ** pow))
+
def criterion(x, y):
    """
    Combined regression loss, normalized by the sum of all loss weights.

    The terms are accumulated in a fixed order: level-normalized L1, the
    multi-resolution STFT loss (which is constructed with its own weights),
    log-mel loss, cross-correlation loss and normalized L2.
    """
    total = w_l1 * td_l1(x, y, pow=1)
    total = total + stftloss(x, y)
    total = total + w_logmel * logmelloss(x, y)
    total = total + w_xcorr * xcorr_loss(x, y)
    total = total + w_l2 * td_l2_norm(x, y)

    return total / w_sum
+
+
+# model checkpoint
+checkpoint = {
+ 'setup' : setup,
+ 'state_dict' : model.state_dict(),
+ 'loss' : -1
+}
+
+
+if not args.no_redirect:
+ print(f"re-directing output to {os.path.join(args.output, output_file)}")
+ sys.stdout = open(os.path.join(args.output, output_file), "w")
+
+
+print("summary:")
+
+print(f"generator: {count_parameters(model.cpu()) / 1e6:5.3f} M parameters")
+if hasattr(model, 'flop_count'):
+ print(f"generator: {model.flop_count(16000) / 1e6:5.3f} MFLOPS")
+print(f"discriminator: {count_parameters(disc.cpu()) / 1e6:5.3f} M parameters")
+
+if ref is not None:
+ noisy = np.fromfile(os.path.join(args.testdata, 'noisy.s16'), dtype=np.int16)
+ initial_mos = pesq.pesq(16000, ref, noisy, mode='wb')
+ print(f"initial MOS (PESQ): {initial_mos}")
+
+best_loss = 1e9
+log_interval = 10
+
+
+m_r = 0
+m_f = 0
+s_r = 1
+s_f = 1
+
def optimizer_to(optim, device):
    """
    Move the tensors held in an optimizer's state to the given device.

    Tensors stored directly in optim.state and tensors stored one level down
    in per-parameter dictionaries are moved, together with their gradients
    when they have any. Non-tensor entries are left untouched.
    """
    def _move(tensor):
        tensor.data = tensor.data.to(device)
        if tensor._grad is not None:
            tensor._grad.data = tensor._grad.data.to(device)

    for state in optim.state.values():
        if isinstance(state, torch.Tensor):
            _move(state)
        elif isinstance(state, dict):
            for entry in state.values():
                if isinstance(entry, torch.Tensor):
                    _move(entry)
+
+optimizer_to(optimizer, device)
+optimizer_to(optimizer_disc, device)
+
+retain_grads(model)
+retain_grads(disc)
+
+for ep in range(1, epochs + 1):
+ print(f"training epoch {ep}...")
+
+ model.to(device)
+ disc.to(device)
+ model.train()
+ disc.train()
+
+ running_disc_loss = 0
+ running_adv_loss = 0
+ running_feature_loss = 0
+ running_reg_loss = 0
+ running_disc_grad_norm = 0
+ running_model_grad_norm = 0
+
+ with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:
+ for i, batch in enumerate(tepoch):
+
+ # set gradients to zero
+ optimizer.zero_grad()
+
+ # push batch to device
+ for key in batch:
+ batch[key] = batch[key].to(device)
+
+ target = batch['target'].to(device)
+ disc_target = batch[adv_target].to(device)
+
+ # calculate model output
+ output = model(batch['signals'].permute(0, 2, 1), batch['features'], batch['periods'], batch['numbits'])
+
+ # discriminator update
+ scores_gen = disc(output.detach())
+ scores_real = disc(disc_target.unsqueeze(1))
+
+ disc_loss = 0
+ for score in scores_gen:
+ disc_loss += (((score[-1]) ** 2)).mean()
+ m_f = 0.9 * m_f + 0.1 * score[-1].detach().mean().cpu().item()
+ s_f = 0.9 * s_f + 0.1 * score[-1].detach().std().cpu().item()
+
+ for score in scores_real:
+ disc_loss += (((1 - score[-1]) ** 2)).mean()
+ m_r = 0.9 * m_r + 0.1 * score[-1].detach().mean().cpu().item()
+ s_r = 0.9 * s_r + 0.1 * score[-1].detach().std().cpu().item()
+
+ disc_loss = 0.5 * disc_loss / len(scores_gen)
+ winning_chance = 0.5 * m.erfc( (m_r - m_f) / m.sqrt(2 * (s_f**2 + s_r**2)) )
+
+ disc.zero_grad()
+ disc_loss.backward()
+
+ running_disc_grad_norm += get_grad_norm(disc).detach().cpu().item()
+
+ optimizer_disc.step()
+
+ # generator update
+ scores_gen = disc(output)
+
+ # calculate loss
+ loss_reg = criterion(output.squeeze(1), target)
+
+ num_discs = len(scores_gen)
+ gen_loss = 0
+ for score in scores_gen:
+ gen_loss += (((1 - score[-1]) ** 2)).mean() / num_discs
+
+ loss_feat = 0
+ for k in range(num_discs):
+ num_layers = len(scores_gen[k]) - 1
+ f = 4 / num_discs / num_layers
+ for l in range(num_layers):
+ loss_feat += f * F.l1_loss(scores_gen[k][l], scores_real[k][l].detach())
+
+ model.zero_grad()
+
+ (gen_loss + lambda_feat * loss_feat + lambda_reg * loss_reg).backward()
+
+ optimizer.step()
+
+ # sparsification
+ if hasattr(model, 'sparsifier'):
+ model.sparsifier()
+
+ running_model_grad_norm += get_grad_norm(model).detach().cpu().item()
+ running_adv_loss += gen_loss.detach().cpu().item()
+ running_disc_loss += disc_loss.detach().cpu().item()
+ running_feature_loss += lambda_feat * loss_feat.detach().cpu().item()
+ running_reg_loss += lambda_reg * loss_reg.detach().cpu().item()
+
+ # update status bar
+ if i % log_interval == 0:
+ tepoch.set_postfix(adv_loss=f"{running_adv_loss/(i + 1):8.7f}",
+ disc_loss=f"{running_disc_loss/(i + 1):8.7f}",
+ feat_loss=f"{running_feature_loss/(i + 1):8.7f}",
+ reg_loss=f"{running_reg_loss/(i + 1):8.7f}",
+ model_gradnorm=f"{running_model_grad_norm/(i+1):8.7f}",
+ disc_gradnorm=f"{running_disc_grad_norm/(i+1):8.7f}",
+ wc=f"{100*winning_chance:5.2f}%")
+
+
+ # save checkpoint
+ checkpoint['state_dict'] = model.state_dict()
+ checkpoint['disc_state_dict'] = disc.state_dict()
+ checkpoint['optimizer_state_dict'] = optimizer.state_dict()
+ checkpoint['disc_optimizer_state_dict'] = optimizer_disc.state_dict()
+ checkpoint['scheduler_state_dict'] = scheduler.state_dict()
+ checkpoint['torch_rng_state'] = torch.get_rng_state()
+ checkpoint['numpy_rng_state'] = np.random.get_state()
+ checkpoint['python_rng_state'] = random.getstate()
+ checkpoint['adv_loss'] = running_adv_loss/(i + 1)
+ checkpoint['disc_loss'] = running_disc_loss/(i + 1)
+ checkpoint['feature_loss'] = running_feature_loss/(i + 1)
+ checkpoint['reg_loss'] = running_reg_loss/(i + 1)
+
+
+ if inference_test:
+ print("running inference test...")
+ out = model.process(testsignal, features, periods, numbits).cpu().numpy()
+ wavfile.write(os.path.join(inference_folder, f'{model_name}_epoch_{ep}.wav'), 16000, out)
+ if ref is not None:
+ mos = pesq.pesq(16000, ref, out, mode='wb')
+ print(f"MOS (PESQ): {mos}")
+
+
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_epoch_{ep}.pth'))
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_last.pth'))
+
+
+ print()
+
+print('Done')
diff --git a/dnn/torch/osce/adv_train_vocoder.py b/dnn/torch/osce/adv_train_vocoder.py
new file mode 100644
index 00000000..73e3c9b0
--- /dev/null
+++ b/dnn/torch/osce/adv_train_vocoder.py
@@ -0,0 +1,451 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+import math as m
+import random
+
+import yaml
+
+from tqdm import tqdm
+
+try:
+ import git
+ has_git = True
+except:
+ has_git = False
+
+import torch
+from torch.optim.lr_scheduler import LambdaLR
+import torch.nn.functional as F
+
+from scipy.io import wavfile
+import numpy as np
+import pesq
+
+from data import LPCNetVocodingDataset
+from models import model_dict
+
+
+from utils.lpcnet_features import load_lpcnet_features
+from utils.misc import count_parameters
+
+from losses.stft_loss import MRSTFTLoss, MRLogMelLoss
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('setup', type=str, help='setup yaml file')
+parser.add_argument('output', type=str, help='output path')
+parser.add_argument('--device', type=str, help='compute device', default=None)
+parser.add_argument('--initial-checkpoint', type=str, help='initial checkpoint', default=None)
+parser.add_argument('--test-features', type=str, help='path to features for testing', default=None)
+parser.add_argument('--no-redirect', action='store_true', help='disables re-direction of stdout')
+
+args = parser.parse_args()
+
+
+torch.set_num_threads(4)
+
+with open(args.setup, 'r') as f:
+ setup = yaml.load(f.read(), yaml.FullLoader)
+
+checkpoint_prefix = 'checkpoint'
+output_prefix = 'output'
+setup_name = 'setup.yml'
+output_file='out.txt'
+
+
+# check model
+if not 'name' in setup['model']:
+ print(f'warning: did not find model entry in setup, using default PitchPostFilter')
+ model_name = 'pitchpostfilter'
+else:
+ model_name = setup['model']['name']
+
# prepare output folder: ask for confirmation before re-using an existing one
if os.path.exists(args.output):
    print("warning: output folder exists")

    reply = input('continue? (y/n): ')
    while reply not in {'y', 'n'}:
        reply = input('continue? (y/n): ')

    if reply == 'n':
        # os._exit() requires a status argument and raised a TypeError when
        # called without one; terminate explicitly instead. The non-zero
        # status signals that no training was run.
        sys.exit(1)
else:
    os.makedirs(args.output, exist_ok=True)
+
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+# add repo info to setup
+if has_git:
+ working_dir = os.path.split(__file__)[0]
+ try:
+ repo = git.Repo(working_dir, search_parent_directories=True)
+ setup['repo'] = dict()
+ hash = repo.head.object.hexsha
+ urls = list(repo.remote().urls)
+ is_dirty = repo.is_dirty()
+
+ if is_dirty:
+ print("warning: repo is dirty")
+
+ setup['repo']['hash'] = hash
+ setup['repo']['urls'] = urls
+ setup['repo']['dirty'] = is_dirty
+ except:
+ has_git = False
+
+# dump setup
+with open(os.path.join(args.output, setup_name), 'w') as f:
+ yaml.dump(setup, f)
+
+
+ref = None
+# prepare inference test if wanted
+inference_test = False
+if type(args.test_features) != type(None):
+ test_features = load_lpcnet_features(args.test_features)
+ features = test_features['features']
+ periods = test_features['periods']
+ inference_folder = os.path.join(args.output, 'inference_test')
+ os.makedirs(inference_folder, exist_ok=True)
+ inference_test = True
+
+
+# training parameters
+batch_size = setup['training']['batch_size']
+epochs = setup['training']['epochs']
+lr = setup['training']['lr']
+lr_decay_factor = setup['training']['lr_decay_factor']
+lr_gen = lr * setup['training']['gen_lr_reduction']
+lambda_feat = setup['training']['lambda_feat']
+lambda_reg = setup['training']['lambda_reg']
+adv_target = setup['training'].get('adv_target', 'target')
+
+
+# load training dataset
+data_config = setup['data']
+data = LPCNetVocodingDataset(setup['dataset'], **data_config)
+
+# load validation dataset if given
+if 'validation_dataset' in setup:
+ validation_data = LPCNetVocodingDataset(setup['validation_dataset'], **data_config)
+
+ validation_dataloader = torch.utils.data.DataLoader(validation_data, batch_size=batch_size, drop_last=True, num_workers=4)
+
+ run_validation = True
+else:
+ run_validation = False
+
+# create model
+model = model_dict[model_name](*setup['model']['args'], **setup['model']['kwargs'])
+
+
+# create discriminator
+disc_name = setup['discriminator']['name']
+disc = model_dict[disc_name](
+ *setup['discriminator']['args'], **setup['discriminator']['kwargs']
+)
+
+
+
# set compute device: honour --device when given, otherwise prefer CUDA and
# fall back to the CPU
if args.device is None:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device(args.device)
+
+
+
+# dataloader
+dataloader = torch.utils.data.DataLoader(data, batch_size=batch_size, drop_last=True, shuffle=True, num_workers=4)
+
+# optimizer is introduced to trainable parameters
+parameters = [p for p in model.parameters() if p.requires_grad]
+optimizer = torch.optim.Adam(parameters, lr=lr_gen)
+
+# disc optimizer
+parameters = [p for p in disc.parameters() if p.requires_grad]
+optimizer_disc = torch.optim.Adam(parameters, lr=lr, betas=[0.5, 0.9])
+
+# learning rate scheduler
+scheduler = LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))
+
# Optionally resume from a checkpoint. Only the generator weights are
# mandatory; every other entry is restored when it is present.
if args.initial_checkpoint is not None:
    print(f"loading state dict from {args.initial_checkpoint}...")
    chkpt = torch.load(args.initial_checkpoint, map_location=device)
    model.load_state_dict(chkpt['state_dict'])

    if 'disc_state_dict' in chkpt:
        print(f"loading discriminator state dict from {args.initial_checkpoint}...")
        disc.load_state_dict(chkpt['disc_state_dict'])

    if 'optimizer_state_dict' in chkpt:
        print(f"loading optimizer state dict from {args.initial_checkpoint}...")
        optimizer.load_state_dict(chkpt['optimizer_state_dict'])

    if 'disc_optimizer_state_dict' in chkpt:
        print(f"loading discriminator optimizer state dict from {args.initial_checkpoint}...")
        optimizer_disc.load_state_dict(chkpt['disc_optimizer_state_dict'])

    # Checkpoints store the scheduler under 'scheduler_state_dict'; the
    # previous test for 'scheduler_state_disc' never matched, so the scheduler
    # state was silently dropped on resume.
    if 'scheduler_state_dict' in chkpt:
        print(f"loading scheduler state dict from {args.initial_checkpoint}...")
        scheduler.load_state_dict(chkpt['scheduler_state_dict'])

    # restoring the torch RNG state is currently disabled
    # if 'torch_rng_state' in chkpt:
    #     print(f"setting torch RNG state from {args.initial_checkpoint}...")
    #     torch.set_rng_state(chkpt['torch_rng_state'])

    if 'numpy_rng_state' in chkpt:
        print(f"setting numpy RNG state from {args.initial_checkpoint}...")
        np.random.set_state(chkpt['numpy_rng_state'])

    if 'python_rng_state' in chkpt:
        print(f"setting Python RNG state from {args.initial_checkpoint}...")
        random.setstate(chkpt['python_rng_state'])
+
+# loss
+w_l1 = setup['training']['loss']['w_l1']
+w_lm = setup['training']['loss']['w_lm']
+w_slm = setup['training']['loss']['w_slm']
+w_sc = setup['training']['loss']['w_sc']
+w_logmel = setup['training']['loss']['w_logmel']
+w_wsc = setup['training']['loss']['w_wsc']
+w_xcorr = setup['training']['loss']['w_xcorr']
+w_sxcorr = setup['training']['loss']['w_sxcorr']
+w_l2 = setup['training']['loss']['w_l2']
+
+w_sum = w_l1 + w_lm + w_sc + w_logmel + w_wsc + w_slm + w_xcorr + w_sxcorr + w_l2
+
+stftloss = MRSTFTLoss(sc_weight=w_sc, log_mag_weight=w_lm, wsc_weight=w_wsc, smooth_log_mag_weight=w_slm, sxcorr_weight=w_sxcorr).to(device)
+logmelloss = MRLogMelLoss().to(device)
+
def xcorr_loss(y_true, y_pred):
    """
    One minus the normalized cross-correlation, averaged over the batch.

    All dimensions except the first are reduced per item; 1e-9 is added under
    the square root to avoid a division by zero for silent signals.
    """
    reduce_dims = list(range(1, y_true.dim()))

    inner_product = torch.sum(y_true * y_pred, dim=reduce_dims)
    energy_true = torch.sum(y_true ** 2, dim=reduce_dims)
    energy_pred = torch.sum(y_pred ** 2, dim=reduce_dims)

    per_item = 1 - inner_product / torch.sqrt(energy_true * energy_pred + 1e-9)

    return torch.mean(per_item)
+
def td_l2_norm(y_true, y_pred):
    """
    Time-domain mean squared error divided by the RMS of y_pred.

    Both statistics are taken over all dimensions except the first; the
    per-item ratios are then averaged over the batch. 1e-6 is added to the
    RMS to keep the division well defined.
    """
    reduce_dims = list(range(1, y_true.dim()))

    mse = torch.mean((y_true - y_pred) ** 2, dim=reduce_dims)
    rms_pred = torch.mean(y_pred ** 2, dim=reduce_dims) ** .5

    per_item = mse / (rms_pred + 1e-6)

    return per_item.mean()
+
def td_l1(y_true, y_pred, pow=0):
    """
    Time-domain mean absolute error with optional level normalization.

    The per-item error is divided by (mean(|y_pred|) + 1e-9) ** pow, so the
    default pow=0 leaves the error unnormalized. Statistics are taken over
    all dimensions except the first and the result is averaged over the batch.
    """
    reduce_dims = list(range(1, y_true.dim()))

    abs_error = torch.mean(torch.abs(y_true - y_pred), dim=reduce_dims)
    level = torch.mean(torch.abs(y_pred), dim=reduce_dims) + 1e-9

    return torch.mean(abs_error / (level ** pow))
+
def criterion(x, y):
    """
    Combined regression loss, normalized by the sum of all loss weights.

    The terms are accumulated in a fixed order: level-normalized L1, the
    multi-resolution STFT loss (which is constructed with its own weights),
    log-mel loss, cross-correlation loss and normalized L2.
    """
    total = w_l1 * td_l1(x, y, pow=1)
    total = total + stftloss(x, y)
    total = total + w_logmel * logmelloss(x, y)
    total = total + w_xcorr * xcorr_loss(x, y)
    total = total + w_l2 * td_l2_norm(x, y)

    return total / w_sum
+
+
+# model checkpoint
+checkpoint = {
+ 'setup' : setup,
+ 'state_dict' : model.state_dict(),
+ 'loss' : -1
+}
+
+
+if not args.no_redirect:
+ print(f"re-directing output to {os.path.join(args.output, output_file)}")
+ sys.stdout = open(os.path.join(args.output, output_file), "w")
+
+
+print("summary:")
+
+print(f"generator: {count_parameters(model.cpu()) / 1e6:5.3f} M parameters")
+if hasattr(model, 'flop_count'):
+ print(f"generator: {model.flop_count(16000) / 1e6:5.3f} MFLOPS")
+print(f"discriminator: {count_parameters(disc.cpu()) / 1e6:5.3f} M parameters")
+
+if ref is not None:
+ noisy = np.fromfile(os.path.join(args.testdata, 'noisy.s16'), dtype=np.int16)
+ initial_mos = pesq.pesq(16000, ref, noisy, mode='wb')
+ print(f"initial MOS (PESQ): {initial_mos}")
+
+best_loss = 1e9
+log_interval = 10
+
+
+m_r = 0
+m_f = 0
+s_r = 1
+s_f = 1
+
def optimizer_to(optim, device):
    """
    Move the tensors held in an optimizer's state to the given device.

    Tensors stored directly in optim.state and tensors stored one level down
    in per-parameter dictionaries are moved, together with their gradients
    when they have any. Non-tensor entries are left untouched.
    """
    def _move(tensor):
        tensor.data = tensor.data.to(device)
        if tensor._grad is not None:
            tensor._grad.data = tensor._grad.data.to(device)

    for state in optim.state.values():
        if isinstance(state, torch.Tensor):
            _move(state)
        elif isinstance(state, dict):
            for entry in state.values():
                if isinstance(entry, torch.Tensor):
                    _move(entry)
+
+optimizer_to(optimizer, device)
+optimizer_to(optimizer_disc, device)
+
+
+for ep in range(1, epochs + 1):
+ print(f"training epoch {ep}...")
+
+ model.to(device)
+ disc.to(device)
+ model.train()
+ disc.train()
+
+ running_disc_loss = 0
+ running_adv_loss = 0
+ running_feature_loss = 0
+ running_reg_loss = 0
+
+ with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:
+ for i, batch in enumerate(tepoch):
+
+ # set gradients to zero
+ optimizer.zero_grad()
+
+ # push batch to device
+ for key in batch:
+ batch[key] = batch[key].to(device)
+
+ target = batch['target'].to(device)
+ disc_target = batch[adv_target].to(device)
+
+ # calculate model output
+ output = model(batch['features'], batch['periods'])
+
+ # discriminator update
+ scores_gen = disc(output.detach())
+ scores_real = disc(disc_target.unsqueeze(1))
+
+ disc_loss = 0
+ for scale in scores_gen:
+ disc_loss += ((scale[-1]) ** 2).mean()
+ m_f = 0.9 * m_f + 0.1 * scale[-1].detach().mean().cpu().item()
+ s_f = 0.9 * s_f + 0.1 * scale[-1].detach().std().cpu().item()
+
+ for scale in scores_real:
+ disc_loss += ((1 - scale[-1]) ** 2).mean()
+ m_r = 0.9 * m_r + 0.1 * scale[-1].detach().mean().cpu().item()
+ s_r = 0.9 * s_r + 0.1 * scale[-1].detach().std().cpu().item()
+
+ disc_loss = 0.5 * disc_loss / len(scores_gen)
+ winning_chance = 0.5 * m.erfc( (m_r - m_f) / m.sqrt(2 * (s_f**2 + s_r**2)) )
+
+ disc.zero_grad()
+ disc_loss.backward()
+ optimizer_disc.step()
+
+ # generator update
+ scores_gen = disc(output)
+
+
+ # calculate loss
+ loss_reg = criterion(output.squeeze(1), target)
+
+ num_discs = len(scores_gen)
+ loss_gen = 0
+ for scale in scores_gen:
+ loss_gen += ((1 - scale[-1]) ** 2).mean() / num_discs
+
+ loss_feat = 0
+ for k in range(num_discs):
+ num_layers = len(scores_gen[k]) - 1
+ f = 4 / num_discs / num_layers
+ for l in range(num_layers):
+ loss_feat += f * F.l1_loss(scores_gen[k][l], scores_real[k][l].detach())
+
+ model.zero_grad()
+
+ (loss_gen + lambda_feat * loss_feat + lambda_reg * loss_reg).backward()
+
+ optimizer.step()
+
+ running_adv_loss += loss_gen.detach().cpu().item()
+ running_disc_loss += disc_loss.detach().cpu().item()
+ running_feature_loss += lambda_feat * loss_feat.detach().cpu().item()
+ running_reg_loss += lambda_reg * loss_reg.detach().cpu().item()
+
+ # update status bar
+ if i % log_interval == 0:
+ tepoch.set_postfix(adv_loss=f"{running_adv_loss/(i + 1):8.7f}",
+ disc_loss=f"{running_disc_loss/(i + 1):8.7f}",
+ feat_loss=f"{running_feature_loss/(i + 1):8.7f}",
+ reg_loss=f"{running_reg_loss/(i + 1):8.7f}",
+ wc=f"{100*winning_chance:5.2f}%")
+
+
+ # save checkpoint
+ checkpoint['state_dict'] = model.state_dict()
+ checkpoint['disc_state_dict'] = disc.state_dict()
+ checkpoint['optimizer_state_dict'] = optimizer.state_dict()
+ checkpoint['disc_optimizer_state_dict'] = optimizer_disc.state_dict()
+ checkpoint['scheduler_state_dict'] = scheduler.state_dict()
+ checkpoint['torch_rng_state'] = torch.get_rng_state()
+ checkpoint['numpy_rng_state'] = np.random.get_state()
+ checkpoint['python_rng_state'] = random.getstate()
+ checkpoint['adv_loss'] = running_adv_loss/(i + 1)
+ checkpoint['disc_loss'] = running_disc_loss/(i + 1)
+ checkpoint['feature_loss'] = running_feature_loss/(i + 1)
+ checkpoint['reg_loss'] = running_reg_loss/(i + 1)
+
+
+ if inference_test:
+ print("running inference test...")
+ out = model.process(features, periods).cpu().numpy()
+ wavfile.write(os.path.join(inference_folder, f'{model_name}_epoch_{ep}.wav'), 16000, out)
+ if ref is not None:
+ mos = pesq.pesq(16000, ref, out, mode='wb')
+ print(f"MOS (PESQ): {mos}")
+
+
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_epoch_{ep}.pth'))
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_last.pth'))
+
+
+ print()
+
+print('Done')
diff --git a/dnn/torch/osce/create_testvectors.py b/dnn/torch/osce/create_testvectors.py
new file mode 100644
index 00000000..a037d0db
--- /dev/null
+++ b/dnn/torch/osce/create_testvectors.py
@@ -0,0 +1,165 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import torch
+import numpy as np
+
+from models import model_dict
+from utils import endoscopy
+
# Command-line interface of the test-vector generator: two positional paths
# (checkpoint folder, output folder) and an optional debug switch.
parser = argparse.ArgumentParser()

parser.add_argument('checkpoint_path', type=str, help='path to folder containing checkpoints "lace_checkpoint.pth" and "nolace_checkpoint.pth"')
parser.add_argument('output_folder', type=str, help='output folder for testvectors')
parser.add_argument('--debug', action='store_true', help='add debug output to output folder')
+
+
def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False):
    """Feed random data through an adaptive convolution and dump the result.

    Random features of shape (1, num_frames, adaconv.feature_dim) and a random
    signal of shape (1, adaconv.in_channels, num_frames * adaconv.frame_size)
    are passed to ``adaconv``. Features, input and output are then written as
    raw binary files named prefix + '_features.f32', '_x_in.f32' and
    '_x_out.f32'. Both signals are stored in (frame, channel, sample) order.

    Args:
        prefix: path prefix of the files to write.
        adaconv: callable layer exposing feature_dim, in_channels,
            out_channels and frame_size.
        num_frames: number of frames to generate.
        debug: forwarded to the layer as its ``debug`` keyword.
    """
    num_feats = adaconv.feature_dim
    chans_in = adaconv.in_channels
    chans_out = adaconv.out_channels
    samples_per_frame = adaconv.frame_size

    def frame_major(signal, channels):
        # (channels, frames * frame_size) -> (frames, channels, frame_size)
        framed = signal.reshape(channels, num_frames, samples_per_frame)
        return framed.permute(1, 0, 2).detach().numpy()

    # features are drawn before the signal so the random sequence is unchanged
    cond = torch.randn((1, num_frames, num_feats))
    signal_in = torch.randn((1, chans_in, num_frames * samples_per_frame))

    signal_out = adaconv(signal_in, cond, debug=debug)

    dumps = (
        ('_features.f32', cond[0].detach().numpy()),
        ('_x_in.f32', frame_major(signal_in[0], chans_in)),
        ('_x_out.f32', frame_major(signal_out[0], chans_out)),
    )
    for suffix, array in dumps:
        array.tofile(prefix + suffix)
+
def create_adacomb_testvector(prefix, adacomb, num_frames, debug=False):
    """Feed random data through an adaptive comb filter and dump the result.

    Generates random features, a random single-channel signal and one random
    integer lag per frame drawn from [adacomb.kernel_size, 250), runs
    ``adacomb`` on them and writes four raw binary files: prefix +
    '_features.f32', '_x_in.f32', '_p_in.s32' (lags cast to int32) and
    '_x_out.f32'. Signals are written transposed, i.e. sample-major.

    Args:
        prefix: path prefix of the files to write.
        adacomb: callable layer exposing feature_dim, frame_size, kernel_size.
        num_frames: number of frames to generate.
        debug: forwarded to the layer as its ``debug`` keyword.
    """
    num_feats = adacomb.feature_dim
    samples_per_frame = adacomb.frame_size
    num_channels = 1

    # order of the random draws: features, signal, lags
    cond = torch.randn((1, num_frames, num_feats))
    signal_in = torch.randn((1, num_channels, num_frames * samples_per_frame))
    lags = torch.randint(adacomb.kernel_size, 250, (1, num_frames))

    signal_out = adacomb(signal_in, cond, lags, debug=debug)

    def sample_major(signal):
        # (channels, samples) -> (samples, channels)
        return signal.permute(1, 0).detach().numpy()

    cond[0].detach().numpy().tofile(prefix + '_features.f32')
    sample_major(signal_in[0]).tofile(prefix + '_x_in.f32')
    lags[0].detach().numpy().astype(np.int32).tofile(prefix + '_p_in.s32')
    sample_major(signal_out[0]).tofile(prefix + '_x_out.f32')
+
def create_adashape_testvector(prefix, adashape, num_frames):
    """Feed random data through a temporal shaping layer and dump the result.

    Random features of shape (1, num_frames, adashape.feature_dim) and a
    random signal of shape (1, 1, num_frames * adashape.frame_size) are passed
    to ``adashape``. Features, flattened input and flattened output are
    written to prefix + '_features.f32', '_x_in.f32' and '_x_out.f32'.

    Args:
        prefix: path prefix of the files to write.
        adashape: callable layer exposing feature_dim and frame_size.
        num_frames: number of frames to generate.
    """
    cond = torch.randn((1, num_frames, adashape.feature_dim))
    signal_in = torch.randn((1, 1, num_frames * adashape.frame_size))

    signal_out = adashape(signal_in, cond)

    outputs = {
        '_features.f32': cond[0],
        '_x_in.f32': signal_in.flatten(),
        '_x_out.f32': signal_out.flatten(),
    }
    # dicts keep insertion order, so files are written features, input, output
    for suffix, tensor in outputs.items():
        tensor.detach().numpy().tofile(prefix + suffix)
+
def create_feature_net_testvector(prefix, model, num_frames):
    """Dump inputs and outputs of a model's feature network on random data.

    Four subframes are generated per frame. Random input features, random
    integer periods in [32, 300) and random numbits values drawn uniformly
    from model.numbits_range are combined exactly as coded below: periods go
    through model.pitch_embedding, numbits through model.numbits_embedding
    (flattened from dim 2 and repeated 4x along the time axis), and everything
    is concatenated on the last axis before being passed to model.feature_net.

    Files written (raw binary): prefix + '_in_features.f32', '_periods.s32',
    '_numbits.f32', '_full_features.f32' and '_out_features.f32'.

    Args:
        prefix: path prefix of the files to write.
        model: object exposing num_features, numbits_range, pitch_embedding,
            numbits_embedding and feature_net.
        num_frames: number of frames to generate.
    """
    subframes_per_frame = 4
    total_subframes = subframes_per_frame * num_frames
    bits_lo, bits_hi = model.numbits_range[0], model.numbits_range[1]

    # random draws, in this order: features, periods, numbits
    raw_features = torch.randn((1, total_subframes, model.num_features))
    lags = torch.randint(32, 300, (1, total_subframes))
    bits = bits_lo + torch.rand((1, num_frames, 2)) * (bits_hi - bits_lo)

    lag_embedding = model.pitch_embedding(lags)
    bits_embedding = model.numbits_embedding(bits).flatten(2)
    bits_embedding = torch.repeat_interleave(bits_embedding, 4, dim=1)
    stacked = torch.cat((raw_features, lag_embedding, bits_embedding), dim=-1)

    conditioning = model.feature_net(stacked)

    raw_features.float().numpy().tofile(prefix + "_in_features.f32")
    lags.numpy().astype(np.int32).tofile(prefix + "_periods.s32")
    bits.float().numpy().tofile(prefix + "_numbits.f32")
    stacked.detach().numpy().tofile(prefix + "_full_features.f32")
    conditioning.detach().numpy().tofile(prefix + "_out_features.f32")
+
+
+
if __name__ == "__main__":
    # Script entry point: restore the LACE and NoLACE models from the
    # checkpoint folder and write one set of test vectors per selected layer.
    args = parser.parse_args()

    os.makedirs(args.output_folder, exist_ok=True)

    def vector_prefix(name):
        # all test vectors are placed directly in the output folder
        return os.path.join(args.output_folder, name)

    # load both checkpoints on the CPU first, then build and initialise models
    lace_checkpoint, nolace_checkpoint = [
        torch.load(os.path.join(args.checkpoint_path, filename), map_location='cpu')
        for filename in ("lace_checkpoint.pth", "nolace_checkpoint.pth")
    ]

    lace = model_dict['lace'](**lace_checkpoint['setup']['model']['kwargs'])
    nolace = model_dict['nolace'](**nolace_checkpoint['setup']['model']['kwargs'])

    lace.load_state_dict(lace_checkpoint['state_dict'])
    nolace.load_state_dict(nolace_checkpoint['state_dict'])

    if args.debug:
        endoscopy.init(args.output_folder)

    # adaptive convolutions; channel counts as stated by the original comments
    adaconv_cases = (
        ("lace_af1", lace.af1),      # 1 input channel, 1 output channel
        ("nolace_af1", nolace.af1),  # 1 input channel, 2 output channels
        ("nolace_af4", nolace.af4),  # 2 input channels, 1 output channel
        ("nolace_af2", nolace.af2),  # 2 input channels, 2 output channels
    )
    for name, layer in adaconv_cases:
        create_adaconv_testvector(vector_prefix(name), layer, 5, debug=args.debug)

    # lace cf1 (adaptive comb filter)
    create_adacomb_testvector(vector_prefix("lace_cf1"), lace.cf1, 5, debug=args.debug)

    # nolace tdshape1 (temporal shaping layer)
    create_adashape_testvector(vector_prefix("nolace_tdshape1"), nolace.tdshape1, 5)

    # lace feature net
    create_feature_net_testvector(vector_prefix('lace'), lace, 5)

    if args.debug:
        endoscopy.close()
diff --git a/dnn/torch/osce/data/__init__.py b/dnn/torch/osce/data/__init__.py
new file mode 100644
index 00000000..8df4d56a
--- /dev/null
+++ b/dnn/torch/osce/data/__init__.py
@@ -0,0 +1,2 @@
+from .silk_enhancement_set import SilkEnhancementSet
+from .lpcnet_vocoding_dataset import LPCNetVocodingDataset \ No newline at end of file
diff --git a/dnn/torch/osce/data/lpcnet_vocoding_dataset.py b/dnn/torch/osce/data/lpcnet_vocoding_dataset.py
new file mode 100644
index 00000000..36c8c724
--- /dev/null
+++ b/dnn/torch/osce/data/lpcnet_vocoding_dataset.py
@@ -0,0 +1,225 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" Dataset for LPCNet training """
+import os
+
+import yaml
+import torch
+import numpy as np
+from torch.utils.data import Dataset
+
+
# Conversion factors between the 16-bit linear range and the mu-law argument.
scale = 255.0/32768.0
scale_1 = 32768.0/255.0


def ulaw2lin(u):
    """Expand mu-law codes (centered at 128) to linear sample values.

    Works on scalars and numpy arrays; the sign of ``u - 128`` becomes the
    sign of the result and code 128 maps to 0.
    """
    centered = u - 128
    magnitude = np.abs(centered)
    expanded = np.exp(magnitude/128.*np.log(256)) - 1
    return np.sign(centered)*scale_1*expanded


def lin2ulaw(x):
    """Compress linear sample values to mu-law codes.

    The result is rounded to the nearest integer value and clipped to
    [0, 255]; it keeps the floating point dtype produced by numpy.
    """
    compressed = 128*np.log(1 + scale*np.abs(x))/np.log(256)
    signed = np.sign(x)*compressed
    return np.clip(128 + np.round(signed), 0, 255)
+
+
def run_lpc(signal, lpcs, frame_length=160):
    """Compute frame-wise linear prediction and the prediction error.

    Args:
        signal: 1-D array holding ``lpc_order`` history samples followed by
            ``num_frames * frame_length`` samples.
        lpcs: array of shape (num_frames, lpc_order), one filter per frame.
        frame_length: number of samples predicted per frame.

    Returns:
        (prediction, error): ``prediction`` is the concatenation of the
        negated 'valid' convolutions of each frame with its filter, and
        ``error`` is ``signal[lpc_order:] - prediction``.
    """
    num_frames, lpc_order = lpcs.shape

    per_frame = []
    for frame in range(num_frames):
        start = frame * frame_length
        # frame_length + lpc_order - 1 inputs give frame_length 'valid' outputs
        stop = start + frame_length + lpc_order - 1
        per_frame.append(-np.convolve(signal[start:stop], lpcs[frame], mode='valid'))

    prediction = np.concatenate(per_frame)
    error = signal[lpc_order:] - prediction

    return prediction, error
+
class LPCNetVocodingDataset(Dataset):
    """Dataset of frame-aligned features and signals described by ``info.yml``.

    The dataset folder contains an ``info.yml`` file naming a feature file and
    a signal file together with their dtypes, frame lengths and column
    layouts. Both files are memory-mapped. Each item covers
    ``frames_per_sample`` consecutive frames; two layout versions (1 and 2)
    are supported and selected by the ``version`` entry of ``info.yml``.

    Args:
        path_to_dataset: folder containing ``info.yml`` and the data files.
        features: names of the feature groups (keys of the feature frame
            layout) to return. ``'periods'`` is converted to integer lags and
            returned separately, so it is expected to be part of this list.
        target: name of the signal used as training target.
        frames_per_sample: number of frames per item.
        feature_history: extra feature frames prepended to every item.
        feature_lookahead: extra feature frames appended to every item.
        lpc_gamma: weighting factor; LPC coefficient i is scaled by
            ``lpc_gamma ** (i + 1)`` (version 2 only).
        input_signals: names of the signals stacked into ``'signals'`` for
            version 1 items. ``None`` selects every signal of the layout
            except ``target``.
    """

    def __init__(self,
                 path_to_dataset,
                 features=['cepstrum', 'periods', 'pitch_corr'],
                 target='signal',
                 frames_per_sample=100,
                 feature_history=0,
                 feature_lookahead=0,
                 lpc_gamma=1,
                 input_signals=None):

        super().__init__()

        # load dataset info
        self.path_to_dataset = path_to_dataset
        with open(os.path.join(path_to_dataset, 'info.yml'), 'r') as f:
            # NOTE(review): FullLoader can build more than plain data types;
            # consider yaml.safe_load if info.yml may come from untrusted sources.
            dataset = yaml.load(f, yaml.FullLoader)

        # dataset version selects the item layout
        self.version = dataset['version']
        if self.version == 1:
            self.getitem = self.getitem_v1
        elif self.version == 2:
            self.getitem = self.getitem_v2
        else:
            raise ValueError(f"dataset version {self.version} unknown")

        # features
        self.feature_history = feature_history
        self.feature_lookahead = feature_lookahead
        self.frame_offset = 2 + self.feature_history
        self.frames_per_sample = frames_per_sample
        self.input_features = features
        self.feature_frame_layout = dataset['feature_frame_layout']
        self.lpc_gamma = lpc_gamma

        # load feature file
        self.feature_file = os.path.join(path_to_dataset, dataset['feature_file'])
        self.features = np.memmap(self.feature_file, dtype=dataset['feature_dtype'])
        self.feature_frame_length = dataset['feature_frame_length']

        assert len(self.features) % self.feature_frame_length == 0
        self.features = self.features.reshape((-1, self.feature_frame_length))

        # derive number of samples in dataset
        self.dataset_length = (len(self.features) - self.frame_offset - self.feature_lookahead - 1 - 2) // self.frames_per_sample

        # signals
        self.frame_length = dataset['frame_length']
        self.signal_frame_layout = dataset['signal_frame_layout']
        self.target = target

        # signals stacked into the 'signals' entry of version 1 items
        if input_signals is None:
            input_signals = [name for name in self.signal_frame_layout if name != target]
        self.input_signals = list(input_signals)

        # load signals
        self.signal_file = os.path.join(path_to_dataset, dataset['signal_file'])
        self.signals = np.memmap(self.signal_file, dtype=dataset['signal_dtype'])
        self.signal_frame_length = dataset['signal_frame_length']
        self.signals = self.signals.reshape((-1, self.signal_frame_length))
        assert len(self.signals) == len(self.features) * self.frame_length

    def __getitem__(self, index):
        """Return item ``index`` using the loader matching the dataset version."""
        return self.getitem(index)

    def _extract_features(self, index, sample):
        """Store the requested feature slices (and integer periods) in ``sample``."""
        frame_start = self.frame_offset + index * self.frames_per_sample - self.feature_history
        frame_stop = self.frame_offset + (index + 1) * self.frames_per_sample + self.feature_lookahead

        for feature in self.input_features:
            feature_start, feature_stop = self.feature_frame_layout[feature]
            sample[feature] = self.features[frame_start : frame_stop, feature_start : feature_stop]

        # convert periods from the stored feature value to an integer lag
        if 'periods' in self.input_features:
            sample['periods'] = (0.1 + 50 * sample['periods'] + 100).astype('int16')

    def _signal_range(self, index):
        """Return (start, stop) sample positions of item ``index``."""
        signal_start = (self.frame_offset + index * self.frames_per_sample) * self.frame_length
        signal_stop = (self.frame_offset + (index + 1) * self.frames_per_sample) * self.frame_length
        return signal_start, signal_stop

    def _stack_features(self, sample):
        """Concatenate all requested non-period features along the last axis."""
        feature_keys = [key for key in self.input_features if not key.startswith("periods")]
        return torch.cat([torch.FloatTensor(sample[key]) for key in feature_keys], dim=-1)

    def getitem_v2(self, index):
        """Return ``features``, ``periods`` and a float ``target`` scaled by 2**-15.

        If the feature layout contains ``'lpc'`` and the signal layout has no
        ``'prediction'`` entry, prediction, last_error and error are computed
        from the noisy ``last_signal`` with ``run_lpc``.
        """
        sample = dict()

        # extract features
        self._extract_features(index, sample)

        signal_start, signal_stop = self._signal_range(index)

        # last_signal and signal are always expected to be there
        sample['last_signal'] = self.signals[signal_start : signal_stop, self.signal_frame_layout['last_signal']]
        sample['signal'] = self.signals[signal_start : signal_stop, self.signal_frame_layout['signal']]

        # calculate prediction and error if lpc coefficients present and prediction not given
        if 'lpc' in self.feature_frame_layout and 'prediction' not in self.signal_frame_layout:
            # lpc coefficients with one frame lookahead
            # frame positions (start one frame early for past excitation)
            frame_start = self.frame_offset + self.frames_per_sample * index - 1
            frame_stop = self.frame_offset + self.frames_per_sample * (index + 1)

            # feature positions
            lpc_start, lpc_stop = self.feature_frame_layout['lpc']
            lpc_order = lpc_stop - lpc_start
            lpcs = self.features[frame_start : frame_stop, lpc_start : lpc_stop]

            # LPC weighting
            weights = np.array([self.lpc_gamma ** (i + 1) for i in range(lpc_order)])
            lpcs = lpcs * weights

            # signal position (lpc_order samples as history)
            signal_start = frame_start * self.frame_length - lpc_order + 1
            signal_stop = frame_stop * self.frame_length + 1
            noisy_signal = self.signals[signal_start : signal_stop, self.signal_frame_layout['last_signal']]

            noisy_prediction, noisy_error = run_lpc(noisy_signal, lpcs, frame_length=self.frame_length)

            # extract signals, dropping the extra leading frame
            offset = self.frame_length
            sample['prediction'] = noisy_prediction[offset : offset + self.frame_length * self.frames_per_sample]
            sample['last_error'] = noisy_error[offset - 1 : offset - 1 + self.frame_length * self.frames_per_sample]

            # error between real signal and noisy prediction
            sample['error'] = sample['signal'] - sample['prediction']

        # concatenate features
        features = self._stack_features(sample)
        target = torch.FloatTensor(sample[self.target]) / 2**15
        periods = torch.LongTensor(sample['periods'])

        return {'features' : features, 'periods' : periods, 'target' : target}

    def getitem_v1(self, index):
        """Return ``features``, ``periods``, stacked integer ``signals`` and ``target``."""
        sample = dict()

        # extract features
        self._extract_features(index, sample)

        signal_start, signal_stop = self._signal_range(index)

        # every signal of the layout is read; values of the layout are column indices
        for signal_name, column in self.signal_frame_layout.items():
            sample[signal_name] = self.signals[signal_start : signal_stop, column]

        # concatenate features
        features = self._stack_features(sample)
        signals = torch.cat([torch.LongTensor(sample[key]).unsqueeze(-1) for key in self.input_signals], dim=-1)
        target = torch.LongTensor(sample[self.target])
        periods = torch.LongTensor(sample['periods'])

        return {'features' : features, 'periods' : periods, 'signals' : signals, 'target' : target}

    def __len__(self):
        """Number of items of ``frames_per_sample`` frames in the dataset."""
        return self.dataset_length
diff --git a/dnn/torch/osce/data/silk_conversion_set.py b/dnn/torch/osce/data/silk_conversion_set.py
new file mode 100644
index 00000000..8f646756
--- /dev/null
+++ b/dnn/torch/osce/data/silk_conversion_set.py
@@ -0,0 +1,132 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+
+from torch.utils.data import Dataset
+import numpy as np
+
+from utils.silk_features import silk_feature_factory
+from utils.pitch import hangover, calculate_acorr_window
+
+
class SilkEnhancementSet(Dataset):
    """Dataset pairing SILK decoder features with LPCNet features.

    All data is read from raw binary files in ``path`` (``features_*.{f32,
    s16,s32}`` and ``coded.s16``). Items consist of ``frames_per_sample``
    frames of 80 samples each; the SILK feature vectors are produced by the
    callable returned from ``silk_feature_factory``.

    NOTE(review): this class lives in silk_conversion_set.py but shares its
    name with the class in silk_enhancement_set.py; the name is kept here so
    existing imports continue to work.

    Args:
        path: folder containing the feature and signal files.
        frames_per_sample: frames per item, must be a multiple of 4.
        no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec,
        num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset,
        add_double_lag_acorr: forwarded to ``silk_feature_factory``.
        skip: number of samples by which the signal position is shifted back
            relative to the frame grid. Defaults to 0 (no shift).
            NOTE(review): the sibling dataset uses 91 here - confirm which
            value applies to this data.
    """

    def __init__(self,
                 path,
                 frames_per_sample=100,
                 no_pitch_value=9,
                 acorr_radius=2,
                 pitch_hangover=8,
                 num_bands_clean_spec=64,
                 num_bands_noisy_spec=18,
                 noisy_spec_scale='opus',
                 noisy_apply_dct=True,
                 add_offset=False,
                 add_double_lag_acorr=False,
                 skip=0
                 ):

        assert frames_per_sample % 4 == 0

        self.frame_size = 80
        self.frames_per_sample = frames_per_sample
        self.no_pitch_value = no_pitch_value
        self.skip = skip
        self.acorr_radius = acorr_radius
        self.pitch_hangover = pitch_hangover
        self.num_bands_clean_spec = num_bands_clean_spec
        self.num_bands_noisy_spec = num_bands_noisy_spec
        self.noisy_spec_scale = noisy_spec_scale
        self.add_double_lag_acorr = add_double_lag_acorr

        self.lpcs = np.fromfile(os.path.join(path, 'features_lpc.f32'), dtype=np.float32).reshape(-1, 16)
        self.ltps = np.fromfile(os.path.join(path, 'features_ltp.f32'), dtype=np.float32).reshape(-1, 5)
        self.periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16)
        self.gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32)
        self.num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32)
        self.num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32)
        self.offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)
        # np.fromfile is the numpy reader; np.from_file does not exist
        self.lpcnet_features = np.fromfile(os.path.join(path, 'features_lpcnet.f32'), dtype=np.float32).reshape(-1, 36)

        self.coded_signal = np.fromfile(os.path.join(path, 'coded.s16'), dtype=np.int16)

        self.create_features = silk_feature_factory(no_pitch_value,
                                                    acorr_radius,
                                                    pitch_hangover,
                                                    num_bands_clean_spec,
                                                    num_bands_noisy_spec,
                                                    noisy_spec_scale,
                                                    noisy_apply_dct,
                                                    add_offset,
                                                    add_double_lag_acorr)

        self.history_len = 700 if add_double_lag_acorr else 350
        # discard some frames to have enough signal history
        self.skip_frames = 4 * ((skip + self.history_len + 319) // 320 + 2)

        # the coded signal is the only signal loaded, so it defines the length
        num_frames = self.coded_signal.shape[0] // self.frame_size - self.skip_frames

        self.len = num_frames // frames_per_sample

    def __len__(self):
        """Number of items of ``frames_per_sample`` frames."""
        return self.len

    def __getitem__(self, index):
        """Return SILK features, periods, numbits and LPCNet features of one item.

        LPCNet features are taken at half the frame rate (first 20 columns),
        numbits values at a quarter of the frame rate and repeated 4 times.
        """
        frame_start = self.frames_per_sample * index + self.skip_frames
        frame_stop = frame_start + self.frames_per_sample

        signal_start = frame_start * self.frame_size - self.skip
        signal_stop = frame_stop * self.frame_size - self.skip

        coded_signal = self.coded_signal[signal_start : signal_stop].astype(np.float32) / 2**15

        coded_signal_history = self.coded_signal[signal_start - self.history_len : signal_start].astype(np.float32) / 2**15

        features, periods = self.create_features(
            coded_signal,
            coded_signal_history,
            self.lpcs[frame_start : frame_stop],
            self.gains[frame_start : frame_stop],
            self.ltps[frame_start : frame_stop],
            self.periods[frame_start : frame_stop],
            self.offsets[frame_start : frame_stop]
        )

        lpcnet_features = self.lpcnet_features[frame_start // 2 : frame_stop // 2, :20]

        num_bits = np.repeat(self.num_bits[frame_start // 4 : frame_stop // 4], 4).astype(np.float32).reshape(-1, 1)
        num_bits_smooth = np.repeat(self.num_bits_smooth[frame_start // 4 : frame_stop // 4], 4).astype(np.float32).reshape(-1, 1)

        numbits = np.concatenate((num_bits, num_bits_smooth), axis=-1)

        return {
            'silk_features' : features,
            'periods' : periods.astype(np.int64),
            'numbits' : numbits.astype(np.float32),
            'lpcnet_features' : lpcnet_features
        }
diff --git a/dnn/torch/osce/data/silk_enhancement_set.py b/dnn/torch/osce/data/silk_enhancement_set.py
new file mode 100644
index 00000000..fd18c4de
--- /dev/null
+++ b/dnn/torch/osce/data/silk_enhancement_set.py
@@ -0,0 +1,140 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+
+from torch.utils.data import Dataset
+import numpy as np
+
+from utils.silk_features import silk_feature_factory
+from utils.pitch import hangover, calculate_acorr_window
+
+
class SilkEnhancementSet(Dataset):
    """Dataset of coded/clean signal pairs with SILK decoder features.

    Reads raw binary feature files (``features_*``) and the 16-bit signals
    ``clean_hp.s16``, ``clean.s16`` and ``coded.s16`` from ``path``. One item
    spans ``frames_per_sample`` frames of 80 samples; signals are shifted back
    by ``skip`` samples relative to the frame grid and, if ``preemph`` is
    positive, filtered with a first-order pre-emphasis after the features
    have been computed.
    """

    def __init__(self,
                 path,
                 frames_per_sample=100,
                 no_pitch_value=256,
                 preemph=0.85,
                 skip=91,
                 acorr_radius=2,
                 pitch_hangover=8,
                 num_bands_clean_spec=64,
                 num_bands_noisy_spec=18,
                 noisy_spec_scale='opus',
                 noisy_apply_dct=True,
                 add_double_lag_acorr=False,
                 ):

        assert frames_per_sample % 4 == 0

        def read(filename, dtype):
            # every data file is a headerless dump of a single dtype
            return np.fromfile(os.path.join(path, filename), dtype=dtype)

        # configuration
        self.frame_size = 80
        self.frames_per_sample = frames_per_sample
        self.no_pitch_value = no_pitch_value
        self.preemph = preemph
        self.skip = skip
        self.acorr_radius = acorr_radius
        self.pitch_hangover = pitch_hangover
        self.num_bands_clean_spec = num_bands_clean_spec
        self.num_bands_noisy_spec = num_bands_noisy_spec
        self.noisy_spec_scale = noisy_spec_scale
        self.add_double_lag_acorr = add_double_lag_acorr

        # per-frame decoder parameters
        self.lpcs = read('features_lpc.f32', np.float32).reshape(-1, 16)
        self.ltps = read('features_ltp.f32', np.float32).reshape(-1, 5)
        self.periods = read('features_period.s16', np.int16)
        self.gains = read('features_gain.f32', np.float32)
        self.num_bits = read('features_num_bits.s32', np.int32)
        self.num_bits_smooth = read('features_num_bits_smooth.f32', np.float32)

        # signals
        self.clean_signal_hp = read('clean_hp.s16', np.int16)
        self.clean_signal = read('clean.s16', np.int16)
        self.coded_signal = read('coded.s16', np.int16)

        self.create_features = silk_feature_factory(no_pitch_value,
                                                    acorr_radius,
                                                    pitch_hangover,
                                                    num_bands_clean_spec,
                                                    num_bands_noisy_spec,
                                                    noisy_spec_scale,
                                                    noisy_apply_dct,
                                                    add_double_lag_acorr)

        if add_double_lag_acorr:
            self.history_len = 700
        else:
            self.history_len = 350

        # leading frames are dropped so that skip + history never reach
        # before the start of the signal
        self.skip_frames = 4 * ((skip + self.history_len + 319) // 320 + 2)

        usable_frames = self.clean_signal_hp.shape[0] // 80 - self.skip_frames
        self.len = usable_frames // frames_per_sample

    def __len__(self):
        """Number of items of ``frames_per_sample`` frames."""
        return self.len

    def __getitem__(self, index):
        """Return features, periods, targets, coded signal and numbits of one item."""
        first_frame = self.frames_per_sample * index + self.skip_frames
        last_frame = first_frame + self.frames_per_sample
        frames = slice(first_frame, last_frame)
        packets = slice(first_frame // 4, last_frame // 4)

        begin = first_frame * self.frame_size - self.skip
        end = last_frame * self.frame_size - self.skip

        def as_float(pcm, lo, hi):
            # 16-bit PCM -> float32 in [-1, 1)
            return pcm[lo : hi].astype(np.float32) / 2**15

        clean_signal_hp = as_float(self.clean_signal_hp, begin, end)
        clean_signal = as_float(self.clean_signal, begin, end)
        coded_signal = as_float(self.coded_signal, begin, end)
        coded_signal_history = as_float(self.coded_signal, begin - self.history_len, begin)

        # features are computed on the signal before pre-emphasis
        features, periods = self.create_features(
            coded_signal,
            coded_signal_history,
            self.lpcs[frames],
            self.gains[frames],
            self.ltps[frames],
            self.periods[frames]
        )

        if self.preemph > 0:
            for sig in (clean_signal, clean_signal_hp, coded_signal):
                sig[1:] -= self.preemph * sig[: -1]

        # numbits are stored once per 4 frames and repeated to frame rate
        numbits = np.concatenate(
            [np.repeat(values[packets], 4).astype(np.float32).reshape(-1, 1)
             for values in (self.num_bits, self.num_bits_smooth)],
            axis=-1
        )

        return {
            'features' : features,
            'periods' : periods.astype(np.int64),
            'target_orig' : clean_signal.astype(np.float32),
            'target' : clean_signal_hp.astype(np.float32),
            'signals' : coded_signal.reshape(-1, 1).astype(np.float32),
            'numbits' : numbits.astype(np.float32)
        }
diff --git a/dnn/torch/osce/engine/engine.py b/dnn/torch/osce/engine/engine.py
new file mode 100644
index 00000000..0762c898
--- /dev/null
+++ b/dnn/torch/osce/engine/engine.py
@@ -0,0 +1,103 @@
+import torch
+from tqdm import tqdm
+import sys
+
def train_one_epoch(model, criterion, optimizer, dataloader, device, scheduler, log_interval=10):
    """Train ``model`` for one pass over ``dataloader`` and return the mean loss.

    Every batch is a dict of tensors with the keys ``signals``, ``features``,
    ``periods``, ``numbits`` and ``target``. The model is called as
    ``model(signals.permute(0, 2, 1), features, periods, numbits)``; if it
    returns a list, the loss is averaged over all list entries. The scheduler
    is stepped after every batch, and ``model.sparsifier()`` is called after
    every update when the model has such an attribute.

    Args:
        model: module to train; it is moved to ``device`` and put in train mode.
        criterion: callable ``criterion(target, prediction)`` returning a loss.
        optimizer: optimizer updating the model parameters.
        dataloader: sized iterable of batches.
        device: device the model and batches are moved to.
        scheduler: learning rate scheduler, stepped once per batch.
        log_interval: number of batches between progress bar updates.

    Returns:
        Sum of all batch losses divided by ``len(dataloader)``.
    """
    model.to(device)
    model.train()

    running_loss = 0
    previous_running_loss = 0
    # batches accumulated since the last status update; the first update
    # happens after a single batch, later ones after log_interval batches
    batches_since_log = 0

    with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:

        for i, batch in enumerate(tepoch):

            # set gradients to zero
            optimizer.zero_grad()

            # push batch to device
            for key in batch:
                batch[key] = batch[key].to(device)

            target = batch['target']

            # calculate model output
            output = model(batch['signals'].permute(0, 2, 1), batch['features'], batch['periods'], batch['numbits'])

            # calculate loss (averaged over all outputs if the model returns a list)
            if isinstance(output, list):
                loss = torch.zeros(1, device=device)
                for y in output:
                    loss = loss + criterion(target, y.squeeze(1))
                loss = loss / len(output)
            else:
                loss = criterion(target, output.squeeze(1))

            # calculate gradients
            loss.backward()

            # update weights
            optimizer.step()

            # update learning rate
            scheduler.step()

            # sparsification
            if hasattr(model, 'sparsifier'):
                model.sparsifier()

            # update running loss
            running_loss += float(loss.cpu())
            batches_since_log += 1

            # update status bar
            if i % log_interval == 0:
                tepoch.set_postfix(running_loss=f"{running_loss/(i + 1):8.7f}", current_loss=f"{(running_loss - previous_running_loss)/batches_since_log:8.7f}")
                previous_running_loss = running_loss
                batches_since_log = 0

    running_loss /= len(dataloader)

    return running_loss
+
def evaluate(model, criterion, dataloader, device, log_interval=10):
    """Compute the mean loss of ``model`` over ``dataloader`` without gradients.

    Batches have the same layout as in ``train_one_epoch`` and the model is
    called the same way. A list-valued model output is handled like in
    training: the loss is averaged over all list entries.

    Args:
        model: module to evaluate; it is moved to ``device`` and put in eval mode.
        criterion: callable ``criterion(target, prediction)`` returning a loss.
        dataloader: sized iterable of batches.
        device: device the model and batches are moved to.
        log_interval: number of batches between progress bar updates.

    Returns:
        Sum of all batch losses divided by ``len(dataloader)``.
    """
    model.to(device)
    model.eval()

    running_loss = 0
    previous_running_loss = 0
    # batches accumulated since the last status update
    batches_since_log = 0

    with torch.no_grad():
        with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:

            for i, batch in enumerate(tepoch):

                # push batch to device
                for key in batch:
                    batch[key] = batch[key].to(device)

                target = batch['target']

                # calculate model output
                output = model(batch['signals'].permute(0, 2, 1), batch['features'], batch['periods'], batch['numbits'])

                # calculate loss (averaged over all outputs if the model returns a list)
                if isinstance(output, list):
                    loss = torch.zeros(1, device=device)
                    for y in output:
                        loss = loss + criterion(target, y.squeeze(1))
                    loss = loss / len(output)
                else:
                    loss = criterion(target, output.squeeze(1))

                # update running loss
                running_loss += float(loss.cpu())
                batches_since_log += 1

                # update status bar
                if i % log_interval == 0:
                    tepoch.set_postfix(running_loss=f"{running_loss/(i + 1):8.7f}", current_loss=f"{(running_loss - previous_running_loss)/batches_since_log:8.7f}")
                    previous_running_loss = running_loss
                    batches_since_log = 0

    running_loss /= len(dataloader)

    return running_loss
diff --git a/dnn/torch/osce/engine/vocoder_engine.py b/dnn/torch/osce/engine/vocoder_engine.py
new file mode 100644
index 00000000..9eee49e4
--- /dev/null
+++ b/dnn/torch/osce/engine/vocoder_engine.py
@@ -0,0 +1,101 @@
+import torch
+from tqdm import tqdm
+import sys
+
def train_one_epoch(model, criterion, optimizer, dataloader, device, scheduler, log_interval=10):
    """Train a vocoder ``model`` for one pass over ``dataloader``.

    Every batch is a dict of tensors with the keys ``features``, ``periods``
    and ``target``. The model is called as ``model(features, periods)``; if it
    returns a list, the loss is averaged over all list entries. The scheduler
    is stepped after every batch.

    Args:
        model: module to train; it is moved to ``device`` and put in train mode.
        criterion: callable ``criterion(target, prediction)`` returning a loss.
        optimizer: optimizer updating the model parameters.
        dataloader: sized iterable of batches.
        device: device the model and batches are moved to.
        scheduler: learning rate scheduler, stepped once per batch.
        log_interval: number of batches between progress bar updates.

    Returns:
        Sum of all batch losses divided by ``len(dataloader)``.
    """
    model.to(device)
    model.train()

    running_loss = 0
    previous_running_loss = 0
    # batches accumulated since the last status update; the first update
    # happens after a single batch, later ones after log_interval batches
    batches_since_log = 0

    with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:

        for i, batch in enumerate(tepoch):

            # set gradients to zero
            optimizer.zero_grad()

            # push batch to device
            for key in batch:
                batch[key] = batch[key].to(device)

            target = batch['target']

            # calculate model output
            output = model(batch['features'], batch['periods'])

            # calculate loss (averaged over all outputs if the model returns a list)
            if isinstance(output, list):
                loss = torch.zeros(1, device=device)
                for y in output:
                    loss = loss + criterion(target, y.squeeze(1))
                loss = loss / len(output)
            else:
                loss = criterion(target, output.squeeze(1))

            # calculate gradients
            loss.backward()

            # update weights
            optimizer.step()

            # update learning rate
            scheduler.step()

            # update running loss
            running_loss += float(loss.cpu())
            batches_since_log += 1

            # update status bar
            if i % log_interval == 0:
                tepoch.set_postfix(running_loss=f"{running_loss/(i + 1):8.7f}", current_loss=f"{(running_loss - previous_running_loss)/batches_since_log:8.7f}")
                previous_running_loss = running_loss
                batches_since_log = 0

    running_loss /= len(dataloader)

    return running_loss
+
def evaluate(model, criterion, dataloader, device, log_interval=10):
    """Compute the mean loss of a vocoder ``model`` without gradients.

    Batches have the same layout as in ``train_one_epoch`` and the model is
    called as ``model(features, periods)``. A list-valued model output is
    handled like in training: the loss is averaged over all list entries.

    Args:
        model: module to evaluate; it is moved to ``device`` and put in eval mode.
        criterion: callable ``criterion(target, prediction)`` returning a loss.
        dataloader: sized iterable of batches.
        device: device the model and batches are moved to.
        log_interval: number of batches between progress bar updates.

    Returns:
        Sum of all batch losses divided by ``len(dataloader)``.
    """
    model.to(device)
    model.eval()

    running_loss = 0
    previous_running_loss = 0
    # batches accumulated since the last status update
    batches_since_log = 0

    with torch.no_grad():
        with tqdm(dataloader, unit='batch', file=sys.stdout) as tepoch:

            for i, batch in enumerate(tepoch):

                # push batch to device
                for key in batch:
                    batch[key] = batch[key].to(device)

                target = batch['target']

                # calculate model output
                output = model(batch['features'], batch['periods'])

                # calculate loss (averaged over all outputs if the model returns a list)
                if isinstance(output, list):
                    loss = torch.zeros(1, device=device)
                    for y in output:
                        loss = loss + criterion(target, y.squeeze(1))
                    loss = loss / len(output)
                else:
                    loss = criterion(target, output.squeeze(1))

                # update running loss
                running_loss += float(loss.cpu())
                batches_since_log += 1

                # update status bar
                if i % log_interval == 0:
                    tepoch.set_postfix(running_loss=f"{running_loss/(i + 1):8.7f}", current_loss=f"{(running_loss - previous_running_loss)/batches_since_log:8.7f}")
                    previous_running_loss = running_loss
                    batches_since_log = 0

    running_loss /= len(dataloader)

    return running_loss
diff --git a/dnn/torch/osce/export_model_weights.py b/dnn/torch/osce/export_model_weights.py
new file mode 100644
index 00000000..a504a61c
--- /dev/null
+++ b/dnn/torch/osce/export_model_weights.py
@@ -0,0 +1,174 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+
+import hashlib
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))
+
+import torch
+import wexchange.torch
+from wexchange.torch import dump_torch_weights
+from models import model_dict
+
+from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+from utils.layers.td_shaper import TDShaper
+from utils.misc import remove_all_weight_norm
+from wexchange.torch import dump_torch_weights
+
+
+
+# command line interface: checkpoint to export, output folder and an optional
+# switch that selects the scheduled (quantized) export below
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint', type=str, help='LACE or NoLACE model checkpoint')
+parser.add_argument('output_dir', type=str, help='output folder')
+parser.add_argument('--quantize', action="store_true", help='quantization according to schedule')
+
+# Export schedules, keyed by model name. Each schedule is an ordered list of
+# (submodule path, keyword arguments) pairs; osce_scheduled_dump resolves the
+# path with model.get_submodule and forwards the keyword arguments to
+# dump_torch_weights. An empty dict() means the layer is dumped with the
+# default arguments of dump_torch_weights.
+# sparse_default is the value used for every sparse/input_sparse/recurrent_sparse
+# entry of the schedules.
+sparse_default=False
+schedules = {
+ 'nolace': [
+ ('pitch_embedding', dict()),
+ ('feature_net.conv1', dict()),
+ ('feature_net.conv2', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('feature_net.tconv', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('feature_net.gru', dict(quantize=True, scale=None, recurrent_scale=None, input_sparse=sparse_default, recurrent_sparse=sparse_default)),
+ ('cf1', dict(quantize=True, scale=None)),
+ ('cf2', dict(quantize=True, scale=None)),
+ ('af1', dict(quantize=True, scale=None)),
+ ('tdshape1', dict(quantize=True, scale=None)),
+ ('tdshape2', dict(quantize=True, scale=None)),
+ ('tdshape3', dict(quantize=True, scale=None)),
+ ('af2', dict(quantize=True, scale=None)),
+ ('af3', dict(quantize=True, scale=None)),
+ ('af4', dict(quantize=True, scale=None)),
+ ('post_cf1', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('post_cf2', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('post_af1', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('post_af2', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('post_af3', dict(quantize=True, scale=None, sparse=sparse_default))
+ ],
+ 'lace' : [
+ ('pitch_embedding', dict()),
+ ('feature_net.conv1', dict()),
+ ('feature_net.conv2', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('feature_net.tconv', dict(quantize=True, scale=None, sparse=sparse_default)),
+ ('feature_net.gru', dict(quantize=True, scale=None, recurrent_scale=None, input_sparse=sparse_default, recurrent_sparse=sparse_default)),
+ ('cf1', dict(quantize=True, scale=None)),
+ ('cf2', dict(quantize=True, scale=None)),
+ ('af1', dict(quantize=True, scale=None))
+ ]
+}
+
+
+# auxiliary functions
+def sha1(filename):
+    """Return the hexadecimal SHA-1 digest of the file at `filename`.
+
+    The file is opened in binary mode and hashed in chunks of 65536 bytes, so
+    it is never held in memory as a whole.
+    """
+    chunk_size = 65536
+    digest = hashlib.sha1()
+
+    with open(filename, 'rb') as stream:
+        # read() returns b'' at end of file, which ends the iteration
+        for chunk in iter(lambda: stream.read(chunk_size), b''):
+            digest.update(chunk)
+
+    return digest.hexdigest()
+
+def osce_dump_generic(writer, name, module):
+    """Dump `module` recursively without an export schedule.
+
+    A module of one of the directly supported layer types is written with
+    dump_torch_weights under `name`. Any other module is descended into, and
+    each child is dumped under `name` + "_" + child name, with "feature_net"
+    shortened to "fnet".
+    """
+    leaf_types = (
+        torch.nn.Linear, torch.nn.Conv1d,
+        torch.nn.ConvTranspose1d, torch.nn.Embedding,
+        LimitedAdaptiveConv1d, LimitedAdaptiveComb1d,
+        TDShaper, torch.nn.GRU,
+    )
+
+    if isinstance(module, leaf_types):
+        dump_torch_weights(writer, module, name=name, verbose=True)
+        return
+
+    for child_name, child in module.named_children():
+        qualified_name = (name + "_" + child_name).replace("feature_net", "fnet")
+        osce_dump_generic(writer, qualified_name, child)
+
+
+def export_name(name):
+    """Convert a dotted submodule path into the name used for the exported layer.
+
+    Dots become underscores first; afterwards every occurrence of
+    'feature_net' is shortened to 'fnet'.
+    """
+    return name.replace('.', '_').replace('feature_net', 'fnet')
+
+def osce_scheduled_dump(writer, prefix, model, schedule):
+    """Dump the submodules listed in `schedule` in order.
+
+    Args:
+        writer: writer object handed to dump_torch_weights.
+        prefix: name prefix of every exported layer; a trailing '_' is added
+            when missing.
+        model: module whose submodules are looked up with get_submodule.
+        schedule: iterable of (submodule path, keyword arguments) pairs; the
+            keyword arguments are forwarded to dump_torch_weights.
+    """
+    full_prefix = prefix if prefix.endswith('_') else prefix + '_'
+
+    for submodule_path, dump_kwargs in schedule:
+        submodule = model.get_submodule(submodule_path)
+        dump_torch_weights(writer, submodule, full_prefix + export_name(submodule_path), **dump_kwargs, verbose=True)
+
+if __name__ == "__main__":
+    # Script entry point: load a checkpoint, rebuild the model it describes and
+    # write its weights plus a few model constants through a CWriter.
+    args = parser.parse_args()
+
+    checkpoint_path = args.checkpoint
+    outdir = args.output_dir
+    os.makedirs(outdir, exist_ok=True)
+
+    # message identifying the source checkpoint by file name and SHA-1
+    message = f"Auto generated from checkpoint {os.path.basename(checkpoint_path)} (sha1: {sha1(checkpoint_path)})"
+
+    # rebuild the model from the setup stored in the checkpoint and load its weights
+    checkpoint = torch.load(checkpoint_path, map_location='cpu')
+    model_setup = checkpoint['setup']['model']
+    model_name = model_setup['name']
+    model = model_dict[model_name](*model_setup['args'], **model_setup['kwargs'])
+    model.load_state_dict(checkpoint['state_dict'])
+    remove_all_weight_norm(model, verbose=True)
+
+    # writer for the generated files; macros and the layer struct carry the
+    # upper-cased model name
+    macro_prefix = model_name.upper()
+    cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=macro_prefix + 'Layers', add_typedef=True)
+
+    # global model parameters exported as preprocessor constants
+    cwriter.header.write(f'''
+#define {macro_prefix}_PREEMPH {model.preemph}f
+#define {macro_prefix}_FRAME_SIZE {model.FRAME_SIZE}
+#define {macro_prefix}_OVERLAP_SIZE 40
+#define {macro_prefix}_NUM_FEATURES {model.num_features}
+#define {macro_prefix}_PITCH_MAX {model.pitch_max}
+#define {macro_prefix}_PITCH_EMBEDDING_DIM {model.pitch_embedding_dim}
+#define {macro_prefix}_NUMBITS_RANGE_LOW {model.numbits_range[0]}
+#define {macro_prefix}_NUMBITS_RANGE_HIGH {model.numbits_range[1]}
+#define {macro_prefix}_NUMBITS_EMBEDDING_DIM {model.numbits_embedding_dim}
+#define {macro_prefix}_COND_DIM {model.cond_dim}
+#define {macro_prefix}_HIDDEN_FEATURE_DIM {model.hidden_feature_dim}
+''')
+
+    # one constant per scale factor of the numbits embedding
+    for i, s in enumerate(model.numbits_embedding.scale_factors):
+        cwriter.header.write(f"#define {macro_prefix}_NUMBITS_SCALE_{i} {float(s.detach().cpu())}f\n")
+
+    # dump layers: scheduled export only if requested and a schedule exists
+    use_schedule = model_name in schedules and args.quantize
+    if use_schedule:
+        osce_scheduled_dump(cwriter, model_name, model, schedules[model_name])
+    else:
+        osce_dump_generic(cwriter, model_name, model)
+
+    cwriter.close()
diff --git a/dnn/torch/osce/losses/stft_loss.py b/dnn/torch/osce/losses/stft_loss.py
new file mode 100644
index 00000000..4c164cb6
--- /dev/null
+++ b/dnn/torch/osce/losses/stft_loss.py
@@ -0,0 +1,277 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+"""STFT-based Loss modules."""
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+import numpy as np
+import torchaudio
+
+
+def get_window(win_name, win_length, *args, **kwargs):
+    """Create a window tensor from the name of a torch window function.
+
+    Args:
+        win_name (str): one of 'bartlett_window', 'blackman_window',
+            'hamming_window', 'hann_window' or 'kaiser_window'.
+        win_length (int): window length passed to the torch window function.
+        *args, **kwargs: forwarded unchanged to the torch window function.
+
+    Returns:
+        Tensor: result of the selected torch window function.
+
+    Raises:
+        ValueError: if `win_name` is not a supported window name.
+    """
+    window_dict = {
+        'bartlett_window' : torch.bartlett_window,
+        'blackman_window' : torch.blackman_window,
+        'hamming_window' : torch.hamming_window,
+        'hann_window' : torch.hann_window,
+        'kaiser_window' : torch.kaiser_window
+    }
+
+    if win_name not in window_dict:
+        # name the offending value and the valid choices instead of raising a bare error
+        raise ValueError(f"unknown window '{win_name}', expected one of {sorted(window_dict)}")
+
+    return window_dict[win_name](win_length, *args, **kwargs)
+
+
+def stft(x, fft_size, hop_size, win_length, window):
+    """Compute a floored STFT magnitude spectrogram.
+
+    Args:
+        x (Tensor): input signal(s), passed unchanged to torch.stft.
+        fft_size (int): FFT size.
+        hop_size (int): hop size.
+        win_length (int): window length.
+        window (str): name of the window function, resolved with get_window.
+
+    Returns:
+        Tensor: element-wise magnitude of the complex torch.stft output,
+        clamped from below at 1e-7. The layout is the one torch.stft
+        produces (frequency bins before frames); no transpose is applied.
+    """
+    analysis_window = get_window(window, win_length).to(x.device)
+    spectrum = torch.stft(x, fft_size, hop_size, win_length, analysis_window, return_complex=True)
+    magnitude = torch.abs(spectrum)
+
+    return torch.clamp(magnitude, min=1e-7)
+
+def spectral_convergence_loss(Y_true, Y_pred):
+    """Spectral convergence between two spectrograms, averaged over the batch.
+
+    Per batch item, the Frobenius norm of the magnitude difference is divided
+    by the Frobenius norm of `Y_pred` (plus 1e-6); all dimensions except the
+    first are reduced.
+    """
+    reduce_dims = list(range(1, len(Y_pred.shape)))
+
+    error_norm = torch.norm(torch.abs(Y_true) - torch.abs(Y_pred), p="fro", dim=reduce_dims)
+    pred_norm = torch.norm(Y_pred, p="fro", dim=reduce_dims)
+
+    return torch.mean(error_norm / (pred_norm + 1e-6))
+
+
+def log_magnitude_loss(Y_true, Y_pred):
+    """Mean absolute difference between the log magnitudes of two spectrograms.
+
+    1e-15 is added to each magnitude before taking the logarithm.
+    """
+    eps = 1e-15
+    log_true = torch.log(torch.abs(Y_true) + eps)
+    log_pred = torch.log(torch.abs(Y_pred) + eps)
+
+    return torch.abs(log_true - log_pred).mean()
+
+def spectral_xcorr_loss(Y_true, Y_pred):
+    """One minus the batch mean of the normalized magnitude cross-correlation.
+
+    Per batch item, the sum of the products of the two magnitudes is divided
+    by the square root of the product of their energies (plus 1e-9); all
+    dimensions except the first are reduced.
+    """
+    mag_true = Y_true.abs()
+    mag_pred = Y_pred.abs()
+    reduce_dims = list(range(1, len(mag_pred.shape)))
+
+    inner = torch.sum(mag_true * mag_pred, dim=reduce_dims)
+    energy = torch.sum(mag_true ** 2, dim=reduce_dims) * torch.sum(mag_pred ** 2, dim=reduce_dims)
+    xcorr = inner / torch.sqrt(energy + 1e-9)
+
+    return 1 - xcorr.mean()
+
+
+
+class MRLogMelLoss(nn.Module):
+    """Multi-resolution log-mel loss.
+
+    One torchaudio MelSpectrogram is created per FFT size, with a hop size of
+    round(fft_size * (1 - overlap)). FFT sizes below 128 use n_mels // 2 mel
+    bands, all others n_mels. The loss is the log_magnitude_loss between the
+    mel spectrograms of both signals, averaged over the resolutions.
+    """
+    def __init__(self,
+                 fft_sizes=[512, 256, 128, 64],
+                 overlap=0.5,
+                 fs=16000,
+                 n_mels=18
+                 ):
+
+        super().__init__()
+
+        self.fft_sizes = fft_sizes
+        self.overlap = overlap
+        self.fs = fs
+        self.n_mels = n_mels
+
+        # the transforms are kept in a plain list and additionally registered
+        # as submodules named mel_spec_1, mel_spec_2, ...
+        self.mel_specs = []
+        for index, fft_size in enumerate(fft_sizes):
+            hop_size = int(round(fft_size * (1 - self.overlap)))
+            num_bands = self.n_mels // 2 if fft_size < 128 else self.n_mels
+
+            mel_spec = torchaudio.transforms.MelSpectrogram(fs, fft_size, hop_length=hop_size, n_mels=num_bands)
+            self.mel_specs.append(mel_spec)
+            self.add_module(f'mel_spec_{index+1}', mel_spec)
+
+    def forward(self, y_true, y_pred):
+        """Return the loss as a tensor with one element on the device of `y_true`."""
+
+        total = torch.zeros(1, device=y_true.device)
+
+        for mel_spec in self.mel_specs:
+            total = total + log_magnitude_loss(mel_spec(y_true), mel_spec(y_pred))
+
+        return total / len(self.mel_specs)
+
+def create_weight_matrix(num_bins, bins_per_band=10):
+    """Build a (num_bins, num_bins) float32 band-averaging matrix.
+
+    Row i has weight on bins i - bins_per_band // 2 up to (but excluding)
+    i + (bins_per_band - bins_per_band // 2), clipped to the valid range.
+    The part of a band that falls outside the valid range is added to the
+    lowest or highest bins instead. The result is divided by bins_per_band.
+    """
+    below = bins_per_band // 2
+    above = bins_per_band - below
+
+    weights = torch.zeros((num_bins, num_bins), dtype=torch.float32)
+
+    for row in range(num_bins):
+        first = max(row - below, 0)
+        last = min(row + above, num_bins)
+        weights[row, first:last] += 1
+
+        # band cut off below bin 0: put the missing count on the lowest bins
+        if row < below:
+            weights[row, :below - row] += 1
+
+        # band cut off above the last bin: the negative start index selects
+        # the highest bins, which receive the missing count
+        if row > num_bins - above:
+            weights[row, num_bins - above - row:] += 1
+
+    return weights / bins_per_band
+
+def weighted_spectral_convergence(Y_true, Y_pred, w):
+    """Spectral convergence weighted by a flatness measure of `Y_true`.
+
+    `w` averages along the second dimension of `Y_true`. The ratio of the
+    exponentiated averaged log magnitude to the averaged magnitude (sfm)
+    gives the weight sqrt(relu(1 - sfm)). Per batch item, the weighted mean
+    absolute magnitude error is divided by the weighted mean magnitude of
+    `Y_true` (plus 1e-9); the result is averaged over the batch.
+    """
+
+    # calculate sfm based weights
+    mag_true = torch.abs(Y_true)
+    log_mag_true = torch.log(torch.abs(Y_true) + 1e-9)
+
+    geometric = torch.exp(torch.matmul(log_mag_true.transpose(1, 2), w))
+    arithmetic = torch.matmul(mag_true.transpose(1, 2), w)
+    sfm = geometric / (arithmetic + 1e-9)
+
+    weight = (torch.relu(1 - sfm) ** .5).transpose(1, 2)
+
+    weighted_error = torch.mean(weight * torch.abs(torch.abs(Y_true) - torch.abs(Y_pred)), dim=[1, 2])
+    weighted_reference = torch.mean(weight * torch.abs(Y_true), dim=[1, 2])
+
+    return torch.mean(weighted_error / (weighted_reference + 1e-9))
+
+def gen_filterbank(N, Fs=16000):
+    """Create an (N, N + 1) smoothing filterbank as a torch tensor.
+
+    Row k is centred at frequency k / N * Fs / 2 and spans the N + 1 input
+    frequencies from 0 to Fs / 2. The response in dB is a function of the
+    frequency distance measured in ERB units, and every row is normalized to
+    sum to one.
+    """
+    in_freq = (np.arange(N+1, dtype='float32')/N*Fs/2).reshape(1, -1)
+    out_freq = (np.arange(N, dtype='float32')/N*Fs/2).reshape(-1, 1)
+
+    #ERB from B.C.J Moore, An Introduction to the Psychology of Hearing, 5th Ed., page 73.
+    erb = 24.7 + .108*in_freq
+    delta = np.abs(in_freq-out_freq)/erb
+
+    # quadratic response for distances below half an ERB, linear decay beyond
+    near_center = (delta<.5).astype('float32')
+    response_db = -12*near_center*delta**2 + (1-near_center)*(3-12*delta)
+    response = 10.**(response_db/10.)
+
+    row_sums = np.sum(response, axis=1)
+    response = response/row_sums[:, np.newaxis]
+
+    return torch.from_numpy(response)
+
+def smooth_log_mag(Y_true, Y_pred, filterbank):
+    """Mean absolute log difference of two filterbank-smoothed magnitude spectra.
+
+    Both magnitudes are multiplied from the left by `filterbank`; 1e-9 is
+    added before taking the logarithm.
+    """
+    log_true = torch.log(torch.matmul(filterbank, torch.abs(Y_true)) + 1e-9)
+    log_pred = torch.log(torch.matmul(filterbank, torch.abs(Y_pred)) + 1e-9)
+
+    return torch.abs(log_true - log_pred).mean()
+
+class MRSTFTLoss(nn.Module):
+    """Multi-resolution STFT loss combining several spectral distance terms.
+
+    For every FFT size both signals are converted to magnitude spectrograms
+    with `stft` (window length equal to the FFT size, hop size
+    round(fft_size * (1 - overlap))). Every term with a positive weight is
+    evaluated at each resolution; the weighted sum of the terms is divided by
+    the number of resolutions.
+
+    Args:
+        fft_sizes: FFT sizes of the resolutions.
+        overlap: fractional overlap between consecutive frames.
+        window: window name understood by `get_window`.
+        fs: sampling rate, used to size the averaging bands of the weighted
+            spectral convergence term.
+        log_mag_weight: weight of `log_magnitude_loss`.
+        sc_weight: weight of `spectral_convergence_loss`.
+        wsc_weight: weight of `weighted_spectral_convergence`.
+        smooth_log_mag_weight: weight of `smooth_log_mag`.
+        sxcorr_weight: weight of `spectral_xcorr_loss`.
+    """
+    def __init__(self,
+                 fft_sizes=[2048, 1024, 512, 256, 128, 64],
+                 overlap=0.5,
+                 window='hann_window',
+                 fs=16000,
+                 log_mag_weight=1,
+                 sc_weight=0,
+                 wsc_weight=0,
+                 smooth_log_mag_weight=0,
+                 sxcorr_weight=0):
+        super().__init__()
+
+        self.fft_sizes = fft_sizes
+        self.overlap = overlap
+        self.window = window
+        self.log_mag_weight = log_mag_weight
+        self.sc_weight = sc_weight
+        self.wsc_weight = wsc_weight
+        self.smooth_log_mag_weight = smooth_log_mag_weight
+        self.sxcorr_weight = sxcorr_weight
+        self.fs = fs
+
+        # weights for SFM weighted spectral convergence loss, stored as
+        # non-trainable parameters keyed by the FFT size
+        self.wsc_weights = torch.nn.ParameterDict()
+        for fft_size in fft_sizes:
+            width = min(11, int(1000 * fft_size / self.fs + .5))
+            # odd widths are incremented by one, so the band width is always even
+            width += width % 2
+            self.wsc_weights[str(fft_size)] = torch.nn.Parameter(
+                create_weight_matrix(fft_size // 2 + 1, width),
+                requires_grad=False
+            )
+
+        # filterbanks for smooth log magnitude loss, stored the same way
+        self.filterbanks = torch.nn.ParameterDict()
+        for fft_size in fft_sizes:
+            self.filterbanks[str(fft_size)] = torch.nn.Parameter(
+                gen_filterbank(fft_size//2),
+                requires_grad=False
+            )
+
+
+    def forward(self, y_true, y_pred):
+        """Return the combined loss as a tensor with one element.
+
+        Implemented as `forward` (not `__call__`) so that calling the module
+        goes through nn.Module.__call__ and its hooks; `loss(y_true, y_pred)`
+        keeps working unchanged.
+        """
+
+        lm_loss = torch.zeros(1, device=y_true.device)
+        sc_loss = torch.zeros(1, device=y_true.device)
+        wsc_loss = torch.zeros(1, device=y_true.device)
+        slm_loss = torch.zeros(1, device=y_true.device)
+        sxcorr_loss = torch.zeros(1, device=y_true.device)
+
+        for fft_size in self.fft_sizes:
+            hop_size = int(round(fft_size * (1 - self.overlap)))
+            win_size = fft_size
+
+            Y_true = stft(y_true, fft_size, hop_size, win_size, self.window)
+            Y_pred = stft(y_pred, fft_size, hop_size, win_size, self.window)
+
+            # terms with a non-positive weight are skipped entirely
+            if self.log_mag_weight > 0:
+                lm_loss = lm_loss + log_magnitude_loss(Y_true, Y_pred)
+
+            if self.sc_weight > 0:
+                sc_loss = sc_loss + spectral_convergence_loss(Y_true, Y_pred)
+
+            if self.wsc_weight > 0:
+                wsc_loss = wsc_loss + weighted_spectral_convergence(Y_true, Y_pred, self.wsc_weights[str(fft_size)])
+
+            if self.smooth_log_mag_weight > 0:
+                slm_loss = slm_loss + smooth_log_mag(Y_true, Y_pred, self.filterbanks[str(fft_size)])
+
+            if self.sxcorr_weight > 0:
+                sxcorr_loss = sxcorr_loss + spectral_xcorr_loss(Y_true, Y_pred)
+
+
+        total_loss = (self.log_mag_weight * lm_loss + self.sc_weight * sc_loss
+                + self.wsc_weight * wsc_loss + self.smooth_log_mag_weight * slm_loss
+                + self.sxcorr_weight * sxcorr_loss) / len(self.fft_sizes)
+
+        return total_loss
\ No newline at end of file
diff --git a/dnn/torch/osce/make_default_setup.py b/dnn/torch/osce/make_default_setup.py
new file mode 100644
index 00000000..d7365fff
--- /dev/null
+++ b/dnn/torch/osce/make_default_setup.py
@@ -0,0 +1,93 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import sys
+import argparse
+
+import yaml
+
+from utils.templates import setup_dict
+
+# command line interface: name of the setup file to write, model type,
+# adversarial flag and an optional dataset path
+parser = argparse.ArgumentParser()
+
+parser.add_argument('name', type=str, help='name of default setup file')
+parser.add_argument('--model', choices=['lace', 'nolace', 'lavoce'], help='model name', default='lace')
+parser.add_argument('--adversarial', action='store_true', help='setup for adversarial training')
+parser.add_argument('--path2dataset', type=str, help='dataset path', default=None)
+
+args = parser.parse_args()
+
+# adversarial setups are looked up under '<model>_adv'
+key = args.model + "_adv" if args.adversarial else args.model
+
+try:
+    setup = setup_dict[key]
+except KeyError:
+    print("setup not found, adversarial training possibly not specified for model")
+    sys.exit(1)
+
+# update dataset if given
+if args.path2dataset is not None:
+    setup['dataset'] = args.path2dataset
+
+# make sure the output file name carries the .yml extension
+name = args.name
+if not name.endswith('.yml'):
+    name += '.yml'
+
+if __name__ == '__main__':
+    with open(name, 'w') as f:
+        f.write(yaml.dump(setup))
\ No newline at end of file
diff --git a/dnn/torch/osce/models/__init__.py b/dnn/torch/osce/models/__init__.py
new file mode 100644
index 00000000..859db033
--- /dev/null
+++ b/dnn/torch/osce/models/__init__.py
@@ -0,0 +1,42 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+from .lace import LACE
+from .no_lace import NoLACE
+from .lavoce import LaVoce
+from .lavoce_400 import LaVoce400
+from .fd_discriminator import TFDMultiResolutionDiscriminator as FDMResDisc
+
+# lookup table from model name (string) to the class implementing that model
+model_dict = {
+ 'lace': LACE,
+ 'nolace': NoLACE,
+ 'lavoce': LaVoce,
+ 'lavoce400': LaVoce400,
+ 'fdmresdisc': FDMResDisc,
+}
diff --git a/dnn/torch/osce/models/fd_discriminator.py b/dnn/torch/osce/models/fd_discriminator.py
new file mode 100644
index 00000000..22948624
--- /dev/null
+++ b/dnn/torch/osce/models/fd_discriminator.py
@@ -0,0 +1,974 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import math as m
+import copy
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+from torch.nn.utils import weight_norm, spectral_norm
+import torchaudio
+
+from utils.spec import gen_filterbank
+
+# auxiliary functions
+
+def remove_all_weight_norms(module):
+    """Remove weight normalization from `module` and all of its submodules.
+
+    A submodule counts as weight-normalized when it has a `weight_v`
+    attribute; all other submodules are left untouched.
+    """
+    for submodule in module.modules():
+        if not hasattr(submodule, 'weight_v'):
+            continue
+        nn.utils.remove_weight_norm(submodule)
+
+
+def create_smoothing_kernel(h, w, gamma=1.5):
+    """Create an (h, w) Gaussian-shaped smoothing kernel that sums to one.
+
+    The kernel is centred at ((h - 1) / 2, (w - 1) / 2). Along each axis the
+    distance from the centre is scaled by gamma times the centre coordinate.
+
+    Args:
+        h (int): kernel height.
+        w (int): kernel width.
+        gamma (float): width factor of the Gaussian relative to the centre
+            coordinate.
+
+    Returns:
+        Tensor: kernel of shape (h, w), normalized to unit sum.
+    """
+
+    ch = h / 2 - 0.5
+    cw = w / 2 - 0.5
+
+    sh = gamma * ch
+    sw = gamma * cw
+
+    # A length-1 axis has its only sample on the centre, i.e. at distance 0.
+    # Evaluating (0 - 0) / 0 there would turn the whole kernel into NaN, so
+    # the squared distance is set to zero directly.
+    vx = ((torch.arange(h) - ch) / sh) ** 2 if h > 1 else torch.zeros(h)
+    vy = ((torch.arange(w) - cw) / sw) ** 2 if w > 1 else torch.zeros(w)
+    vals = vx.view(-1, 1) + vy.view(1, -1)
+    kernel = torch.exp(- vals)
+    kernel = kernel / kernel.sum()
+
+    return kernel
+
+
+def create_kernel(h, w, sh, sw):
+    """Create a smoothed (1, 1, h, w) kernel from an (sh, sw) block of ones.
+
+    The block of ones is zero-padded and convolved with a smoothing kernel of
+    size (h - sh + 1, w - sw + 1), which yields an output of size (h, w).
+    Both smoothing kernel dimensions must be positive.
+    """
+    # proto kernel gives disjoint partition of 1
+    ones_block = torch.ones((sh, sw))
+
+    # create smoothing kernel eta
+    eta_h = h - sh + 1
+    eta_w = w - sw + 1
+    assert eta_h > 0 and eta_w > 0
+    eta = create_smoothing_kernel(eta_h, eta_w).view(1, 1, eta_h, eta_w)
+
+    # pad by the smoothing kernel size minus one on every side
+    padded_block = F.pad(ones_block, [eta_w - 1, eta_w - 1, eta_h - 1, eta_h - 1])
+    padded_block = padded_block.unsqueeze(0).unsqueeze(0)
+
+    return F.conv2d(padded_block, eta)
+
+# positional embeddings
+class FrequencyPositionalEmbedding(nn.Module):
+    """Appends two position channels along dimension 2 of a 4-d input.
+
+    With N = x.size(2), the channels hold sin(2 * pi * k / N) and
+    cos(2 * pi * k / N) for position k, broadcast over the remaining
+    dimensions, and are concatenated to `x` along dimension 1.
+    """
+    def __init__(self):
+
+        super().__init__()
+
+    def forward(self, x):
+
+        num_positions = x.size(2)
+        phase = torch.arange(0, num_positions, dtype=x.dtype, device=x.device) * torch.pi * 2 / num_positions
+
+        # single-channel zero template used to broadcast the position values
+        template = torch.zeros_like(x[:, 0:1, :, :])
+        sin_channel = template + torch.sin(phase).reshape(1, 1, -1, 1)
+        cos_channel = template + torch.cos(phase).reshape(1, 1, -1, 1)
+
+        return torch.cat((x, sin_channel, cos_channel), dim=1)
+
+
+class PositionalEmbedding2D(nn.Module):
+    """Appends sinusoidal position channels for dimensions 2 and 3 of a 4-d input.
+
+    For each of the `d` frequencies 10000 ** (-2 * i / d), i = 0..d-1, a sine
+    and a cosine channel of the position index is created for dimension 2 and
+    for dimension 3. The 4 * d channels are concatenated to `x` along
+    dimension 1 in the order h_sin, h_cos, w_sin, w_cos.
+
+    Note: the cosine channels used to be computed with torch.sin, which made
+    them duplicates of the sine channels. Networks trained on the duplicated
+    channels see different inputs after this fix.
+
+    Args:
+        d (int): number of frequencies per axis.
+    """
+    def __init__(self, d=5):
+
+        super().__init__()
+
+        self.d = d
+
+    def forward(self, x):
+
+        N = x.size(2)
+        M = x.size(3)
+
+        h_args = torch.arange(0, N, dtype=x.dtype, device=x.device).reshape(1, 1, -1, 1)
+        w_args = torch.arange(0, M, dtype=x.dtype, device=x.device).reshape(1, 1, 1, -1)
+        coeffs = (10000 ** (-2 * torch.arange(0, self.d, dtype=x.dtype, device=x.device) / self.d)).reshape(1, -1, 1, 1)
+
+        h_sin = torch.sin(coeffs * h_args)
+        h_cos = torch.cos(coeffs * h_args)
+        w_sin = torch.sin(coeffs * w_args)
+        w_cos = torch.cos(coeffs * w_args)
+
+        # single-channel zero template used to broadcast the position values
+        zeros = torch.zeros_like(x[:, 0:1, :, :])
+
+        y = torch.cat((x, zeros + h_sin, zeros + h_cos, zeros + w_sin, zeros + w_cos), dim=1)
+
+        return y
+
+
+# spectral discriminator base class
+class SpecDiscriminatorBase(nn.Module):
+ RECEPTIVE_FIELD_MAX_WIDTH=10000
+ def __init__(self,
+ layers,
+ resolution,
+ fs=16000,
+ freq_roi=[50, 7000],
+ noise_gain=1e-3,
+ fmap_start_index=0
+ ):
+ """ sets up the layer stack, the spectrogram parameters and the receptive field parameters
+
+ layers: sequence of modules, wrapped in an nn.ModuleList and applied in order
+ resolution: unpacked elsewhere in this class as (n_fft, hop_length, win_length)
+ fs: sampling rate used for the filterbank and the bin width
+ freq_roi: [low, high] frequency range that is converted to start_bin / stop_bin
+ noise_gain: gain of the noise added in spectrogram()
+ fmap_start_index: index of the first layer output returned by forward()
+
+ raises ValueError if fmap_start_index is not smaller than the number of layers
+
+ NOTE(review): freq_roi has a mutable default; it is only read here.
+ """
+ super().__init__()
+
+
+ self.layers = nn.ModuleList(layers)
+ self.resolution = resolution
+ self.fs = fs
+ self.noise_gain = noise_gain
+ self.fmap_start_index = fmap_start_index
+
+ if fmap_start_index >= len(layers):
+ raise ValueError(f'fmap_start_index is larger than number of layers')
+
+ # filter bank for noise shaping
+ n_fft = resolution[0]
+
+ # stored as a parameter with requires_grad=False
+ self.filterbank = nn.Parameter(
+ gen_filterbank(n_fft // 2, fs, keep_size=True),
+ requires_grad=False
+ )
+
+ # roi bins
+ # f_step is the width of one FFT bin; the +-0.01 tolerance is applied before rounding
+ f_step = fs / n_fft
+ self.start_bin = int(m.ceil(freq_roi[0] / f_step - 0.01))
+ self.stop_bin = min(int(m.floor(freq_roi[1] / f_step + 0.01)), n_fft//2 + 1)
+
+ self.init_weights()
+
+ # determine receptive field size, offsets and strides
+ # NOTE(review): the indentation of this loop was lost in this listing; the nesting
+ # of the statements below (in particular of the final size check) must be
+ # confirmed against the original file.
+
+ hw = 1000
+ while True:
+ # probe the layer stack with an hw x hw input to obtain the output size
+ x = torch.zeros((1, hw, hw))
+ with torch.no_grad():
+ y = self.run_layer_stack(x)[-1]
+
+ # central output position and its diagonal neighbour
+ pos0 = [y.size(-2) // 2, y.size(-1) // 2]
+ pos1 = [t + 1 for t in pos0]
+
+ # [min, max] input index ranges that influence each of the two positions
+ hs0, ws0 = self._receptive_field((hw, hw), pos0)
+ hs1, ws1 = self._receptive_field((hw, hw), pos1)
+
+ h0 = hs0[1] - hs0[0] + 1
+ h1 = hs1[1] - hs1[0] + 1
+ w0 = ws0[1] - ws0[0] + 1
+ w1 = ws1[1] - ws1[0] + 1
+
+ # differing extents for the two positions: retry with a probe twice as large
+ # (presumably the field was clipped at the input border -- TODO confirm)
+ if h0 != h1 or w0 != w1:
+ hw = 2 * hw
+ else:
+
+ # strides
+ sh = hs1[0] - hs0[0]
+ sw = ws1[0] - ws0[0]
+
+ # NOTE(review): this retries with an unchanged hw; confirm it cannot loop forever
+ if sh == 0 or sw == 0: continue
+
+ # offsets
+ # chosen such that the field of output position p starts at stride * p + offset
+ oh = hs0[0] - sh * pos0[0]
+ ow = ws0[0] - sw * pos0[1]
+
+ # overlap factor
+ overlap = w0 / sw + h0 / sh
+
+ #print(f"{w0=} {h0=} {sw=} {sh=} {overlap=}")
+ # each entry is [stride, offset, extent]
+ self.receptive_field_params = {'width': [sw, ow, w0], 'height': [sh, oh, h0], 'overlap': overlap}
+
+ break
+
+ # NOTE(review): only a warning is printed here; no visible line sets
+ # receptive_field_params to None although receptive_field() checks for None
+ if hw > self.RECEPTIVE_FIELD_MAX_WIDTH:
+ print("warning: exceeded max size while trying to determine receptive field")
+
+ # create transposed convolutional kernel
+ #self.tconv_kernel = nn.Parameter(create_kernel(h0, w0, sw, sw), requires_grad=False)
+
+ def run_layer_stack(self, spec):
+     """ returns a list with the output of every layer, in layer order
+
+     A dimension of size one is inserted at position 1 of `spec` before the
+     first layer; each layer receives the output of the previous one.
+     """
+
+     activations = []
+
+     current = spec.unsqueeze(1)
+
+     for layer in self.layers:
+         current = layer(current)
+         activations.append(current)
+
+     return activations
+
+ def forward(self, x):
+     """ returns array with feature maps and final score at index -1
+
+     The input is converted with self.spectrogram and passed through the layer
+     stack; layer outputs before self.fmap_start_index are dropped.
+     """
+
+     spec = self.spectrogram(x)
+
+     feature_maps = self.run_layer_stack(spec)
+
+     return feature_maps[self.fmap_start_index:]
+
+ def receptive_field(self, output_pos):
+     """ returns ((h_min, h_max), (w_min, w_max)) for an output position
+
+     The ranges are computed from the stored [stride, offset, extent] entries of
+     self.receptive_field_params. The height range is shifted by self.start_bin
+     and clipped to [self.start_bin, self.stop_bin]; the width range is not
+     clipped. Returns (None, None) if no parameters are stored.
+     """
+
+     params = self.receptive_field_params
+     if params is None:
+         return None, None
+
+     stride_h, offset_h, extent_h = params['height']
+     row_start = output_pos[0] * stride_h + offset_h + self.start_bin
+     # the upper bound is derived from the unclipped lower bound
+     row_stop = row_start + extent_h
+     row_start = max(row_start, self.start_bin)
+     row_stop = min(row_stop, self.stop_bin)
+
+     stride_w, offset_w, extent_w = params['width']
+     col_start = output_pos[1] * stride_w + offset_w
+     col_stop = col_start + extent_w
+
+     return (row_start, row_stop), (col_start, col_stop)
+
+
+ def _receptive_field(self, input_dims, output_pos):
+     """ determines receptive field probabilistically via autograd (slow)
+
+     A random input of shape (1,) + input_dims is passed through the layer stack
+     and the entry [0, 0, output_pos[0], output_pos[1]] of the last output is
+     back-propagated. Returns [h_min, h_max], [w_min, w_max] of the input
+     positions with non-zero gradient. Raises ValueError if output_pos lies
+     outside the last output.
+     """
+
+     probe = torch.randn((1,) + input_dims, requires_grad=True)
+
+     # run input through layers
+     response = self.run_layer_stack(probe)[-1]
+     batch, channels, height, width = response.shape
+
+     row, col = output_pos[0], output_pos[1]
+     if row >= height or col >= width:
+         raise ValueError("position out of range")
+
+     # select the single output entry whose gradient is of interest
+     selector = torch.zeros((batch, channels, height, width))
+     selector[0, 0, row, col] = 1
+
+     (selector * response).sum().backward()
+
+     rows, cols = torch.nonzero(probe.grad[0], as_tuple=True)
+
+     row_range = [rows.min().item(), rows.max().item()]
+     col_range = [cols.min().item(), cols.max().item()]
+
+     return row_range, col_range
+
+
+
+ def init_weights(self):
+     """ applies orthogonal initialization to the weights of all Conv1d,
+     ConvTranspose1d, Linear and Embedding submodules; other module types are
+     left unchanged """
+
+     target_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
+
+     for module in self.modules():
+         if isinstance(module, target_types):
+             nn.init.orthogonal_(module.weight.data)
+
+
+ def spectrogram(self, x):
+     """ returns the noisy dB magnitude spectrogram of x, restricted to the frequency ROI
+
+     Dimension 1 of x is squeezed and the STFT is taken with a Hann window and
+     the parameters in self.resolution. Gaussian noise scaled by the
+     filterbank-smoothed magnitude and self.noise_gain is added before the bins
+     self.start_bin .. self.stop_bin (inclusive) are selected and converted to dB.
+     """
+     n_fft, hop_length, win_length = self.resolution
+     signal = x.squeeze(1)
+     window = torch.hann_window(win_length).to(signal.device)
+
+     spectrum = torch.stft(signal, n_fft=n_fft, hop_length=hop_length, win_length=win_length,
+                           window=window, return_complex=True) #[B, F, T]
+     magnitude = torch.abs(spectrum)
+
+     # noise floor following spectral envelope
+     envelope = torch.matmul(self.filterbank, magnitude)
+     shaped_noise = torch.randn_like(magnitude) * envelope * self.noise_gain
+     noisy_magnitude = magnitude + shaped_noise
+
+     # frequency ROI
+     roi = noisy_magnitude[:, self.start_bin : self.stop_bin + 1, ...]
+
+     return torchaudio.functional.amplitude_to_DB(roi,db_multiplier=0.0, multiplier=20,amin=1e-05,top_db=80)#torch.sqrt(x)
+
+ def grad_map(self, x):
+ """ returns the dB spectrogram of the first batch item and the absolute gradient
+ of mean((1 - scores) ** 2) with respect to that spectrogram
+
+ NOTE(review): unlike spectrogram(), no noise is added to the magnitudes here.
+ """
+ self.zero_grad()
+
+ n_fft, hop_length, win_length = self.resolution
+
+ window = getattr(torch, 'hann_window')(win_length).to(x.device)
+
+ y = torch.stft(x.squeeze(1), n_fft=n_fft, hop_length=hop_length, win_length=win_length,
+ window=window, return_complex=True) #[B, F, T]
+ y = torch.abs(y)
+
+ specgram = torchaudio.functional.amplitude_to_DB(y,db_multiplier=0.0, multiplier=20,amin=1e-05,top_db=80)
+
+ # make the full spectrogram the tensor whose gradient is recorded
+ # NOTE(review): presumably x does not require grad, so specgram is a leaf -- confirm
+ specgram.requires_grad = True
+ specgram.retain_grad()
+
+ if specgram.grad is not None:
+ specgram.grad.zero_()
+
+ # only the frequency ROI is passed to the layers
+ y = specgram[:, self.start_bin : self.stop_bin + 1, ...]
+
+ scores = self.run_layer_stack(y)[-1]
+
+ # mean squared distance of the final scores from 1
+ loss = torch.mean((1 - scores) ** 2)
+ loss.backward()
+
+ return specgram.data[0], torch.abs(specgram.grad)[0]
+
+ def relevance_map(self, x):
+ """ returns the dB spectrogram of x and a relevance map of the same shape
+
+ The relevance map is obtained by spreading the final scores of forward(x)
+ over the spectrogram with a transposed convolution whose kernel is built
+ from the stored receptive field strides and extents.
+
+ NOTE(review): x is flattened with view(-1) for the spectrogram, which
+ presumably assumes a single signal -- confirm.
+ """
+
+ n_fft, hop_length, win_length = self.resolution
+ y = x.view(-1)
+ window = getattr(torch, 'hann_window')(win_length).to(x.device)
+
+ y = torch.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=win_length,\
+ window=window, return_complex=True) #[B, F, T]
+ y = torch.abs(y)
+
+ specgram = torchaudio.functional.amplitude_to_DB(y,db_multiplier=0.0, multiplier=20,amin=1e-05,top_db=80)
+
+
+ # final score map of the discriminator
+ scores = self.forward(x)[-1]
+
+ # stride and extent of the receptive field; the offsets are not used here
+ sh, _, h = self.receptive_field_params['height']
+ sw, _, w = self.receptive_field_params['width']
+ kernel = create_kernel(h, w, sh, sw).float().to(scores.device)
+ with torch.no_grad():
+ # number of score positions (rounded up) covered by one receptive field
+ pad_w = (w + sw - 1) // sw
+ pad_h = (h + sh - 1) // sh
+ padded_scores = F.pad(scores, (pad_w, pad_w, pad_h, pad_h), mode='replicate')
+ # CAVE: padding should be derived from offsets
+ rv = F.conv_transpose2d(padded_scores, kernel, bias=None, stride=(sh, sw), padding=(h//2, w//2))
+ # crop pad * stride samples from both ends of each of the last two dimensions
+ rv = rv[..., pad_h * sh : - pad_h * sh, pad_w * sw : -pad_w * sw]
+
+ # place the result at the frequency ROI; everything else stays zero
+ relevance = torch.zeros_like(specgram)
+ relevance[..., self.start_bin : self.start_bin + rv.size(-2), : rv.size(-1)] = rv
+
+
+ return specgram, relevance
+
+
def lrp(self, x, eps=1e-9, label='both', threshold=0.5, low=None, high=None, verbose=False):
    """ layer-wise relevance propagation (https://git.tu-berlin.de/gmontavon/lrp-tutorial)

    Args:
        x: input passed to self.spectrogram
        eps: constant added to the layer pre-activations before dividing relevance by them
        label: 'fake' keeps only scores below -threshold, 'real' keeps only scores
               above threshold, 'both' keeps either
        threshold: magnitude threshold applied to the pre-activation scores of the last layer
        low, high: lower / upper input bounds for the first-layer rule; when None they are
                   looked up in a table keyed by self.resolution[0] (KeyError for other sizes)
        verbose: if True, prints min and max of the pre-activation scores

    Returns:
        tuple (X, R0): the detached output of self.spectrogram(x) and the input
        relevance averaged over dim 1

    Note:
        This calls remove_all_weight_norms(self) and backward(), i.e. it modifies the
        model and its accumulated gradients.
    """

    # ToDo: this code is highly unsafe as it assumes that layers are nn.Sequential with suitable activations

    def newconv2d(layer,g):
        # builds a Conv2d with the same geometry as `layer` whose weight and bias are g(original)

        new_layer = nn.Conv2d(layer.in_channels,
                              layer.out_channels,
                              layer.kernel_size,
                              stride=layer.stride,
                              padding=layer.padding,
                              dilation=layer.dilation,
                              groups=layer.groups)

        try: new_layer.weight = nn.Parameter(g(layer.weight.data.clone()))
        except AttributeError: pass

        # a layer without bias has bias=None, in which case .data raises AttributeError
        try: new_layer.bias = nn.Parameter(g(layer.bias.data.clone()))
        except AttributeError: pass

        return new_layer

    # default [low, high] input bounds per FFT size
    # NOTE(review): presumably measured dB ranges of the spectrogram -- confirm origin
    bounds = {
        64: [-85.82449722290039, 2.1755014657974243],
        128: [-84.49211349487305, 3.5078893899917607],
        256: [-80.33127822875977, 7.6687201976776125],
        512: [-73.79328079223633, 14.20672025680542],
        1024: [-67.59239501953125, 20.40760498046875],
        2048: [-62.31902580261231, 25.680974197387698],
    }

    nfft = self.resolution[0]
    if low is None: low = bounds[nfft][0]
    if high is None: high = bounds[nfft][1]

    remove_all_weight_norms(self)

    # clear stale parameter gradients before the backward() calls below
    for p in self.parameters():
        if p.grad is not None:
            p.grad.zero_()

    num_layers = len(self.layers)
    X = self.spectrogram(x). detach()


    # forward pass
    # A[i] is the input to layer i; A[0] is the spectrogram with a channel dimension added
    A = [X.unsqueeze(1)] + [None] * len(self.layers)

    for i in range(num_layers - 1):
        A[i + 1] = self.layers[i](A[i])

    # initial relevance is last layer without activation
    r = A[-2]
    # intermediate outputs of the last layer's sublayers (collected but not used below)
    last_layer_rs = [r]
    layer = self.layers[-1]
    for sublayer in list(layer)[:-1]:
        r = sublayer(r)
        last_layer_rs.append(r)


    # keep only scores that are confidently 'fake' (< -threshold) and/or 'real' (> threshold)
    mask = torch.zeros_like(r)
    mask.requires_grad_(False)
    if verbose:
        print(r.min(), r.max())
    if label in {'both', 'fake'}:
        mask[r < -threshold] = 1
    if label in {'both', 'real'}:
        mask[r > threshold] = 1
    r = r * mask

    # backward pass
    # R[l] is the relevance assigned to A[l]; R[0] is filled in by the first-layer rule below
    R = [None] * num_layers + [r]

    for l in range(1, num_layers)[::-1]:
        # make A[l] a fresh leaf so that its gradient can be read after backward()
        A[l] = (A[l]).data.requires_grad_(True)

        # layer l without its final sublayer (the activation, per the ToDo above)
        layer = nn.Sequential(*(list(self.layers[l])[:-1]))
        z = layer(A[l]) + eps
        s = (R[l+1] / z).data
        (z*s).sum().backward()
        c = A[l].grad
        R[l] = (A[l] * c).data

    # first layer
    A[0] = (A[0].data).requires_grad_(True)

    # constant inputs at the lower / upper bound, used with sign-clamped copies of the conv
    Xl = (torch.zeros_like(A[0].data) + low).requires_grad_(True)
    Xh = (torch.zeros_like(A[0].data) + high).requires_grad_(True)

    if len(list(self.layers)) > 2:
        # unsafe way to check for embedding layer
        embed = list(self.layers[0])[0]
        conv = list(self.layers[0])[1]

        layer = nn.Sequential(embed, conv)
        layerl = nn.Sequential(embed, newconv2d(conv, lambda p: p.clamp(min=0)))
        layerh = nn.Sequential(embed, newconv2d(conv, lambda p: p.clamp(max=0)))

    else:
        layer = list(self.layers[0])[0]
        layerl = newconv2d(layer, lambda p: p.clamp(min=0))
        layerh = newconv2d(layer, lambda p: p.clamp(max=0))


    # subtract the contributions of the bounds (low with non-negative, high with non-positive weights)
    z = layer(A[0])
    z -= layerl(Xl) + layerh(Xh)
    s = (R[1] / z).data
    (z * s).sum().backward()
    c, cp, cm = A[0].grad, Xl.grad, Xh.grad

    R[0] = (A[0] * c + Xl * cp + Xh * cm)
    #R[0] = (A[0] * c).data

    return X, R[0].mean(dim=1)
+
+
+
+
+
+
+
+
+
+
def create_3x3_conv_plan(num_layers : int,
                         f_stretch : int,
                         f_down : int,
                         t_stretch : int,
                         t_down : int
                         ):


    """ creates a stride, dilation, padding plan for a 2d conv network

    Each axis is planned independently: first come layers with stride 2, then (if
    there is room) one plain layer, then layers whose dilation doubles each time,
    then plain fill-up layers, and finally one layer that carries any remaining
    downsampling as a single stride and, if the requested stretch has not been
    reached yet, one more doubling of the dilation.

    Args:
        num_layers (int): number of layers
        f_stretch (int): log_2 of stretching factor along frequency axis
        f_down (int): log_2 of downsampling factor along frequency axis
        t_stretch (int): log_2 of stretching factor along time axis
        t_down (int): log_2 of downsampling factor along time axis

    Returns:
        list(list(tuple)): list containing entries [(stride_f, stride_t), (dilation_f, dilation_t), (padding_f, padding_t)]
    """

    assert num_layers > 0 and t_stretch >= 0 and t_down >= 0 and f_stretch >= 0 and f_down >= 0
    assert f_stretch < num_layers and t_stretch < num_layers

    def process_dimension(n_layers, stretch, down):
        """ returns per-layer (strides, dilations, paddings) for a single axis """

        # all layers except the final one
        stack_layers = n_layers - 1

        stride_layers = min(min(down, stretch) , stack_layers)
        dilation_layers = max(min(stack_layers - stride_layers - 1, stretch - stride_layers), 0)
        # downsampling not covered by the stride-2 layers goes into the final layer
        final_stride = 2 ** (max(down - stride_layers, 0))

        # stretch not covered by stride and dilation layers is made up for in the final layer
        final_dilation = 1
        if stride_layers < stack_layers and stretch - stride_layers - dilation_layers > 0:
            final_dilation = 2

        strides, dilations, paddings = [], [], []
        processed_layers = 0
        current_dilation = 1

        for _ in range(stride_layers):
            # increase receptive field and downsample via stride = 2
            strides.append(2)
            dilations.append(1)
            paddings.append(1)
            processed_layers += 1

        if processed_layers < stack_layers:
            strides.append(1)
            dilations.append(1)
            paddings.append(1)
            processed_layers += 1

        for _ in range(dilation_layers):
            # increase receptive field via dilation = 2
            strides.append(1)
            current_dilation *= 2
            dilations.append(current_dilation)
            paddings.append(current_dilation)
            processed_layers += 1

        while processed_layers < n_layers - 1:
            # fill up with std layers
            strides.append(1)
            dilations.append(current_dilation)
            paddings.append(current_dilation)
            processed_layers += 1

        # final layer
        strides.append(final_stride)
        # this used to be a bare `current_dilation * final_dilation` expression,
        # which silently discarded the extra dilation
        current_dilation *= final_dilation
        dilations.append(current_dilation)
        paddings.append(current_dilation)
        processed_layers += 1

        assert processed_layers == n_layers

        return strides, dilations, paddings

    t_strides, t_dilations, t_paddings = process_dimension(num_layers, t_stretch, t_down)
    f_strides, f_dilations, f_paddings = process_dimension(num_layers, f_stretch, f_down)

    plan = []

    # frequency comes first in every tuple
    for i in range(num_layers):
        plan.append([
            (f_strides[i], t_strides[i]),
            (f_dilations[i], t_dilations[i]),
            (f_paddings[i], t_paddings[i]),
        ])

    return plan
+
+
class DiscriminatorExperimental(SpecDiscriminatorBase):
    """ spectrogram discriminator with ReLU hidden layers and a sigmoid output layer

    Builds num_layers blocks of (FrequencyPositionalEmbedding, normalized 3x3 Conv2d
    with stride (2, 1), ReLU) followed by a single-channel output block with Sigmoid.
    The channel count doubles per block, capped at max_channels. After construction
    every parameter whose name contains 'bias' is shifted by 0.1.

    Args:
        resolution, fs, freq_roi, noise_gain: forwarded unchanged to SpecDiscriminatorBase
        num_channels: output channels of the first block
        max_channels: upper limit for the channel count
        num_layers: number of hidden blocks (the output block is extra)
        use_spectral_norm: selects spectral_norm instead of weight_norm for the convolutions
    """

    def __init__(self,
                 resolution,
                 fs=16000,
                 freq_roi=[50, 7400],
                 noise_gain=0,
                 num_channels=16,
                 max_channels=512,
                 num_layers=5,
                 use_spectral_norm=False):

        norm_f = weight_norm if use_spectral_norm == False else spectral_norm

        self.num_channels = num_channels
        self.num_channels_max = max_channels
        self.num_layers = num_layers

        layers = []
        stride = (2, 1)
        padding= (1, 1)
        # the conv sees 2 channels more than the previous block emits
        # (presumably appended by FrequencyPositionalEmbedding -- defined elsewhere)
        in_channels = 1 + 2
        out_channels = self.num_channels
        for _ in range(self.num_layers):
            layers.append(
                nn.Sequential(
                    FrequencyPositionalEmbedding(),
                    norm_f(nn.Conv2d(in_channels, out_channels, (3, 3), stride=stride, padding=padding)),
                    nn.ReLU(inplace=True)
                )
            )
            in_channels = out_channels + 2
            out_channels = min(2 * out_channels, self.num_channels_max)

        layers.append(
            nn.Sequential(
                FrequencyPositionalEmbedding(),
                norm_f(nn.Conv2d(in_channels, 1, (3, 3), padding=padding)),
                nn.Sigmoid()
            )
        )

        super().__init__(layers=layers, resolution=resolution, fs=fs, freq_roi=freq_roi, noise_gain=noise_gain)

        # bias biases
        # the update has to be in-place: `weight = weight + bias_val` would only
        # rebind the loop variable and leave the parameters untouched
        bias_val = 0.1
        with torch.no_grad():
            for name, param in self.named_parameters():
                if 'bias' in name:
                    param.add_(bias_val)
+
+
# FFT sizes for which a design is defined
_CONFIG_FFT_SIZES = (64, 128, 256, 512, 1024, 2048)

# per-FFT-size log2 factors, listed in the order of _CONFIG_FFT_SIZES
_CONFIG_FREQ_RAMP = (0, 1, 2, 3, 4, 5)
_CONFIG_TIME_RAMP = (4, 3, 2, 1, 0, 0)
_CONFIG_ZEROS = (0, 0, 0, 0, 0, 0)


def _config_table(first, second):
    """ returns a fresh {fft_size: (first, second)} dict for the sizes in _CONFIG_FFT_SIZES """
    return {n_fft: (a, b) for n_fft, a, b in zip(_CONFIG_FFT_SIZES, first, second)}


# design name -> 'stretch' / 'down' -> fft size -> pair of log2 factors.
# DiscriminatorMagFree passes element 0 as the frequency value and element 1 as
# the time value to create_3x3_conv_plan.
configs = {
    'f_down': {
        'stretch' : _config_table(_CONFIG_FREQ_RAMP, _CONFIG_ZEROS),
        'down' : _config_table(_CONFIG_FREQ_RAMP, _CONFIG_ZEROS),
    },
    'ft_down': {
        'stretch' : _config_table(_CONFIG_FREQ_RAMP, _CONFIG_TIME_RAMP),
        'down' : _config_table(_CONFIG_FREQ_RAMP, _CONFIG_TIME_RAMP),
    },
    'dilated': {
        'stretch' : _config_table(_CONFIG_FREQ_RAMP, _CONFIG_TIME_RAMP),
        'down' : _config_table(_CONFIG_ZEROS, _CONFIG_ZEROS),
    },
    'mixed': {
        'stretch' : _config_table(_CONFIG_FREQ_RAMP, _CONFIG_TIME_RAMP),
        'down' : _config_table(_CONFIG_FREQ_RAMP, _CONFIG_ZEROS),
    },
}
+
+
class DiscriminatorMagFree(SpecDiscriminatorBase):
    """ spectrogram discriminator whose strides / dilations follow a named design

    The stretch and downsampling factors for the given FFT size (resolution[0]) are
    looked up in `configs` and turned into a per-layer plan by create_3x3_conv_plan.
    num_layers blocks of (FrequencyPositionalEmbedding, normalized 3x3 Conv2d, ReLU)
    are followed by a single-channel block with Sigmoid. After every block the
    channel count is multiplied by the product of that block's strides, capped at
    max_channels. After construction every parameter whose name contains 'bias' is
    shifted by 0.1.

    Args:
        resolution, fs, freq_roi, noise_gain: forwarded unchanged to SpecDiscriminatorBase
        num_channels: output channels of the first block
        max_channels: upper limit for the channel count
        num_layers: number of hidden blocks (the output block is extra)
        use_spectral_norm: selects spectral_norm instead of weight_norm for the convolutions
        design: key into `configs`; required

    Raises:
        ValueError: if design is None
        KeyError: if design or resolution[0] is not present in `configs`
    """

    def __init__(self,
                 resolution,
                 fs=16000,
                 freq_roi=[50, 7400],
                 noise_gain=0,
                 num_channels=16,
                 max_channels=256,
                 num_layers=5,
                 use_spectral_norm=False,
                 design=None):

        if design is None:
            # the message names the actual parameter (it used to say 'arch')
            raise ValueError('error: design required in DiscriminatorMagFree')

        norm_f = weight_norm if use_spectral_norm == False else spectral_norm

        stretch = configs[design]['stretch'][resolution[0]]
        down = configs[design]['down'][resolution[0]]

        self.num_channels = num_channels
        self.num_channels_max = max_channels
        self.num_layers = num_layers
        self.stretch = stretch
        self.down = down

        layers = []
        # one plan entry per hidden block plus one for the output block
        plan = create_3x3_conv_plan(num_layers + 1, stretch[0], down[0], stretch[1], down[1])
        in_channels = 1 + 2
        out_channels = self.num_channels
        for i in range(self.num_layers):
            layers.append(
                nn.Sequential(
                    FrequencyPositionalEmbedding(),
                    norm_f(nn.Conv2d(in_channels, out_channels, (3, 3), stride=plan[i][0], dilation=plan[i][1], padding=plan[i][2])),
                    nn.ReLU(inplace=True)
                )
            )
            in_channels = out_channels + 2
            # product over strides
            channel_factor = plan[i][0][0] * plan[i][0][1]
            out_channels = min(channel_factor * out_channels, self.num_channels_max)

        layers.append(
            nn.Sequential(
                FrequencyPositionalEmbedding(),
                norm_f(nn.Conv2d(in_channels, 1, (3, 3), stride=plan[-1][0], dilation=plan[-1][1], padding=plan[-1][2])),
                nn.Sigmoid()
            )
        )

        super().__init__(layers=layers, resolution=resolution, fs=fs, freq_roi=freq_roi, noise_gain=noise_gain)

        # bias biases
        # the update has to be in-place: `weight = weight + bias_val` would only
        # rebind the loop variable and leave the parameters untouched
        bias_val = 0.1
        with torch.no_grad():
            for name, param in self.named_parameters():
                if 'bias' in name:
                    param.add_(bias_val)
+
class DiscriminatorMagFreqPosition(SpecDiscriminatorBase):
    """ spectrogram discriminator with a FrequencyPositionalEmbedding in front of every conv

    num_layers blocks of (FrequencyPositionalEmbedding, normalized 3x3 Conv2d with
    stride (2, 1), LeakyReLU(0.2)) are followed by an embedding plus a single-channel
    3x3 conv without activation. Channel counts double per block up to max_channels.
    """

    def __init__(self,
                 resolution,
                 fs=16000,
                 freq_roi=[50, 7400],
                 noise_gain=0,
                 num_channels=16,
                 max_channels=512,
                 num_layers=5,
                 use_spectral_norm=False):

        if use_spectral_norm == False:
            norm_f = weight_norm
        else:
            norm_f = spectral_norm

        self.num_channels = num_channels
        self.num_channels_max = max_channels
        self.num_layers = num_layers

        # every conv takes 2 channels more than the previous block produces
        extra_channels = 2
        conv_in = 1 + extra_channels
        conv_out = self.num_channels

        layers = []
        for _ in range(self.num_layers):
            hidden_conv = nn.Conv2d(conv_in, conv_out, (3, 3), stride=(2, 1), padding=(1, 1))
            layers.append(
                nn.Sequential(
                    FrequencyPositionalEmbedding(),
                    norm_f(hidden_conv),
                    nn.LeakyReLU(0.2, inplace=True)
                )
            )
            conv_in = conv_out + extra_channels
            conv_out = min(2 * conv_out, self.num_channels_max)

        # output block: single score channel, no activation
        score_conv = nn.Conv2d(conv_in, 1, (3, 3), padding=(1, 1))
        layers.append(nn.Sequential(FrequencyPositionalEmbedding(), norm_f(score_conv)))

        super().__init__(layers=layers, resolution=resolution, fs=fs, freq_roi=freq_roi, noise_gain=noise_gain)
+
+
+
class DiscriminatorMag2dPositional(SpecDiscriminatorBase):
    """ spectrogram discriminator with a PositionalEmbedding2D in front of every conv

    num_layers blocks of (PositionalEmbedding2D(d), normalized 3x3 Conv2d with
    stride (2, 2), LeakyReLU(0.2)) are followed by an embedding plus a single-channel
    3x3 conv without activation. Channel counts double per block up to max_channels.

    Args:
        resolution, fs, freq_roi, noise_gain: forwarded unchanged to SpecDiscriminatorBase
        num_channels: output channels of the first block
        max_channels: upper limit for the channel count
        num_layers: number of hidden blocks (the output block is extra)
        d: argument of PositionalEmbedding2D; every conv is sized for 4 * d extra
           input channels (assumes that is what the embedding appends -- it is
           defined elsewhere)
        use_spectral_norm: selects spectral_norm instead of weight_norm for the convolutions
    """

    def __init__(self,
                 resolution,
                 fs=16000,
                 freq_roi=[50, 7400],
                 noise_gain=0,
                 num_channels=16,
                 max_channels=512,
                 num_layers=5,
                 d=5,
                 use_spectral_norm=False):

        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
        self.resolution = resolution
        self.num_channels = num_channels
        self.num_channels_max = max_channels
        self.num_layers = num_layers
        self.d = d
        embedding_dim = 4 * d


        layers = []
        stride = (2, 2)
        padding= (1, 1)
        in_channels = 1 + embedding_dim
        out_channels = self.num_channels
        for _ in range(self.num_layers):
            layers.append(
                nn.Sequential(
                    PositionalEmbedding2D(d),
                    norm_f(nn.Conv2d(in_channels, out_channels, (3, 3), stride=stride, padding=padding)),
                    nn.LeakyReLU(0.2, inplace=True)
                )
            )
            in_channels = out_channels + embedding_dim
            out_channels = min(2 * out_channels, self.num_channels_max)


        # the output block must use the same d as the hidden blocks, because its
        # conv is sized with embedding_dim = 4 * d as well
        layers.append(
            nn.Sequential(
                PositionalEmbedding2D(d),
                norm_f(nn.Conv2d(in_channels, 1, (3, 3), padding=padding))
            )
        )

        super().__init__(layers=layers, resolution=resolution, fs=fs, freq_roi=freq_roi, noise_gain=noise_gain)
+
+
+
class DiscriminatorMag(SpecDiscriminatorBase):
    """ plain spectrogram discriminator without positional embeddings

    num_layers blocks of (normalized 3x3 Conv2d with stride 1, LeakyReLU(0.2)), all
    with num_channels output channels, are followed by a bare normalized 3x3 conv
    with a single output channel (this last layer is not wrapped in nn.Sequential).
    """

    def __init__(self,
                 resolution,
                 fs=16000,
                 freq_roi=[50, 7400],
                 noise_gain=0,
                 num_channels=32,
                 num_layers=5,
                 use_spectral_norm=False):

        if use_spectral_norm == False:
            norm_f = weight_norm
        else:
            norm_f = spectral_norm

        self.num_channels = num_channels
        self.num_layers = num_layers

        hidden_channels = self.num_channels
        conv_in = 1

        layers = []
        for _ in range(self.num_layers):
            hidden_conv = nn.Conv2d(conv_in, hidden_channels, (3, 3), stride=(1, 1), padding=(1, 1))
            layers.append(nn.Sequential(norm_f(hidden_conv), nn.LeakyReLU(0.2, inplace=True)))
            conv_in = hidden_channels

        # score layer
        layers.append(norm_f(nn.Conv2d(conv_in, 1, (3, 3), padding=(1, 1))))

        super().__init__(layers=layers, resolution=resolution, fs=fs, freq_roi=freq_roi, noise_gain=noise_gain)
+
+
# registry mapping the `architecture` argument of TFDMultiResolutionDiscriminator
# to the discriminator class that is instantiated per resolution
discriminators = dict([
    ('mag', DiscriminatorMag),
    ('freqpos', DiscriminatorMagFreqPosition),
    ('2dpos', DiscriminatorMag2dPositional),
    ('experimental', DiscriminatorExperimental),
    ('free', DiscriminatorMagFree),
])
+
class TFDMultiResolutionDiscriminator(torch.nn.Module):
    """ bank of spectrogram discriminators, one per STFT resolution

    FFT sizes are given for a 16 kHz sampling rate and rescaled to fs. Each
    resolution is [n_fft, n_fft // 4, n_fft]; the per-resolution discriminators
    unpack it as (n_fft, hop_length, win_length).
    """

    def __init__(self,
                 fft_sizes_16k=[64, 128, 256, 512, 1024, 2048],
                 architecture='mag',
                 fs=16000,
                 freq_roi=[50, 7400],
                 noise_gain=0,
                 use_spectral_norm=False,
                 **kwargs):

        super().__init__()

        # scale the 16 kHz FFT sizes to the actual sampling rate
        scaled_sizes = []
        for size_16k in fft_sizes_16k:
            scaled_sizes.append(int(round(size_16k * fs / 16000)))

        resolutions = [[size, size // 4, size] for size in scaled_sizes]

        disc_class = discriminators[architecture]

        # options shared by all resolutions; extra keyword arguments are passed through
        shared = dict(fs=fs, freq_roi=freq_roi, noise_gain=noise_gain, use_spectral_norm=use_spectral_norm)
        self.discriminators = nn.ModuleList(
            [disc_class(resolution, **shared, **kwargs) for resolution in resolutions]
        )

    def forward(self, y):
        """ returns the list of outputs of all discriminators applied to y """
        return [disc(y) for disc in self.discriminators]
+
+
class FWGAN_disc_wrapper(nn.Module):
    """ adapter that runs a multi-output discriminator on a real and a generated signal

    The wrapped discriminator has to return one sequence per sub-discriminator;
    the last element of each sequence is treated as the score, the preceding ones
    as feature maps.
    """

    def __init__(self, disc):
        super().__init__()

        self.disc = disc

    def forward(self, y, y_hat):
        """ returns (real scores, generated scores, real feature maps, generated feature maps) """

        real_outputs = self.disc(y)
        fake_outputs = self.disc(y_hat)

        # zip truncates to the shorter of the two output lists
        paired = list(zip(real_outputs, fake_outputs))

        y_d_rs = [real[-1] for real, _ in paired]
        y_d_gs = [fake[-1] for _, fake in paired]
        fmap_rs = [real[:-1] for real, _ in paired]
        fmap_gs = [fake[:-1] for _, fake in paired]

        return y_d_rs, y_d_gs, fmap_rs, fmap_gs
diff --git a/dnn/torch/osce/models/lace.py b/dnn/torch/osce/models/lace.py
new file mode 100644
index 00000000..51d65c3e
--- /dev/null
+++ b/dnn/torch/osce/models/lace.py
@@ -0,0 +1,190 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+import numpy as np
+
+from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+
+from models.nns_base import NNSBase
+from models.silk_feature_net_pl import SilkFeatureNetPL
+from models.silk_feature_net import SilkFeatureNet
+from .scale_embedding import ScaleEmbedding
+
+import sys
+sys.path.append('../dnntools')
+
+from dnntools.sparsification import create_sparsifier
+
+
class LACE(NNSBase):
    """ Linear-Adaptive Coding Enhancer """
    FRAME_SIZE=80

    def __init__(self,
                 num_features=47,
                 pitch_embedding_dim=64,
                 cond_dim=256,
                 pitch_max=257,
                 kernel_size=15,
                 preemph=0.85,
                 skip=91,
                 comb_gain_limit_db=-6,
                 global_gain_limits_db=[-6, 6],
                 conv_gain_limits_db=[-6, 6],
                 numbits_range=[50, 650],
                 numbits_embedding_dim=8,
                 hidden_feature_dim=64,
                 partial_lookahead=True,
                 norm_p=2,
                 softquant=False,
                 sparsify=False,
                 sparsification_schedule=[10000, 30000, 100],
                 sparsification_density=0.5,
                 apply_weight_norm=False):
        """ sets up embeddings, the feature net, two adaptive comb filters and one adaptive conv """

        super().__init__(skip=skip, preemph=preemph)

        # keep the hyper parameters around
        self.num_features = num_features
        self.cond_dim = cond_dim
        self.pitch_max = pitch_max
        self.pitch_embedding_dim = pitch_embedding_dim
        self.kernel_size = kernel_size
        self.preemph = preemph
        self.skip = skip
        self.numbits_range = numbits_range
        self.numbits_embedding_dim = numbits_embedding_dim
        self.hidden_feature_dim = hidden_feature_dim
        self.partial_lookahead = partial_lookahead

        # embeddings for pitch lag and number of bits
        self.pitch_embedding = nn.Embedding(pitch_max + 1, pitch_embedding_dim)
        self.numbits_embedding = ScaleEmbedding(numbits_embedding_dim, *numbits_range, logscale=True)

        # feature net; its input is features + pitch embedding + two numbits embeddings
        feature_net_in = num_features + pitch_embedding_dim + 2 * numbits_embedding_dim
        if partial_lookahead:
            self.feature_net = SilkFeatureNetPL(feature_net_in, cond_dim, hidden_feature_dim,
                                                softquant=softquant,
                                                sparsify=sparsify,
                                                sparsification_density=sparsification_density,
                                                apply_weight_norm=apply_weight_norm)
        else:
            self.feature_net = SilkFeatureNet(feature_net_in, cond_dim)

        # comb filters: two identically configured instances with centered padding
        left_pad = self.kernel_size // 2
        right_pad = self.kernel_size - 1 - left_pad

        def make_comb_filter():
            return LimitedAdaptiveComb1d(self.kernel_size, cond_dim,
                                         frame_size=self.FRAME_SIZE,
                                         overlap_size=40,
                                         use_bias=False,
                                         padding=[left_pad, right_pad],
                                         max_lag=pitch_max + 1,
                                         gain_limit_db=comb_gain_limit_db,
                                         global_gain_limits_db=global_gain_limits_db,
                                         norm_p=norm_p,
                                         softquant=softquant,
                                         apply_weight_norm=apply_weight_norm)

        self.cf1 = make_comb_filter()
        self.cf2 = make_comb_filter()

        # spectral shaping: padding is applied on the left only
        self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim,
                                         frame_size=self.FRAME_SIZE,
                                         padding=[self.kernel_size - 1, 0],
                                         gain_limits_db=conv_gain_limits_db,
                                         norm_p=norm_p,
                                         softquant=softquant,
                                         apply_weight_norm=apply_weight_norm)

        if sparsify:
            self.sparsifier = create_sparsifier(self, *sparsification_schedule)

    def flop_count(self, rate=16000, verbose=False):
        """ sums the flop counts reported by the feature net, the comb filters and the adaptive conv """

        frame_rate = rate / self.FRAME_SIZE

        # the feature net is evaluated at frame rate, the filters at sample rate
        feature_net_flops = self.feature_net.flop_count(frame_rate)
        comb_flops = self.cf1.flop_count(rate) + self.cf2.flop_count(rate)
        af_flops = self.af1.flop_count(rate)

        if verbose:
            print(f"feature net: {feature_net_flops / 1e6} MFLOPS")
            print(f"comb filters: {comb_flops / 1e6} MFLOPS")
            print(f"adaptive conv: {af_flops / 1e6} MFLOPS")

        total_flops = feature_net_flops + comb_flops + af_flops
        return total_flops

    def forward(self, x, features, periods, numbits, debug=False):
        """ filters x with cf1, cf2 and af1, all conditioned on the feature net output """

        periods = periods.squeeze(-1)

        # conditioning: features, pitch embedding and flattened numbits embedding, concatenated
        conditioning_input = torch.cat(
            (features, self.pitch_embedding(periods), self.numbits_embedding(numbits).flatten(2)),
            dim=-1)
        cf = self.feature_net(conditioning_input)

        y = self.cf1(x, cf, periods, debug=debug)
        y = self.cf2(y, cf, periods, debug=debug)
        y = self.af1(y, cf, debug=debug)

        return y

    def get_impulse_responses(self, features, periods, numbits):
        """ generates impulse responses on frame centers (input without batch dimension) """

        num_frames = features.size(0)
        batch_size = 32
        max_len = 2 * (self.pitch_max + self.kernel_size) + 10
        frame_size = self.FRAME_SIZE

        # spread out some pulses: batch entry b gets a pulse at the center of
        # frames b, b + batch_size, b + 2 * batch_size, ...
        x = np.zeros((batch_size, 1, num_frames * frame_size))
        pulse_spacing = batch_size * frame_size
        for b in range(batch_size):
            first_pulse = frame_size // 2 + b * frame_size
            x[b, :, first_pulse :: pulse_spacing] = 1

        # prepare input: the same conditioning is used for every batch entry
        x = torch.from_numpy(x).float().to(features.device)
        features = torch.repeat_interleave(features.unsqueeze(0), batch_size, 0)
        periods = torch.repeat_interleave(periods.unsqueeze(0), batch_size, 0)
        numbits = torch.repeat_interleave(numbits.unsqueeze(0), batch_size, 0)

        # run network
        with torch.no_grad():
            periods = periods.squeeze(-1)
            conditioning_input = torch.cat(
                (features, self.pitch_embedding(periods), self.numbits_embedding(numbits).flatten(2)),
                dim=-1)
            cf = self.feature_net(conditioning_input)
            y = self.cf1(x, cf, periods, debug=False)
            y = self.cf2(y, cf, periods, debug=False)
            y = self.af1(y, cf, debug=False)

        # collect responses; trailing frames without room for a full response are dropped
        y = y.detach().squeeze().cpu().numpy()
        cut_frames = (max_len + frame_size - 1) // frame_size
        num_responses = num_frames - cut_frames
        responses = np.zeros((num_responses, max_len))

        for i in range(num_responses):
            start = frame_size // 2 + i * frame_size
            responses[i, :] = y[i % batch_size, start : start + max_len]

        return responses
diff --git a/dnn/torch/osce/models/lavoce.py b/dnn/torch/osce/models/lavoce.py
new file mode 100644
index 00000000..fcfdc8bf
--- /dev/null
+++ b/dnn/torch/osce/models/lavoce.py
@@ -0,0 +1,274 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+import numpy as np
+
+from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+from utils.layers.td_shaper import TDShaper
+from utils.layers.noise_shaper import NoiseShaper
+from utils.complexity import _conv1d_flop_count
+from utils.endoscopy import write_data
+
+from models.nns_base import NNSBase
+from models.lpcnet_feature_net import LPCNetFeatureNet
+from .scale_embedding import ScaleEmbedding
+
def print_channels(y, prefix="", name="", rate=16000):
    """ writes every channel of the first batch element of y via write_data

    Each channel is scaled so that its absolute peak maps to 2**14 and is then
    converted to int16. Channels that are entirely zero are written as zeros.

    Args:
        y: tensor indexed as y[0, channel, :]
        prefix: first part of the channel name ("<prefix>_c<channel index>")
        name: optional suffix appended as "_<name>"
        rate: passed through to write_data
    """
    num_channels = y.size(1)
    for i in range(num_channels):
        channel_name = f"{prefix}_c{i:02d}"
        if len(name) > 0: channel_name += "_" + name
        ch = y[0,i,:].detach().cpu().numpy()
        # normalize by the absolute peak: using np.max(ch) overflows int16 when the
        # negative peak dominates and divides by zero for silent channels
        peak = np.max(np.abs(ch))
        if peak > 0:
            ch = (2**14) * ch / peak
        ch = ch.astype(np.int16)
        write_data(channel_name, ch, rate)
+
+
+
class LaVoce(nn.Module):
    """ Linear-Adaptive VOCodEr """
    # features arrive once per FEATURE_FRAME_SIZE samples, the signal path works on
    # frames of FRAME_SIZE samples; the ratio is stored as self.upsamp_factor
    FEATURE_FRAME_SIZE=160
    FRAME_SIZE=80

    def __init__(self,
                 num_features=20,
                 pitch_embedding_dim=64,
                 cond_dim=256,
                 pitch_max=300,
                 kernel_size=15,
                 preemph=0.85,
                 comb_gain_limit_db=-6,
                 global_gain_limits_db=[-6, 6],
                 conv_gain_limits_db=[-6, 6],
                 norm_p=2,
                 avg_pool_k=4,
                 pulses=False,
                 innovate1=True,
                 innovate2=False,
                 innovate3=False,
                 ftrans_k=2):
        """ creates embeddings, feature net, noise shaper, adaptive filters, temporal shapers and feature transforms """

        super().__init__()


        self.num_features = num_features
        self.cond_dim = cond_dim
        self.pitch_max = pitch_max
        self.pitch_embedding_dim = pitch_embedding_dim
        self.kernel_size = kernel_size
        self.preemph = preemph
        self.pulses = pulses
        self.ftrans_k = ftrans_k

        assert self.FEATURE_FRAME_SIZE % self.FRAME_SIZE == 0
        self.upsamp_factor = self.FEATURE_FRAME_SIZE // self.FRAME_SIZE

        # pitch embedding
        self.pitch_embedding = nn.Embedding(pitch_max + 1, pitch_embedding_dim)

        # feature net
        self.feature_net = LPCNetFeatureNet(num_features + pitch_embedding_dim, cond_dim, self.upsamp_factor)

        # noise shaper
        self.noise_shaper = NoiseShaper(cond_dim, self.FRAME_SIZE)

        # comb filters
        left_pad = self.kernel_size // 2
        right_pad = self.kernel_size - 1 - left_pad
        self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
        self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)


        # pre-net filters: af_prescale takes the 2-channel phase signal, af_mix the
        # concatenation of its output with the shaped noise (see forward)
        self.af_prescale = LimitedAdaptiveConv1d(2, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af_mix = LimitedAdaptiveConv1d(2, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

        # spectral shaping
        self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

        # non-linear transforms
        self.tdshape1 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate1)
        self.tdshape2 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate2)
        self.tdshape3 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=innovate3)

        # combinators
        self.af2 = LimitedAdaptiveConv1d(2, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af3 = LimitedAdaptiveConv1d(2, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af4 = LimitedAdaptiveConv1d(2, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

        # feature transforms
        # each one is named after the filter after which it is applied in forward
        self.post_cf1 = nn.Conv1d(cond_dim, cond_dim, ftrans_k)
        self.post_cf2 = nn.Conv1d(cond_dim, cond_dim, ftrans_k)
        self.post_af1 = nn.Conv1d(cond_dim, cond_dim, ftrans_k)
        self.post_af2 = nn.Conv1d(cond_dim, cond_dim, ftrans_k)
        self.post_af3 = nn.Conv1d(cond_dim, cond_dim, ftrans_k)


    def create_phase_signals(self, periods):
        """ builds a 2-channel periodic excitation from per-frame pitch periods

        For every entry along dim 1 of periods, FRAME_SIZE samples with angular
        frequency 2 * pi / period are generated; the phase is carried over from
        frame to frame. With self.pulses the two channels are rectified pulses of
        both polarities, otherwise they are sine and cosine. The chunks are
        concatenated along the last dimension.
        """

        batch_size = periods.size(0)
        # sample index 1..FRAME_SIZE within a frame, repeated for every batch entry
        progression = torch.arange(1, self.FRAME_SIZE + 1, dtype=periods.dtype, device=periods.device).view((1, -1))
        progression = torch.repeat_interleave(progression, batch_size, 0)

        phase0 = torch.zeros(batch_size, dtype=periods.dtype, device=periods.device).unsqueeze(-1)
        chunks = []
        for sframe in range(periods.size(1)):
            # angular frequency per sample for this frame
            f = (2.0 * torch.pi / periods[:, sframe]).unsqueeze(-1)

            if self.pulses:
                # the part of +/- sin above cos(f) is kept and scaled by 1 / (1 - cos(f))
                alpha = torch.cos(f).view(batch_size, 1, 1)
                chunk_sin = torch.sin(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)
                pulse_a = torch.relu(chunk_sin - alpha) / (1 - alpha)
                pulse_b = torch.relu(-chunk_sin - alpha) / (1 - alpha)

                chunk = torch.cat((pulse_a, pulse_b), dim = 1)
            else:
                chunk_sin = torch.sin(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)
                chunk_cos = torch.cos(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)

                chunk = torch.cat((chunk_sin, chunk_cos), dim = 1)

            # advance the start phase by one frame
            phase0 = phase0 + self.FRAME_SIZE * f

            chunks.append(chunk)

        phase_signals = torch.cat(chunks, dim=-1)

        return phase_signals

    def flop_count(self, rate=16000, verbose=False):
        """ sums the flop counts of feature net, comb filters, adaptive convs and feature transforms

        NOTE(review): noise_shaper and the tdshape modules are not part of the sum -- confirm intended
        """

        frame_rate = rate / self.FRAME_SIZE

        # feature net
        feature_net_flops = self.feature_net.flop_count(frame_rate)
        comb_flops = self.cf1.flop_count(rate) + self.cf2.flop_count(rate)
        af_flops = self.af1.flop_count(rate) + self.af2.flop_count(rate) + self.af3.flop_count(rate) + self.af4.flop_count(rate) + self.af_prescale.flop_count(rate) + self.af_mix.flop_count(rate)
        feature_flops = (_conv1d_flop_count(self.post_cf1, frame_rate) + _conv1d_flop_count(self.post_cf2, frame_rate)
                         + _conv1d_flop_count(self.post_af1, frame_rate) + _conv1d_flop_count(self.post_af2, frame_rate) + _conv1d_flop_count(self.post_af3, frame_rate))

        if verbose:
            print(f"feature net: {feature_net_flops / 1e6} MFLOPS")
            print(f"comb filters: {comb_flops / 1e6} MFLOPS")
            print(f"adaptive conv: {af_flops / 1e6} MFLOPS")
            print(f"feature transforms: {feature_flops / 1e6} MFLOPS")

        return feature_net_flops + comb_flops + af_flops + feature_flops

    def feature_transform(self, f, layer):
        """ applies layer followed by tanh along dim 1 of f, keeping the length of that dimension """
        # Conv1d convolves over the last dimension, hence the permutes
        f = f.permute(0, 2, 1)
        # left padding by kernel size - 1 keeps the length unchanged
        f = F.pad(f, [self.ftrans_k - 1, 0])
        f = torch.tanh(layer(f))
        return f.permute(0, 2, 1)

    def forward(self, features, periods, debug=False):
        """ synthesizes a signal from features and pitch periods

        With debug=True intermediate signals are written out via print_channels
        and debug is passed on to most of the adaptive filters.
        """

        periods = periods.squeeze(-1)
        pitch_embedding = self.pitch_embedding(periods)

        full_features = torch.cat((features, pitch_embedding), dim=-1)
        cf = self.feature_net(full_features)

        # upsample periods
        periods = torch.repeat_interleave(periods, self.upsamp_factor, 1)

        # pre-net
        ref_phase = torch.tanh(self.create_phase_signals(periods))
        if debug: print_channels(ref_phase, prefix="lavoce_01", name="pulse")
        x = self.af_prescale(ref_phase, cf)
        noise = self.noise_shaper(cf)
        if debug: print_channels(torch.cat((x, noise), dim=1), prefix="lavoce_02", name="inputs")
        y = self.af_mix(torch.cat((x, noise), dim=1), cf)
        if debug: print_channels(y, prefix="lavoce_03", name="postselect1")

        # temporal shaping + innovating
        # channel 0 is passed through, channel 1 goes through the temporal shaper
        y1 = y[:, 0:1, :]
        y2 = self.tdshape1(y[:, 1:2, :], cf)
        if debug: print_channels(y2, prefix="lavoce_04", name="postshape1")
        y = torch.cat((y1, y2), dim=1)
        y = self.af2(y, cf, debug=debug)
        if debug: print_channels(y, prefix="lavoce_05", name="postselect2")
        # from here on the conditioning is transformed after every filter stage
        cf = self.feature_transform(cf, self.post_af2)

        y1 = y[:, 0:1, :]
        y2 = self.tdshape2(y[:, 1:2, :], cf)
        if debug: print_channels(y2, prefix="lavoce_06", name="postshape2")
        y = torch.cat((y1, y2), dim=1)
        y = self.af3(y, cf, debug=debug)
        if debug: print_channels(y, prefix="lavoce_07", name="postmix1")
        cf = self.feature_transform(cf, self.post_af3)

        # spectral shaping
        y = self.cf1(y, cf, periods, debug=debug)
        if debug: print_channels(y, prefix="lavoce_08", name="postcomb1")
        cf = self.feature_transform(cf, self.post_cf1)

        y = self.cf2(y, cf, periods, debug=debug)
        if debug: print_channels(y, prefix="lavoce_09", name="postcomb2")
        cf = self.feature_transform(cf, self.post_cf2)

        y = self.af1(y, cf, debug=debug)
        if debug: print_channels(y, prefix="lavoce_10", name="postselect3")
        cf = self.feature_transform(cf, self.post_af1)

        # final temporal env adjustment
        y1 = y[:, 0:1, :]
        y2 = self.tdshape3(y[:, 1:2, :], cf)
        if debug: print_channels(y2, prefix="lavoce_11", name="postshape3")
        y = torch.cat((y1, y2), dim=1)
        y = self.af4(y, cf, debug=debug)
        if debug: print_channels(y, prefix="lavoce_12", name="postmix2")

        return y

    def process(self, features, periods, debug=False):
        """ runs the model on unbatched input and returns the result as int16 samples

        Puts the model into eval mode, adds a batch dimension, runs forward without
        gradients, applies de-emphasis if self.preemph > 0 and scales by 2**15 with
        clipping to the int16 range.
        """

        self.eval()
        device = next(iter(self.parameters())).device
        with torch.no_grad():

            # run model
            f = features.unsqueeze(0).to(device)
            p = periods.unsqueeze(0).to(device)

            y = self.forward(f, p, debug=debug).squeeze()

            # deemphasis
            # sample-by-sample recursion y[n] += preemph * y[n - 1], done in place
            if self.preemph > 0:
                for i in range(len(y) - 1):
                    y[i + 1] += self.preemph * y[i]

            # clip to valid range
            out = torch.clip((2**15) * y, -2**15, 2**15 - 1).short()

        return out
diff --git a/dnn/torch/osce/models/lavoce_400.py b/dnn/torch/osce/models/lavoce_400.py
new file mode 100644
index 00000000..fe8263be
--- /dev/null
+++ b/dnn/torch/osce/models/lavoce_400.py
@@ -0,0 +1,254 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+import numpy as np
+
+from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+from utils.layers.td_shaper import TDShaper
+from utils.layers.noise_shaper import NoiseShaper
+from utils.complexity import _conv1d_flop_count
+from utils.endoscopy import write_data
+
+from models.nns_base import NNSBase
+from models.lpcnet_feature_net import LPCNetFeatureNet
+from .scale_embedding import ScaleEmbedding
+
class LaVoce400(nn.Module):
    """ Linear-Adaptive VOCodEr

    Builds a waveform from frame-level features and pitch periods. A
    periodic reference signal derived from the pitch and a shaped noise
    signal are mixed and then passed through a chain of adaptive
    convolutions, temporal shapers and adaptive comb filters, all of which
    are conditioned on the output of a feature net.
    """
    FEATURE_FRAME_SIZE=160
    FRAME_SIZE=40

    def __init__(self,
                 num_features=20,
                 pitch_embedding_dim=64,
                 cond_dim=256,
                 pitch_max=300,
                 kernel_size=15,
                 preemph=0.85,
                 comb_gain_limit_db=-6,
                 global_gain_limits_db=[-6, 6],
                 conv_gain_limits_db=[-6, 6],
                 norm_p=2,
                 avg_pool_k=4,
                 pulses=False):
        """
        Parameters:
        -----------
        num_features : int
            dimension of the input feature vectors
        pitch_embedding_dim : int
            dimension of the learned pitch embedding
        cond_dim : int
            channel count of the conditioning features
        pitch_max : int
            largest pitch period; the embedding has pitch_max + 1 entries
        kernel_size : int
            kernel size shared by all adaptive filters
        preemph : float
            de-emphasis coefficient applied in process()
        comb_gain_limit_db, global_gain_limits_db, conv_gain_limits_db, norm_p :
            forwarded to the adaptive comb / conv layers
        avg_pool_k : int
            forwarded to the TDShaper layers
        pulses : bool
            if True the reference signal consists of rectified pulse trains,
            otherwise of a sine/cosine pair
        """

        super().__init__()

        self.num_features = num_features
        self.cond_dim = cond_dim
        self.pitch_max = pitch_max
        self.pitch_embedding_dim = pitch_embedding_dim
        self.kernel_size = kernel_size
        self.preemph = preemph
        self.pulses = pulses

        assert self.FEATURE_FRAME_SIZE % self.FRAME_SIZE == 0
        self.upsamp_factor = self.FEATURE_FRAME_SIZE // self.FRAME_SIZE

        # pitch embedding
        self.pitch_embedding = nn.Embedding(pitch_max + 1, pitch_embedding_dim)

        # feature net
        self.feature_net = LPCNetFeatureNet(num_features + pitch_embedding_dim, cond_dim, self.upsamp_factor)

        # noise shaper
        self.noise_shaper = NoiseShaper(cond_dim, self.FRAME_SIZE)

        # comb filters
        left_pad = self.kernel_size // 2
        right_pad = self.kernel_size - 1 - left_pad
        self.cf1 = LimitedAdaptiveComb1d(
            self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, overlap_size=20, use_bias=False,
            padding=[left_pad, right_pad], max_lag=pitch_max + 1,
            gain_limit_db=comb_gain_limit_db,
            global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
        self.cf2 = LimitedAdaptiveComb1d(
            self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, overlap_size=20, use_bias=False,
            padding=[left_pad, right_pad], max_lag=pitch_max + 1,
            gain_limit_db=comb_gain_limit_db,
            global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)

        # pre-net: scales the reference signal and mixes it with noise
        self.af_prescale = LimitedAdaptiveConv1d(
            2, 1, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af_mix = LimitedAdaptiveConv1d(
            2, 2, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

        # spectral shaping
        self.af1 = LimitedAdaptiveConv1d(
            1, 2, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

        # non-linear transforms
        self.tdshape1 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, innovate=True)
        self.tdshape2 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k)
        self.tdshape3 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k)

        # combinators
        self.af2 = LimitedAdaptiveConv1d(
            2, 2, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af3 = LimitedAdaptiveConv1d(
            2, 1, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af4 = LimitedAdaptiveConv1d(
            2, 1, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

        # feature transforms
        self.post_cf1 = nn.Conv1d(cond_dim, cond_dim, 2)
        self.post_cf2 = nn.Conv1d(cond_dim, cond_dim, 2)
        self.post_af1 = nn.Conv1d(cond_dim, cond_dim, 2)
        self.post_af2 = nn.Conv1d(cond_dim, cond_dim, 2)
        self.post_af3 = nn.Conv1d(cond_dim, cond_dim, 2)

    @staticmethod
    def _peak_normalize_int16(channel):
        """ Scales a float numpy array so that its largest magnitude maps to
        int16 full scale (32767) and converts it to int16.

        An all-zero (or empty) array is converted without scaling.
        """
        peak = np.max(np.abs(channel)) if channel.size > 0 else 0
        if peak > 0:
            channel = channel * ((2**15 - 1) / peak)
        return channel.astype(np.int16)

    def create_phase_signals(self, periods):
        """ Creates a two-channel periodic reference signal from pitch periods.

        periods is indexed as periods[:, sframe]; every entry yields
        FRAME_SIZE samples, so the result has shape
        (batch_size, 2, periods.size(1) * FRAME_SIZE). The phase is carried
        over from one sub-frame to the next.
        """

        batch_size = periods.size(0)
        progression = torch.arange(1, self.FRAME_SIZE + 1, dtype=periods.dtype, device=periods.device).view((1, -1))
        progression = torch.repeat_interleave(progression, batch_size, 0)

        phase0 = torch.zeros(batch_size, dtype=periods.dtype, device=periods.device).unsqueeze(-1)
        chunks = []
        for sframe in range(periods.size(1)):
            # angular frequency per sample for this sub-frame
            f = (2.0 * torch.pi / periods[:, sframe]).unsqueeze(-1)

            if self.pulses:
                # rectified sine above the threshold cos(f), rescaled by 1 - cos(f)
                alpha = torch.cos(f).view(batch_size, 1, 1)
                chunk_sin = torch.sin(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)
                pulse_a = torch.relu(chunk_sin - alpha) / (1 - alpha)
                pulse_b = torch.relu(-chunk_sin - alpha) / (1 - alpha)

                chunk = torch.cat((pulse_a, pulse_b), dim = 1)
            else:
                chunk_sin = torch.sin(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)
                chunk_cos = torch.cos(f * progression + phase0).view(batch_size, 1, self.FRAME_SIZE)

                chunk = torch.cat((chunk_sin, chunk_cos), dim = 1)

            # advance the phase so the next sub-frame continues seamlessly
            phase0 = phase0 + self.FRAME_SIZE * f

            chunks.append(chunk)

        phase_signals = torch.cat(chunks, dim=-1)

        return phase_signals

    def flop_count(self, rate=16000, verbose=False):
        """ Sums the FLOP estimates of the feature net, the comb filters, the
        adaptive convolutions and the feature transforms.

        The temporal shapers and the noise shaper are not part of the sum.
        """

        frame_rate = rate / self.FRAME_SIZE

        # feature net
        feature_net_flops = self.feature_net.flop_count(frame_rate)
        comb_flops = self.cf1.flop_count(rate) + self.cf2.flop_count(rate)
        af_flops = (self.af1.flop_count(rate) + self.af2.flop_count(rate)
                    + self.af3.flop_count(rate) + self.af4.flop_count(rate)
                    + self.af_prescale.flop_count(rate) + self.af_mix.flop_count(rate))
        feature_flops = (_conv1d_flop_count(self.post_cf1, frame_rate) + _conv1d_flop_count(self.post_cf2, frame_rate)
                         + _conv1d_flop_count(self.post_af1, frame_rate) + _conv1d_flop_count(self.post_af2, frame_rate) + _conv1d_flop_count(self.post_af3, frame_rate))

        if verbose:
            print(f"feature net: {feature_net_flops / 1e6} MFLOPS")
            print(f"comb filters: {comb_flops / 1e6} MFLOPS")
            print(f"adaptive conv: {af_flops / 1e6} MFLOPS")
            print(f"feature transforms: {feature_flops / 1e6} MFLOPS")

        return feature_net_flops + comb_flops + af_flops + feature_flops

    def feature_transform(self, f, layer):
        """ Applies a kernel-size-2 conv layer followed by tanh along the
        second axis of f; one step of left padding keeps the length unchanged.
        """
        f = f.permute(0, 2, 1)
        f = F.pad(f, [1, 0])
        f = torch.tanh(layer(f))
        return f.permute(0, 2, 1)

    def forward(self, features, periods, debug=False):
        """ Generates the signal from features and pitch periods.

        With debug=True the two channels of the pre-net output are written
        out via write_data as peak-normalised 16-bit signals.
        """

        periods = periods.squeeze(-1)
        pitch_embedding = self.pitch_embedding(periods)

        full_features = torch.cat((features, pitch_embedding), dim=-1)
        cf = self.feature_net(full_features)

        # upsample periods
        periods = torch.repeat_interleave(periods, self.upsamp_factor, 1)

        # pre-net
        ref_phase = torch.tanh(self.create_phase_signals(periods))
        x = self.af_prescale(ref_phase, cf)
        noise = self.noise_shaper(cf)
        y = self.af_mix(torch.cat((x, noise), dim=1), cf)

        if debug:
            # normalise by the peak magnitude and stay within int16: scaling
            # by 2**15 over the signed maximum would wrap around at the peak
            ch0 = self._peak_normalize_int16(y[0,0,:].detach().cpu().numpy())
            ch1 = self._peak_normalize_int16(y[0,1,:].detach().cpu().numpy())
            write_data('prior_channel0', ch0, 16000)
            write_data('prior_channel1', ch1, 16000)

        # temporal shaping + innovating
        y1 = y[:, 0:1, :]
        y2 = self.tdshape1(y[:, 1:2, :], cf)
        y = torch.cat((y1, y2), dim=1)
        y = self.af2(y, cf, debug=debug)
        cf = self.feature_transform(cf, self.post_af2)

        y1 = y[:, 0:1, :]
        y2 = self.tdshape2(y[:, 1:2, :], cf)
        y = torch.cat((y1, y2), dim=1)
        y = self.af3(y, cf, debug=debug)
        cf = self.feature_transform(cf, self.post_af3)

        # spectral shaping
        y = self.cf1(y, cf, periods, debug=debug)
        cf = self.feature_transform(cf, self.post_cf1)

        y = self.cf2(y, cf, periods, debug=debug)
        cf = self.feature_transform(cf, self.post_cf2)

        y = self.af1(y, cf, debug=debug)
        cf = self.feature_transform(cf, self.post_af1)

        # final temporal env adjustment
        y1 = y[:, 0:1, :]
        y2 = self.tdshape3(y[:, 1:2, :], cf)
        y = torch.cat((y1, y2), dim=1)
        y = self.af4(y, cf, debug=debug)

        return y

    def process(self, features, periods, debug=False):
        """ Runs the model on one unbatched utterance and returns the
        de-emphasised output as clipped 16-bit samples.
        """

        self.eval()
        device = next(iter(self.parameters())).device
        with torch.no_grad():

            # run model
            f = features.unsqueeze(0).to(device)
            p = periods.unsqueeze(0).to(device)

            y = self.forward(f, p, debug=debug).squeeze()

            # deemphasis (recursive, hence the sequential loop)
            if self.preemph > 0:
                for i in range(len(y) - 1):
                    y[i + 1] += self.preemph * y[i]

            # clip to valid range
            out = torch.clip((2**15) * y, -2**15, 2**15 - 1).short()

        return out
diff --git a/dnn/torch/osce/models/lpcnet_feature_net.py b/dnn/torch/osce/models/lpcnet_feature_net.py
new file mode 100644
index 00000000..b637d748
--- /dev/null
+++ b/dnn/torch/osce/models/lpcnet_feature_net.py
@@ -0,0 +1,91 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from utils.complexity import _conv1d_flop_count
+
class LPCNetFeatureNet(nn.Module):
    """ Conditioning network: two kernel-size-3 convolutions, a transposed
    convolution that upsamples by upsamp_factor, and a GRU.
    """

    def __init__(self,
                 feature_dim=84,
                 num_channels=256,
                 upsamp_factor=2,
                 lookahead=True):
        """
        Parameters:
        -----------
        feature_dim : int
            dimension of the input feature vectors
        num_channels : int
            channel count of all layers and of the output
        upsamp_factor : int
            kernel size and stride of the transposed convolution
        lookahead : bool
            if True the first convolution is padded symmetrically (one frame
            on each side); otherwise both convolutions are padded on the
            left only
        """

        super().__init__()

        self.feature_dim = feature_dim
        self.num_channels = num_channels
        self.upsamp_factor = upsamp_factor
        self.lookahead = lookahead

        self.conv1 = nn.Conv1d(feature_dim, num_channels, 3)
        self.conv2 = nn.Conv1d(num_channels, num_channels, 3)

        self.gru = nn.GRU(num_channels, num_channels, batch_first=True)

        self.tconv = nn.ConvTranspose1d(num_channels, num_channels, upsamp_factor, upsamp_factor)

    def flop_count(self, rate=100):
        """ FLOP estimate for the three conv layers plus the GRU, all
        evaluated at the given rate. """
        count = 0
        for conv in self.conv1, self.conv2, self.tconv:
            count += _conv1d_flop_count(conv, rate)

        # 3 gates, each with an input and a recurrent matrix; 2 FLOPs per MAC
        count += 2 * (3 * self.gru.input_size * self.gru.hidden_size + 3 * self.gru.hidden_size * self.gru.hidden_size) * rate

        return count


    def forward(self, features, state=None):
        """ features shape: (batch_size, num_frames, feature_dim)

        state is the optional initial GRU state of shape
        (1, batch_size, num_channels); zeros are used when it is omitted.
        """

        batch_size = features.size(0)

        features = features.permute(0, 2, 1)
        if self.lookahead:
            c = torch.tanh(self.conv1(F.pad(features, [1, 1])))
            c = torch.tanh(self.conv2(F.pad(c, [2, 0])))
        else:
            c = torch.tanh(self.conv1(F.pad(features, [2, 0])))
            c = torch.tanh(self.conv2(F.pad(c, [2, 0])))

        c = torch.tanh(self.tconv(c))

        c = c.permute(0, 2, 1)

        if state is None:
            # match dtype and device of the GRU input so the net also runs
            # in non-float32 precision without an explicit state
            state = torch.zeros((1, batch_size, self.num_channels), device=c.device, dtype=c.dtype)

        c, _ = self.gru(c, state)

        return c
diff --git a/dnn/torch/osce/models/nns_base.py b/dnn/torch/osce/models/nns_base.py
new file mode 100644
index 00000000..6e667b96
--- /dev/null
+++ b/dnn/torch/osce/models/nns_base.py
@@ -0,0 +1,69 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
class NNSBase(nn.Module):
    """ Base class providing a common offline process() routine for models
    whose forward takes (signal, features, periods[, numbits]).
    """

    def __init__(self, skip=91, preemph=0.85):
        """
        Parameters:
        -----------
        skip : int
            number of leading output samples dropped (and zero-padded at the
            end) by process() for delay compensation
        preemph : float
            de-emphasis coefficient applied by process(); values <= 0
            disable de-emphasis
        """
        super().__init__()

        self.skip = skip
        self.preemph = preemph

    def process(self, sig, features, periods, numbits, debug=False):
        """ Runs the model on one unbatched utterance and returns 16-bit samples.

        numbits is only passed on (and only converted) when the model's
        forward has a parameter named 'numbits'; otherwise it is ignored and
        may be None.
        """
        import inspect

        self.eval()
        # look at the actual parameters of forward; co_varnames would also
        # match local variables that happen to be called 'numbits'
        has_numbits = 'numbits' in inspect.signature(self.forward).parameters
        device = next(iter(self.parameters())).device
        with torch.no_grad():

            # run model
            x = sig.view(1, 1, -1).to(device)
            f = features.unsqueeze(0).to(device)
            p = periods.unsqueeze(0).to(device)

            if has_numbits:
                n = numbits.unsqueeze(0).to(device)
                y = self.forward(x, f, p, n, debug=debug).squeeze()
            else:
                y = self.forward(x, f, p, debug=debug).squeeze()

            # deemphasis (recursive, hence the sequential loop)
            if self.preemph > 0:
                for i in range(len(y) - 1):
                    y[i + 1] += self.preemph * y[i]

            # delay compensation
            y = torch.cat((y[self.skip:], torch.zeros(self.skip, dtype=y.dtype, device=y.device)))
            out = torch.clip((2**15) * y, -2**15, 2**15 - 1).short()

        return out
diff --git a/dnn/torch/osce/models/no_lace.py b/dnn/torch/osce/models/no_lace.py
new file mode 100644
index 00000000..78c3a301
--- /dev/null
+++ b/dnn/torch/osce/models/no_lace.py
@@ -0,0 +1,218 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numbers
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn.utils import weight_norm
+
+
+import numpy as np
+
+from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+from utils.layers.td_shaper import TDShaper
+from utils.complexity import _conv1d_flop_count
+
+from models.nns_base import NNSBase
+from models.silk_feature_net_pl import SilkFeatureNetPL
+from models.silk_feature_net import SilkFeatureNet
+from .scale_embedding import ScaleEmbedding
+
+import sys
+sys.path.append('../dnntools')
+from dnntools.quantization import soft_quant
+from dnntools.sparsification import create_sparsifier, mark_for_sparsification
+
class NoLACE(NNSBase):
    """ Non-Linear Adaptive Coding Enhancer

    Enhances an input signal with two adaptive comb filters followed by a
    chain of adaptive convolutions and temporal shapers. All layers are
    conditioned on features, pitch periods and an embedding of numbits.
    """
    FRAME_SIZE=80

    def __init__(self,
                 num_features=47,
                 pitch_embedding_dim=64,
                 cond_dim=256,
                 pitch_max=257,
                 kernel_size=15,
                 preemph=0.85,
                 skip=91,
                 comb_gain_limit_db=-6,
                 global_gain_limits_db=[-6, 6],
                 conv_gain_limits_db=[-6, 6],
                 numbits_range=[50, 650],
                 numbits_embedding_dim=8,
                 hidden_feature_dim=64,
                 partial_lookahead=True,
                 norm_p=2,
                 avg_pool_k=4,
                 pool_after=False,
                 softquant=False,
                 sparsify=False,
                 sparsification_schedule=[100, 1000, 100],
                 sparsification_density=0.5,
                 apply_weight_norm=False):
        """
        Parameters (selection):
        -----------------------
        pitch_max : int
            largest pitch period; the embedding has pitch_max + 1 entries
        numbits_range : list
            [min, max] passed to the log-scale ScaleEmbedding for numbits
        partial_lookahead : bool
            selects SilkFeatureNetPL (True) or SilkFeatureNet (False)
        softquant : bool
            wraps the feature transform layers with soft_quant and is
            forwarded to the sub-layers
        sparsify : bool
            marks the feature transform layers for sparsification and
            creates self.sparsifier (only defined when True)
        sparsification_density : float or sequence
            a single number is expanded to a list of 10 equal entries;
            entries 4..8 are used for the feature transform layers here
        apply_weight_norm : bool
            wraps the feature transform layers in weight_norm and is
            forwarded to the sub-layers
        """

        super().__init__(skip=skip, preemph=preemph)

        self.num_features = num_features
        self.cond_dim = cond_dim
        self.pitch_max = pitch_max
        self.pitch_embedding_dim = pitch_embedding_dim
        self.kernel_size = kernel_size
        self.preemph = preemph
        self.skip = skip
        self.numbits_range = numbits_range
        self.numbits_embedding_dim = numbits_embedding_dim
        self.hidden_feature_dim = hidden_feature_dim
        self.partial_lookahead = partial_lookahead

        if isinstance(sparsification_density, numbers.Number):
            sparsification_density = 10 * [sparsification_density]

        # identity wrapper when weight normalisation is disabled
        norm = weight_norm if apply_weight_norm else lambda x, name=None: x

        # pitch embedding
        self.pitch_embedding = nn.Embedding(pitch_max + 1, pitch_embedding_dim)

        # numbits embedding
        self.numbits_embedding = ScaleEmbedding(numbits_embedding_dim, *numbits_range, logscale=True)

        # feature net
        if partial_lookahead:
            self.feature_net = SilkFeatureNetPL(
                num_features + pitch_embedding_dim + 2 * numbits_embedding_dim,
                cond_dim, hidden_feature_dim,
                softquant=softquant, sparsify=sparsify,
                sparsification_density=sparsification_density,
                apply_weight_norm=apply_weight_norm)
        else:
            self.feature_net = SilkFeatureNet(num_features + pitch_embedding_dim + 2 * numbits_embedding_dim, cond_dim)

        # comb filters
        left_pad = self.kernel_size // 2
        right_pad = self.kernel_size - 1 - left_pad
        self.cf1 = LimitedAdaptiveComb1d(
            self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, overlap_size=40,
            padding=[left_pad, right_pad], max_lag=pitch_max + 1,
            gain_limit_db=comb_gain_limit_db,
            global_gain_limits_db=global_gain_limits_db, norm_p=norm_p,
            softquant=softquant, apply_weight_norm=apply_weight_norm)
        self.cf2 = LimitedAdaptiveComb1d(
            self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, overlap_size=40,
            padding=[left_pad, right_pad], max_lag=pitch_max + 1,
            gain_limit_db=comb_gain_limit_db,
            global_gain_limits_db=global_gain_limits_db, norm_p=norm_p,
            softquant=softquant, apply_weight_norm=apply_weight_norm)

        # spectral shaping
        self.af1 = LimitedAdaptiveConv1d(
            1, 2, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p,
            softquant=softquant, apply_weight_norm=apply_weight_norm)

        # non-linear transforms
        self.tdshape1 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, pool_after=pool_after, softquant=softquant, apply_weight_norm=apply_weight_norm)
        self.tdshape2 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, pool_after=pool_after, softquant=softquant, apply_weight_norm=apply_weight_norm)
        self.tdshape3 = TDShaper(cond_dim, frame_size=self.FRAME_SIZE, avg_pool_k=avg_pool_k, pool_after=pool_after, softquant=softquant, apply_weight_norm=apply_weight_norm)

        # combinators
        self.af2 = LimitedAdaptiveConv1d(
            2, 2, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p,
            softquant=softquant, apply_weight_norm=apply_weight_norm)
        self.af3 = LimitedAdaptiveConv1d(
            2, 2, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p,
            softquant=softquant, apply_weight_norm=apply_weight_norm)
        self.af4 = LimitedAdaptiveConv1d(
            2, 1, self.kernel_size, cond_dim,
            frame_size=self.FRAME_SIZE, use_bias=False,
            padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p,
            softquant=softquant, apply_weight_norm=apply_weight_norm)

        # feature transforms
        self.post_cf1 = norm(nn.Conv1d(cond_dim, cond_dim, 2))
        self.post_cf2 = norm(nn.Conv1d(cond_dim, cond_dim, 2))
        self.post_af1 = norm(nn.Conv1d(cond_dim, cond_dim, 2))
        self.post_af2 = norm(nn.Conv1d(cond_dim, cond_dim, 2))
        self.post_af3 = norm(nn.Conv1d(cond_dim, cond_dim, 2))

        if softquant:
            self.post_cf1 = soft_quant(self.post_cf1)
            self.post_cf2 = soft_quant(self.post_cf2)
            self.post_af1 = soft_quant(self.post_af1)
            self.post_af2 = soft_quant(self.post_af2)
            self.post_af3 = soft_quant(self.post_af3)


        if sparsify:
            mark_for_sparsification(self.post_cf1, (sparsification_density[4], [8, 4]))
            mark_for_sparsification(self.post_cf2, (sparsification_density[5], [8, 4]))
            mark_for_sparsification(self.post_af1, (sparsification_density[6], [8, 4]))
            mark_for_sparsification(self.post_af2, (sparsification_density[7], [8, 4]))
            mark_for_sparsification(self.post_af3, (sparsification_density[8], [8, 4]))

            self.sparsifier = create_sparsifier(self, *sparsification_schedule)

    def flop_count(self, rate=16000, verbose=False):
        """ Sums the FLOP estimates of the feature net, comb filters,
        adaptive convolutions, temporal shapers and feature transforms.

        With verbose=True every summand is printed in MFLOPS.
        """

        frame_rate = rate / self.FRAME_SIZE

        # feature net
        feature_net_flops = self.feature_net.flop_count(frame_rate)
        comb_flops = self.cf1.flop_count(rate) + self.cf2.flop_count(rate)
        af_flops = self.af1.flop_count(rate) + self.af2.flop_count(rate) + self.af3.flop_count(rate) + self.af4.flop_count(rate)
        shape_flops = self.tdshape1.flop_count(rate) + self.tdshape2.flop_count(rate) + self.tdshape3.flop_count(rate)
        feature_flops = (_conv1d_flop_count(self.post_cf1, frame_rate) + _conv1d_flop_count(self.post_cf2, frame_rate)
                         + _conv1d_flop_count(self.post_af1, frame_rate) + _conv1d_flop_count(self.post_af2, frame_rate) + _conv1d_flop_count(self.post_af3, frame_rate))

        if verbose:
            print(f"feature net: {feature_net_flops / 1e6} MFLOPS")
            print(f"comb filters: {comb_flops / 1e6} MFLOPS")
            print(f"adaptive conv: {af_flops / 1e6} MFLOPS")
            # part of the returned total, so list it in the breakdown as well
            print(f"temporal shaping: {shape_flops / 1e6} MFLOPS")
            print(f"feature transforms: {feature_flops / 1e6} MFLOPS")

        return feature_net_flops + comb_flops + af_flops + feature_flops + shape_flops

    def feature_transform(self, f, layer):
        """ Applies a kernel-size-2 conv layer followed by tanh along the
        second axis of f; one step of left padding keeps the length unchanged.
        """
        f0 = f.permute(0, 2, 1)
        f = F.pad(f0, [1, 0])
        f = torch.tanh(layer(f))
        return f.permute(0, 2, 1)

    def forward(self, x, features, periods, numbits, debug=False):
        """ Enhances signal x given features, pitch periods and numbits.

        The conditioning features are updated by a feature transform after
        every adaptive layer except the last one.
        """

        periods = periods.squeeze(-1)
        pitch_embedding = self.pitch_embedding(periods)
        numbits_embedding = self.numbits_embedding(numbits).flatten(2)

        full_features = torch.cat((features, pitch_embedding, numbits_embedding), dim=-1)
        cf = self.feature_net(full_features)

        # comb filtering
        y = self.cf1(x, cf, periods, debug=debug)
        cf = self.feature_transform(cf, self.post_cf1)

        y = self.cf2(y, cf, periods, debug=debug)
        cf = self.feature_transform(cf, self.post_cf2)

        # spectral shaping, expands to two channels
        y = self.af1(y, cf, debug=debug)
        cf = self.feature_transform(cf, self.post_af1)

        # three rounds: shape the second channel, then recombine
        y1 = y[:, 0:1, :]
        y2 = self.tdshape1(y[:, 1:2, :], cf)
        y = torch.cat((y1, y2), dim=1)
        y = self.af2(y, cf, debug=debug)
        cf = self.feature_transform(cf, self.post_af2)

        y1 = y[:, 0:1, :]
        y2 = self.tdshape2(y[:, 1:2, :], cf)
        y = torch.cat((y1, y2), dim=1)
        y = self.af3(y, cf, debug=debug)
        cf = self.feature_transform(cf, self.post_af3)

        y1 = y[:, 0:1, :]
        y2 = self.tdshape3(y[:, 1:2, :], cf)
        y = torch.cat((y1, y2), dim=1)
        y = self.af4(y, cf, debug=debug)

        return y
diff --git a/dnn/torch/osce/models/scale_embedding.py b/dnn/torch/osce/models/scale_embedding.py
new file mode 100644
index 00000000..58695302
--- /dev/null
+++ b/dnn/torch/osce/models/scale_embedding.py
@@ -0,0 +1,68 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import math as m
+import torch
+from torch import nn
+
+
class ScaleEmbedding(nn.Module):
    """ Sinusoidal embedding of a bounded scalar.

    Inputs are (optionally log-transformed,) clipped to [min_val, max_val],
    centred on the interval midpoint and multiplied by dim learnable scale
    factors that are initialised to k * pi / (max_val - min_val) for
    k = 1..dim. The result is sin(. - 0.5), with one trailing axis of size
    dim added to the input shape.
    """

    def __init__(self,
                 dim,
                 min_val,
                 max_val,
                 logscale=False):
        """
        Parameters:
        -----------
        dim : int
            number of embedding components
        min_val, max_val : number
            bounds of the valid input range (before the log transform)
        logscale : bool
            if True inputs and bounds are mapped through a natural log first

        Raises:
        -------
        ValueError
            if min_val >= max_val, or if logscale is set and min_val <= 0
        """

        super().__init__()

        if max_val <= min_val:
            raise ValueError('min_val must be smaller than max_val')

        if logscale and min_val <= 0:
            raise ValueError('min_val must be positive when logscale is true')

        self.dim = dim
        self.logscale = logscale

        # the bounds are stored in the domain in which clipping takes place
        if logscale:
            lower, upper = m.log(min_val), m.log(max_val)
        else:
            lower, upper = min_val, max_val

        self.min_val = lower
        self.max_val = upper
        self.offset = (lower + upper) / 2

        harmonics = torch.arange(1, dim+1, dtype=torch.float32)
        self.scale_factors = nn.Parameter(harmonics * torch.pi / (upper - lower))

    def forward(self, x):
        """ Returns the embedding of x with shape x.shape + (dim,). """
        values = torch.log(x) if self.logscale else x
        centered = torch.clip(values, self.min_val, self.max_val) - self.offset
        phase = centered.unsqueeze(-1) * self.scale_factors - 0.5
        return torch.sin(phase)
diff --git a/dnn/torch/osce/models/shape_up_48.py b/dnn/torch/osce/models/shape_up_48.py
new file mode 100644
index 00000000..6e717f0c
--- /dev/null
+++ b/dnn/torch/osce/models/shape_up_48.py
@@ -0,0 +1,179 @@
+
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+import numpy as np
+
+from utils.layers.silk_upsampler import SilkUpsampler
+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+from utils.layers.td_shaper import TDShaper
+from utils.layers.deemph import Deemph
+from utils.misc import freeze_model
+
+from models.nns_base import NNSBase
+from models.silk_feature_net_pl import SilkFeatureNetPL
+from models.silk_feature_net import SilkFeatureNet
+from .scale_embedding import ScaleEmbedding
+
+
+
class ShapeUp48(NNSBase):
    """ Upsampling enhancer: the input is upsampled by 2, processed by
    adaptive convolutions and a temporal shaper together with a shaped noise
    channel, interpolated by 3/2 and processed by a final shaper/conv stage.

    An optional, frozen prenet may be run on the input first.
    """
    FRAME_SIZE16k=80

    def __init__(self,
                 num_features=47,
                 pitch_embedding_dim=64,
                 cond_dim=256,
                 pitch_max=257,
                 kernel_size=15,
                 preemph=0.85,
                 skip=288,
                 conv_gain_limits_db=[-6, 6],
                 numbits_range=[50, 650],
                 numbits_embedding_dim=8,
                 hidden_feature_dim=64,
                 partial_lookahead=True,
                 norm_p=2,
                 target_fs=48000,
                 noise_amplitude=0,
                 prenet=None,
                 avg_pool_k=4):
        """
        Parameters (selection):
        -----------------------
        target_fs : int
            determines the frame size of the final stage as
            FRAME_SIZE16k * target_fs / 16000
        noise_amplitude : float
            scale of the Gaussian noise fed to the noise filter
        prenet : nn.Module or None
            optional model applied to the input first; it is frozen and its
            output is passed through Deemph(prenet.preemph). If the prenet
            has no preemph attribute a warning is printed and Deemph(0) is
            used. self.deemph is only defined when a prenet is given.
        partial_lookahead : bool
            selects SilkFeatureNetPL (True) or SilkFeatureNet (False)
        """

        super().__init__(skip=skip, preemph=preemph)


        self.num_features = num_features
        self.cond_dim = cond_dim
        self.pitch_max = pitch_max
        self.pitch_embedding_dim = pitch_embedding_dim
        self.kernel_size = kernel_size
        self.preemph = preemph
        self.skip = skip
        self.numbits_range = numbits_range
        self.numbits_embedding_dim = numbits_embedding_dim
        self.hidden_feature_dim = hidden_feature_dim
        self.partial_lookahead = partial_lookahead
        self.frame_size48 = int(self.FRAME_SIZE16k * target_fs / 16000 + .1)
        self.frame_size32 = self.FRAME_SIZE16k * 2
        self.noise_amplitude = noise_amplitude
        self.prenet = prenet

        # freeze prenet if given
        if prenet is not None:
            freeze_model(self.prenet)
            # only a missing attribute triggers the fallback; any other
            # error (or a keyboard interrupt) is no longer swallowed
            try:
                prenet_preemph = prenet.preemph
            except AttributeError:
                print("[warning] prenet model is expected to have preemph attribute")
                prenet_preemph = 0
            self.deemph = Deemph(prenet_preemph)



        # upsampler
        self.upsampler = SilkUpsampler()

        # pitch embedding
        self.pitch_embedding = nn.Embedding(pitch_max + 1, pitch_embedding_dim)

        # numbits embedding
        self.numbits_embedding = ScaleEmbedding(numbits_embedding_dim, *numbits_range, logscale=True)

        # feature net
        if partial_lookahead:
            self.feature_net = SilkFeatureNetPL(num_features + pitch_embedding_dim + 2 * numbits_embedding_dim, cond_dim, hidden_feature_dim)
        else:
            self.feature_net = SilkFeatureNet(num_features + pitch_embedding_dim + 2 * numbits_embedding_dim, cond_dim)

        # non-linear transforms
        self.tdshape1 = TDShaper(cond_dim, frame_size=self.frame_size32, avg_pool_k=avg_pool_k)
        self.tdshape2 = TDShaper(cond_dim, frame_size=self.frame_size48, avg_pool_k=avg_pool_k)

        # spectral shaping
        self.af_noise = LimitedAdaptiveConv1d(
            1, 1, self.kernel_size, cond_dim,
            frame_size=self.frame_size32, overlap_size=self.frame_size32//2,
            use_bias=False, padding=[self.kernel_size - 1, 0],
            gain_limits_db=[-30, 0], norm_p=norm_p)
        self.af1 = LimitedAdaptiveConv1d(
            1, 2, self.kernel_size, cond_dim,
            frame_size=self.frame_size32, overlap_size=self.frame_size32//2,
            use_bias=False, padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af2 = LimitedAdaptiveConv1d(
            3, 2, self.kernel_size, cond_dim,
            frame_size=self.frame_size32, overlap_size=self.frame_size32//2,
            use_bias=False, padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
        self.af3 = LimitedAdaptiveConv1d(
            2, 1, self.kernel_size, cond_dim,
            frame_size=self.frame_size48, overlap_size=self.frame_size48//2,
            use_bias=False, padding=[self.kernel_size - 1, 0],
            gain_limits_db=conv_gain_limits_db, norm_p=norm_p)


    def flop_count(self, rate=16000, verbose=False):
        """ Sums the FLOP estimates of the feature net and of af1, af2, af3.

        af_noise and the temporal shapers are not part of the sum.
        """

        frame_rate = rate / self.FRAME_SIZE16k

        # feature net
        feature_net_flops = self.feature_net.flop_count(frame_rate)
        # NOTE(review): af1 is applied to the 2x-upsampled signal in forward()
        # but is counted at `rate`, unlike af2 (2 * rate) -- confirm intent
        af_flops = self.af1.flop_count(rate) + self.af2.flop_count(2 * rate) + self.af3.flop_count(3 * rate)

        if verbose:
            print(f"feature net: {feature_net_flops / 1e6} MFLOPS")
            print(f"adaptive conv: {af_flops / 1e6} MFLOPS")

        return feature_net_flops + af_flops

    def forward(self, x, features, periods, numbits, debug=False):
        """ Upsamples and enhances signal x given features, pitch periods
        and numbits; returns the output of the final adaptive convolution.
        """

        if self.prenet is not None:
            # the prenet is frozen, so no gradients are tracked through it
            with torch.no_grad():
                x = self.prenet(x, features, periods, numbits)
                x = self.deemph(x)



        periods = periods.squeeze(-1)
        pitch_embedding = self.pitch_embedding(periods)
        numbits_embedding = self.numbits_embedding(numbits).flatten(2)

        full_features = torch.cat((features, pitch_embedding, numbits_embedding), dim=-1)
        cf = self.feature_net(full_features)

        # first stage: 2x upsampling
        y32 = self.upsampler.hq_2x_up(x)

        # shaped noise channel
        noise = self.noise_amplitude * torch.randn_like(y32)
        noise = self.af_noise(noise, cf)

        y32 = self.af1(y32, cf, debug=debug)

        y32_1 = y32[:, 0:1, :]
        y32_2 = self.tdshape1(y32[:, 1:2, :], cf)
        y32 = torch.cat((y32_1, y32_2, noise), dim=1)

        y32 = self.af2(y32, cf, debug=debug)

        # second stage: 3/2 interpolation
        y48 = self.upsampler.interpolate_3_2(y32)

        y48_1 = y48[:, 0:1, :]
        y48_2 = self.tdshape2(y48[:, 1:2, :], cf)
        y48 = torch.cat((y48_1, y48_2), dim=1)

        y48 = self.af3(y48, cf, debug=debug)

        return y48
diff --git a/dnn/torch/osce/models/silk_feature_net.py b/dnn/torch/osce/models/silk_feature_net.py
new file mode 100644
index 00000000..ed22f52e
--- /dev/null
+++ b/dnn/torch/osce/models/silk_feature_net.py
@@ -0,0 +1,86 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from utils.complexity import _conv1d_flop_count
+
+class SilkFeatureNet(nn.Module):
+    """Feature network: two tanh-activated 1d convolutions followed by a GRU.
+
+    With lookahead=False the convolution inputs are padded on the left only
+    ([2, 0] and [4, 0]); with lookahead=True the padding is symmetric
+    ([1, 1] and [2, 2]). The number of frames is preserved in both cases.
+    """
+
+    def __init__(self,
+                 feature_dim=47,
+                 num_channels=256,
+                 lookahead=False):
+
+        super().__init__()
+
+        self.feature_dim = feature_dim
+        self.num_channels = num_channels
+        self.lookahead = lookahead
+
+        self.conv1 = nn.Conv1d(feature_dim, num_channels, 3)
+        self.conv2 = nn.Conv1d(num_channels, num_channels, 3, dilation=2)
+
+        self.gru = nn.GRU(num_channels, num_channels, batch_first=True)
+
+    def flop_count(self, rate=200):
+        """Flop count of both convolutions plus the GRU at the given frame rate."""
+        conv_flops = sum(_conv1d_flop_count(conv, rate) for conv in (self.conv1, self.conv2))
+
+        # three gates, each with an input and a recurrent matrix; 2 flops per multiply-add
+        hidden = self.gru.hidden_size
+        gru_macs = 3 * self.gru.input_size * hidden + 3 * hidden * hidden
+
+        return conv_flops + 2 * gru_macs * rate
+
+
+    def forward(self, features, state=None):
+        """ features shape: (batch_size, num_frames, feature_dim) """
+
+        if state is None:
+            state = torch.zeros((1, features.size(0), self.num_channels), device=features.device)
+
+        # padding per convolution: symmetric with lookahead, left-only without
+        if self.lookahead:
+            pad1, pad2 = [1, 1], [2, 2]
+        else:
+            pad1, pad2 = [2, 0], [4, 0]
+
+        # convolutions operate on (batch_size, channels, num_frames)
+        c = features.permute(0, 2, 1)
+        c = torch.tanh(self.conv1(F.pad(c, pad1)))
+        c = torch.tanh(self.conv2(F.pad(c, pad2)))
+
+        # back to (batch_size, num_frames, channels) for the batch_first GRU
+        c, _ = self.gru(c.permute(0, 2, 1), state)
+
+        return c
\ No newline at end of file
diff --git a/dnn/torch/osce/models/silk_feature_net_pl.py b/dnn/torch/osce/models/silk_feature_net_pl.py
new file mode 100644
index 00000000..e6a536fe
--- /dev/null
+++ b/dnn/torch/osce/models/silk_feature_net_pl.py
@@ -0,0 +1,127 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+import sys
+sys.path.append('../dnntools')
+import numbers
+
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn.utils import weight_norm
+
+from utils.complexity import _conv1d_flop_count
+
+from dnntools.quantization.softquant import soft_quant
+from dnntools.sparsification import mark_for_sparsification
+
+class SilkFeatureNetPL(nn.Module):
+    """ feature net with partial lookahead
+
+    Features are reduced by a pointwise convolution, grouped in blocks of four
+    frames, processed by a second convolution on the grouped sequence, expanded
+    again by a transposed convolution with stride 4 and passed through a GRU.
+    """
+    def __init__(self,
+                 feature_dim=47,
+                 num_channels=256,
+                 hidden_feature_dim=64,
+                 softquant=False,
+                 sparsify=True,
+                 sparsification_density=0.5,
+                 apply_weight_norm=False):
+
+        super().__init__()
+
+        # a scalar density is used for all four sparsified weight groups
+        if isinstance(sparsification_density, numbers.Number):
+            sparsification_density = [sparsification_density] * 4
+
+        self.feature_dim = feature_dim
+        self.num_channels = num_channels
+        self.hidden_feature_dim = hidden_feature_dim
+
+        if apply_weight_norm:
+            norm = weight_norm
+        else:
+            # identity with the same call signature as weight_norm
+            def norm(module, name=None):
+                return module
+
+        self.conv1 = norm(nn.Conv1d(feature_dim, self.hidden_feature_dim, 1))
+        self.conv2 = norm(nn.Conv1d(4 * self.hidden_feature_dim, num_channels, 2))
+        self.tconv = norm(nn.ConvTranspose1d(num_channels, num_channels, 4, 4))
+
+        # recurrent weights are wrapped first, input weights second
+        gru = nn.GRU(num_channels, num_channels, batch_first=True)
+        gru = norm(gru, name='weight_hh_l0')
+        self.gru = norm(gru, name='weight_ih_l0')
+
+        if softquant:
+            self.conv2 = soft_quant(self.conv2)
+            self.tconv = soft_quant(self.tconv)
+            self.gru = soft_quant(self.gru, names=['weight_hh_l0', 'weight_ih_l0'])
+
+
+        if sparsify:
+            mark_for_sparsification(self.conv2, (sparsification_density[0], [8, 4]))
+            mark_for_sparsification(self.tconv, (sparsification_density[1], [8, 4]))
+
+            # input matrices share density [2], recurrent matrices share density [3]
+            gru_targets = {
+                'W_ir' : (sparsification_density[2], [8, 4], False),
+                'W_iz' : (sparsification_density[2], [8, 4], False),
+                'W_in' : (sparsification_density[2], [8, 4], False),
+                'W_hr' : (sparsification_density[3], [8, 4], True),
+                'W_hz' : (sparsification_density[3], [8, 4], True),
+                'W_hn' : (sparsification_density[3], [8, 4], True),
+            }
+            mark_for_sparsification(self.gru, gru_targets)
+
+
+    def flop_count(self, rate=200):
+        """Flop count of the three convolutions plus the GRU at the given frame rate."""
+        conv_flops = sum(
+            _conv1d_flop_count(conv, rate) for conv in (self.conv1, self.conv2, self.tconv)
+        )
+
+        # three gates, each with an input and a recurrent matrix; 2 flops per multiply-add
+        hidden = self.gru.hidden_size
+        gru_macs = 3 * self.gru.input_size * hidden + 3 * hidden * hidden
+
+        return conv_flops + 2 * gru_macs * rate
+
+
+    def forward(self, features, state=None):
+        """ features shape: (batch_size, num_frames, feature_dim) """
+
+        batch_size, num_frames = features.size(0), features.size(1)
+
+        if state is None:
+            state = torch.zeros((1, batch_size, self.num_channels), device=features.device)
+
+        # dimensionality reduction
+        c = torch.tanh(self.conv1(features.permute(0, 2, 1)))
+
+        # frame accumulation
+        c = c.permute(0, 2, 1).reshape(batch_size, num_frames // 4, -1)
+        c = torch.tanh(self.conv2(F.pad(c.permute(0, 2, 1), [1, 0])))
+
+        # upsampling
+        c = torch.tanh(self.tconv(c)).permute(0, 2, 1)
+
+        c, _ = self.gru(c, state)
+
+        return c
\ No newline at end of file
diff --git a/dnn/torch/osce/requirements.txt b/dnn/torch/osce/requirements.txt
new file mode 100644
index 00000000..4ca566e5
--- /dev/null
+++ b/dnn/torch/osce/requirements.txt
@@ -0,0 +1,9 @@
+pyyaml==6.0.1
+torch==2.0.1
+numpy==1.25.2
+scipy==1.11.2
+pesq==0.0.4
+gitpython==3.1.36
+matplotlib==3.7.3
+torchaudio==2.0.2
+tqdm==4.66.1
diff --git a/dnn/torch/osce/stndrd/evaluation/create_input_data.sh b/dnn/torch/osce/stndrd/evaluation/create_input_data.sh
new file mode 100644
index 00000000..54bacb88
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/create_input_data.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Creates one "<name>_<bitrate>.se" folder per input wav file and bitrate,
+# containing the 16 kHz 16-bit raw input (clean.s16) and the output of
+# $OPUSDEMO for that bitrate (noisy.s16).
+
+INPUT="dataset/LibriSpeech"
+OUTPUT="testdata"
+# honour an OPUSDEMO exported by the environment (env.rc exports one);
+# fall back to the previous hard-coded location otherwise
+OPUSDEMO="${OPUSDEMO:-/local/experiments/ietf_enhancement_studies/bin/opus_demo_patched}"
+BITRATES=( 6000 7500 ) # 9000 12000 15000 18000 24000 32000 )
+
+
+mkdir -p "$OUTPUT"
+
+# NUL-delimited iteration keeps paths containing whitespace intact
+while IFS= read -r -d '' fn
+do
+    name=$(basename "${fn%.wav}")
+    sox "$fn" -r 16000 -b 16 -e signed-integer "${OUTPUT}/tmp.raw"
+    for br in "${BITRATES[@]}"
+    do
+        folder="${OUTPUT}/${name}_${br}.se"
+        echo "creating ${folder}..."
+        mkdir -p "$folder"
+        cp "${OUTPUT}/tmp.raw" "${folder}/clean.s16"
+        (cd "${folder}" && "$OPUSDEMO" voip 16000 1 "$br" clean.s16 noisy.s16)
+    done
+    rm -f "${OUTPUT}/tmp.raw"
+done < <(find "$INPUT" -name "*.wav" -print0)
diff --git a/dnn/torch/osce/stndrd/evaluation/env.rc b/dnn/torch/osce/stndrd/evaluation/env.rc
new file mode 100644
index 00000000..f1266b6f
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/env.rc
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Exports the interpreter, model checkpoints, test script and opus_demo binary
+# used for evaluation. All values are absolute, machine-specific paths.
+# NOTE(review): presumably meant to be sourced, not executed -- confirm.
+export PYTHON=/home/ubuntu/opt/miniconda3/envs/torch/bin/python
+export LACE="/local/experiments/ietf_enhancement_studies/checkpoints/lace_checkpoint.pth"
+export NOLACE="/local/experiments/ietf_enhancement_studies/checkpoints/nolace_checkpoint.pth"
+export TESTMODEL="/local/experiments/ietf_enhancement_studies/opus/dnn/torch/osce/test_model.py"
+export OPUSDEMO="/local/experiments/ietf_enhancement_studies/bin/opus_demo_patched" \ No newline at end of file
diff --git a/dnn/torch/osce/stndrd/evaluation/evaluate.py b/dnn/torch/osce/stndrd/evaluation/evaluate.py
new file mode 100644
index 00000000..54700dbe
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/evaluate.py
@@ -0,0 +1,113 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+
+from scipy.io import wavfile
+from pesq import pesq
+import numpy as np
+from moc import compare
+from moc2 import compare as compare2
+#from warpq import compute_WAPRQ as warpq
+from lace_loss_metric import compare as laceloss_compare
+
+
+# command line interface: folder with processed items and the metric to evaluate
+parser = argparse.ArgumentParser()
+parser.add_argument('folder', type=str, help='folder with processed items')
+parser.add_argument('metric', type=str, choices=['pesq', 'moc', 'moc2', 'laceloss'], help='metric to be used for evaluation')
+
+
+def get_bitrates(folder):
+    """Read the whitespace-separated integer bitrates from <folder>/bitrates.txt."""
+    bitrates_file = os.path.join(folder, 'bitrates.txt')
+
+    with open(bitrates_file) as f:
+        tokens = f.read().rstrip('\n').split()
+
+    return list(map(int, tokens))
+
+def get_itemlist(folder):
+    """Return the first whitespace-separated field of every line in <folder>/items.txt.
+
+    Blank and whitespace-only lines are skipped instead of raising an
+    IndexError.
+    """
+    with open(os.path.join(folder, 'items.txt')) as f:
+        # str.split() yields an empty list for a blank line, which is dropped here
+        items = [fields[0] for fields in map(str.split, f) if fields]
+
+    return items
+
+
+def process_item(folder, item, bitrate, metric):
+    """Score the opus, lace and nolace versions of one item against its clean version.
+
+    The four wav files are read from the 'clean', 'opus', 'lace' and 'nolace'
+    sub-folders, converted to float32 and divided by 2**15. Returns the three
+    scores in the order [opus, lace, nolace]; raises ValueError for an unknown
+    metric.
+    """
+    fs, x_clean = wavfile.read(os.path.join(folder, 'clean', f"{item}_{bitrate}_clean.wav"))
+    fs, x_opus = wavfile.read(os.path.join(folder, 'opus', f"{item}_{bitrate}_opus.wav"))
+    fs, x_lace = wavfile.read(os.path.join(folder, 'lace', f"{item}_{bitrate}_lace.wav"))
+    fs, x_nolace = wavfile.read(os.path.join(folder, 'nolace', f"{item}_{bitrate}_nolace.wav"))
+
+    reference = x_clean.astype(np.float32) / 2**15
+    degraded_signals = [x.astype(np.float32) / 2**15 for x in (x_opus, x_lace, x_nolace)]
+
+    # select the scoring function; the warpq metric is currently disabled
+    if metric == 'pesq':
+        def score(ref, deg):
+            return pesq(fs, ref, deg)
+    elif metric =='moc':
+        score = compare
+    elif metric =='moc2':
+        score = compare2
+    elif metric == 'laceloss':
+        score = laceloss_compare
+    else:
+        raise ValueError(f'unknown metric {metric}')
+
+    return [score(reference, degraded) for degraded in degraded_signals]
+
+def process_bitrate(folder, items, bitrate, metric):
+    """Return a (len(items), 3) array with one row of process_item scores per item."""
+    scores = np.zeros((len(items), 3))
+
+    # each row is a view into scores, so the assignment fills the array in place
+    for row, item in zip(scores, items):
+        row[:] = np.array(process_item(folder, item, bitrate, metric))
+
+    return scores
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    items = get_itemlist(args.folder)
+    bitrates = get_bitrates(args.folder)
+
+    # one (num_items, 3) score array per bitrate
+    results = {}
+    for br in bitrates:
+        print(f"processing bitrate {br}...")
+        results[br] = process_bitrate(args.folder, items, br, args.metric)
+
+    # the dict is stored as a pickled object array
+    output_file = os.path.join(args.folder, f'results_{args.metric}.npy')
+    np.save(output_file, results)
+
+    print("Done.")
diff --git a/dnn/torch/osce/stndrd/evaluation/lace_loss_metric.py b/dnn/torch/osce/stndrd/evaluation/lace_loss_metric.py
new file mode 100644
index 00000000..b0790585
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/lace_loss_metric.py
@@ -0,0 +1,330 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+"""STFT-based Loss modules."""
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+import numpy as np
+import torchaudio
+
+
+def get_window(win_name, win_length, *args, **kwargs):
+    """Create a window of length win_length with the torch window function named win_name.
+
+    Additional positional and keyword arguments are forwarded to the torch
+    window function. Raises ValueError if win_name is not a supported window.
+    """
+    window_dict = {
+        'bartlett_window' : torch.bartlett_window,
+        'blackman_window' : torch.blackman_window,
+        'hamming_window' : torch.hamming_window,
+        'hann_window' : torch.hann_window,
+        'kaiser_window' : torch.kaiser_window
+    }
+
+    if win_name not in window_dict:
+        raise ValueError(f"unknown window '{win_name}', expected one of {sorted(window_dict)}")
+
+    return window_dict[win_name](win_length, *args, **kwargs)
+
+
+def stft(x, fft_size, hop_size, win_length, window):
+ """Perform STFT and convert to magnitude spectrogram.
+ Args:
+ x (Tensor): Input signal tensor (B, T).
+ fft_size (int): FFT size.
+ hop_size (int): Hop size.
+ win_length (int): Window length.
+ window (str): Window function type.
+ Returns:
+ Tensor: Magnitude spectrogram (B, fft_size // 2 + 1, #frames), clamped from below at 1e-7.
+ """
+
+ # the window is created on every call and moved to the device of x
+ win = get_window(window, win_length).to(x.device)
+ x_stft = torch.stft(x, fft_size, hop_size, win_length, win, return_complex=True)
+
+
+ return torch.clamp(torch.abs(x_stft), min=1e-7)
+
+def spectral_convergence_loss(Y_true, Y_pred):
+    """Norm of the magnitude difference divided by the norm of Y_pred, averaged over the batch."""
+    # reduce over everything except the leading batch dimension
+    reduce_dims = list(range(1, Y_pred.dim()))
+
+    error_norm = torch.norm(torch.abs(Y_true) - torch.abs(Y_pred), p="fro", dim=reduce_dims)
+    pred_norm = torch.norm(Y_pred, p="fro", dim=reduce_dims)
+
+    return torch.mean(error_norm / (pred_norm + 1e-6))
+
+
+def log_magnitude_loss(Y_true, Y_pred):
+    """Mean absolute difference between the log magnitudes of Y_true and Y_pred."""
+    def log_abs(Y):
+        # small offset keeps the logarithm finite for zero magnitudes
+        return torch.log(torch.abs(Y) + 1e-15)
+
+    return torch.mean(torch.abs(log_abs(Y_true) - log_abs(Y_pred)))
+
+def spectral_xcorr_loss(Y_true, Y_pred):
+    """One minus the batch-averaged normalized cross-correlation of the magnitudes."""
+    mag_true = Y_true.abs()
+    mag_pred = Y_pred.abs()
+
+    # reduce over everything except the leading batch dimension
+    dims = list(range(1, mag_pred.dim()))
+
+    inner = torch.sum(mag_true * mag_pred, dim=dims)
+    energy = torch.sum(mag_true ** 2, dim=dims) * torch.sum(mag_pred ** 2, dim=dims)
+    xcorr = inner / torch.sqrt(energy + 1e-9)
+
+    return 1 - xcorr.mean()
+
+
+
+class MRLogMelLoss(nn.Module):
+    """Log-magnitude loss on mel spectrograms, averaged over several FFT sizes.
+
+    One torchaudio MelSpectrogram is created per FFT size with a hop size of
+    fft_size * (1 - overlap); for FFT sizes below 128 the number of mel bands
+    is halved.
+    """
+    def __init__(self,
+                 fft_sizes=[512, 256, 128, 64],
+                 overlap=0.5,
+                 fs=16000,
+                 n_mels=18
+                 ):
+
+        # nn.Module has to be initialized before attributes are assigned
+        super().__init__()
+
+        self.fft_sizes = fft_sizes
+        self.overlap = overlap
+        self.fs = fs
+        self.n_mels = n_mels
+
+        self.mel_specs = []
+        for fft_size in fft_sizes:
+            hop_size = int(round(fft_size * (1 - self.overlap)))
+
+            n_mels = self.n_mels
+            if fft_size < 128:
+                n_mels //= 2
+
+            self.mel_specs.append(torchaudio.transforms.MelSpectrogram(fs, fft_size, hop_length=hop_size, n_mels=n_mels))
+
+        # register the transforms as sub-modules so that .to() and state_dict() reach them
+        for i, mel_spec in enumerate(self.mel_specs):
+            self.add_module(f'mel_spec_{i+1}', mel_spec)
+
+    def forward(self, y_true, y_pred):
+        """Return the log-magnitude loss averaged over all mel resolutions (tensor with one element)."""
+
+        loss = torch.zeros(1, device=y_true.device)
+
+        for mel_spec in self.mel_specs:
+            Y_true = mel_spec(y_true)
+            Y_pred = mel_spec(y_pred)
+            loss = loss + log_magnitude_loss(Y_true, Y_pred)
+
+        loss = loss / len(self.mel_specs)
+
+        return loss
+
+def create_weight_matrix(num_bins, bins_per_band=10):
+    """Build a (num_bins, num_bins) band-averaging matrix.
+
+    Row i has weight on the bins from i - bins_per_band // 2 up to (excluding)
+    i + bins_per_band - bins_per_band // 2. The part of that window that falls
+    outside [0, num_bins) is added again at the corresponding edge, and the
+    result is divided by bins_per_band.
+    """
+    lower = bins_per_band // 2
+    upper = bins_per_band - lower
+
+    weights = torch.zeros((num_bins, num_bins), dtype=torch.float32)
+
+    for row in range(num_bins):
+        start = max(row - lower, 0)
+        stop = min(row + upper, num_bins)
+        weights[row, start:stop] += 1
+
+        # window sticks out below bin 0
+        if row < lower:
+            weights[row, :lower - row] += 1
+
+        # window sticks out above the last bin
+        overshoot = row + upper - num_bins
+        if overshoot > 0:
+            weights[row, -overshoot:] += 1
+
+    return weights / bins_per_band
+
+def weighted_spectral_convergence(Y_true, Y_pred, w):
+    """Spectral convergence with weights derived from a smoothed spectral flatness of Y_true.
+
+    The averaging matrix w is applied along dimension 1 of Y_true.
+    """
+    mag_true = torch.abs(Y_true)
+
+    # calculate sfm based weights
+    log_mag_true = torch.log(mag_true + 1e-9)
+
+    avg_logY = torch.matmul(log_mag_true.transpose(1, 2), w)
+    avg_Y = torch.matmul(mag_true.transpose(1, 2), w)
+
+    # ratio of geometric to arithmetic mean
+    sfm = torch.exp(avg_logY) / (avg_Y + 1e-9)
+
+    weight = (torch.relu(1 - sfm) ** .5).transpose(1, 2)
+
+    abs_error = torch.abs(mag_true - torch.abs(Y_pred))
+    weighted_error = torch.mean(weight * abs_error, dim=[1, 2])
+    weighted_reference = torch.mean(weight * mag_true, dim=[1, 2]) + 1e-9
+
+    return torch.mean(weighted_error / weighted_reference)
+
+def gen_filterbank(N, Fs=16000):
+    """Return an (N, N + 1) float32 smoothing filterbank with rows normalized to sum to one.
+
+    Row i is centered at frequency i / N * Fs / 2; its width scales with the
+    ERB at each of the N + 1 input frequencies.
+    """
+    in_freq = (np.arange(N+1, dtype='float32')/N*Fs/2).reshape(1, -1)
+    out_freq = (np.arange(N, dtype='float32')/N*Fs/2).reshape(-1, 1)
+
+    #ERB from B.C.J Moore, An Introduction to the Psychology of Hearing, 5th Ed., page 73.
+    erb = 24.7 + .108*in_freq
+
+    # distance between input and output frequency in units of ERB
+    delta = np.abs(in_freq-out_freq)/erb
+    center = (delta<.5).astype('float32')
+
+    # response in dB: quadratic within half an ERB, linear beyond
+    R = -12*center*delta**2 + (1-center)*(3-12*delta)
+    RE = 10.**(R/10.)
+
+    RE = RE/np.sum(RE, axis=1, keepdims=True)
+    return torch.from_numpy(RE)
+
+def smooth_log_mag(Y_true, Y_pred, filterbank):
+    """Mean absolute log difference of the filterbank-smoothed magnitudes."""
+    def smoothed_log(Y):
+        # smooth the magnitude with the filterbank before taking the logarithm
+        return torch.log(torch.matmul(filterbank, torch.abs(Y)) + 1e-9)
+
+    return torch.abs(smoothed_log(Y_true) - smoothed_log(Y_pred)).mean()
+
+class MRSTFTLoss(nn.Module):
+    """Multi-resolution STFT loss.
+
+    For every FFT size the magnitude spectrograms of both signals are computed
+    and the enabled loss terms (those with a weight > 0) are accumulated. The
+    weighted sum of all terms is divided by the number of FFT sizes.
+    """
+    def __init__(self,
+                 fft_sizes=[2048, 1024, 512, 256, 128, 64],
+                 overlap=0.5,
+                 window='hann_window',
+                 fs=16000,
+                 log_mag_weight=0,
+                 sc_weight=0,
+                 wsc_weight=0,
+                 smooth_log_mag_weight=2,
+                 sxcorr_weight=1):
+        super().__init__()
+
+        self.fft_sizes = fft_sizes
+        self.overlap = overlap
+        self.window = window
+        self.log_mag_weight = log_mag_weight
+        self.sc_weight = sc_weight
+        self.wsc_weight = wsc_weight
+        self.smooth_log_mag_weight = smooth_log_mag_weight
+        self.sxcorr_weight = sxcorr_weight
+        self.fs = fs
+
+        # weights for SFM weighted spectral convergence loss
+        self.wsc_weights = torch.nn.ParameterDict()
+        for fft_size in fft_sizes:
+            width = min(11, int(1000 * fft_size / self.fs + .5))
+            # round odd widths up to the next even number
+            width += width % 2
+            self.wsc_weights[str(fft_size)] = torch.nn.Parameter(
+                create_weight_matrix(fft_size // 2 + 1, width),
+                requires_grad=False
+            )
+
+        # filterbanks for smooth log magnitude loss
+        self.filterbanks = torch.nn.ParameterDict()
+        for fft_size in fft_sizes:
+            self.filterbanks[str(fft_size)] = torch.nn.Parameter(
+                gen_filterbank(fft_size//2),
+                requires_grad=False
+            )
+
+
+    def forward(self, y_true, y_pred):
+        """Return the combined loss as a tensor with one element.
+
+        Implemented as forward (not __call__) so that instances are invoked
+        through the regular nn.Module call path.
+        """
+
+        lm_loss = torch.zeros(1, device=y_true.device)
+        sc_loss = torch.zeros(1, device=y_true.device)
+        wsc_loss = torch.zeros(1, device=y_true.device)
+        slm_loss = torch.zeros(1, device=y_true.device)
+        sxcorr_loss = torch.zeros(1, device=y_true.device)
+
+        for fft_size in self.fft_sizes:
+            hop_size = int(round(fft_size * (1 - self.overlap)))
+            win_size = fft_size
+
+            Y_true = stft(y_true, fft_size, hop_size, win_size, self.window)
+            Y_pred = stft(y_pred, fft_size, hop_size, win_size, self.window)
+
+            if self.log_mag_weight > 0:
+                lm_loss = lm_loss + log_magnitude_loss(Y_true, Y_pred)
+
+            if self.sc_weight > 0:
+                sc_loss = sc_loss + spectral_convergence_loss(Y_true, Y_pred)
+
+            if self.wsc_weight > 0:
+                wsc_loss = wsc_loss + weighted_spectral_convergence(Y_true, Y_pred, self.wsc_weights[str(fft_size)])
+
+            if self.smooth_log_mag_weight > 0:
+                slm_loss = slm_loss + smooth_log_mag(Y_true, Y_pred, self.filterbanks[str(fft_size)])
+
+            if self.sxcorr_weight > 0:
+                sxcorr_loss = sxcorr_loss + spectral_xcorr_loss(Y_true, Y_pred)
+
+
+        total_loss = (self.log_mag_weight * lm_loss + self.sc_weight * sc_loss
+                + self.wsc_weight * wsc_loss + self.smooth_log_mag_weight * slm_loss
+                + self.sxcorr_weight * sxcorr_loss) / len(self.fft_sizes)
+
+        return total_loss
+
+
+def td_l2_norm(y_true, y_pred):
+    """Mean squared error divided by the RMS of y_pred, averaged over the batch."""
+    # reduce over everything except the leading dimension
+    dims = list(range(1, y_true.dim()))
+
+    mse = torch.mean((y_true - y_pred) ** 2, dim=dims)
+    pred_rms = torch.mean(y_pred ** 2, dim=dims) ** .5
+
+    return (mse / (pred_rms + 1e-6)).mean()
+
+
+class LaceLoss(nn.Module):
+    """Combination of the multi-resolution STFT loss and a time-domain loss."""
+    def __init__(self):
+        super().__init__()
+
+        self.stftloss = MRSTFTLoss(log_mag_weight=0, sc_weight=0, wsc_weight=0, smooth_log_mag_weight=2, sxcorr_weight=1)
+
+
+    def forward(self, x, y):
+        """Return (stft loss + 10 * td_l2_norm) / 13 for the signals x and y."""
+        specloss = self.stftloss(x, y)
+        phaseloss = td_l2_norm(x, y)
+
+        return (specloss + 10 * phaseloss) / 13
+
+    def compare(self, x_ref, x_deg):
+        """Return 10 * forward(...) as a Python float for two numpy signals.
+
+        Both signals are trimmed to the shorter length and pre-emphasized on
+        copies, so the inputs are left untouched.
+        """
+        # trim items to same size
+        n = min(len(x_ref), len(x_deg))
+
+        prepared = []
+        for signal in (x_ref, x_deg):
+            signal = signal[:n].copy()
+            # pre-emphasis
+            signal[1:] -= 0.85 * signal[:-1]
+            prepared.append(signal)
+
+        # run on whatever device the module parameters live on
+        device = next(self.parameters()).device
+        x, y = (torch.from_numpy(signal).to(device) for signal in prepared)
+
+        with torch.no_grad():
+            dist = 10 * self.forward(x, y)
+
+        return dist.cpu().numpy().item()
+
+
+# module-level loss instance shared by all compare() calls; created at import time
+# and moved to the GPU when CUDA is available
+lace_loss = LaceLoss()
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+lace_loss.to(device)
+
+def compare(x, y):
+ """ Returns lace_loss.compare(x, y) for the module-level LaceLoss instance. """
+
+ return lace_loss.compare(x, y)
diff --git a/dnn/torch/osce/stndrd/evaluation/make_boxplots.py b/dnn/torch/osce/stndrd/evaluation/make_boxplots.py
new file mode 100644
index 00000000..f7ea778a
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/make_boxplots.py
@@ -0,0 +1,116 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import numpy as np
+import matplotlib.pyplot as plt
+from prettytable import PrettyTable
+from matplotlib.patches import Patch
+
+# command line interface; --metric defaults to 'all', which plots every metric found in folder
+parser = argparse.ArgumentParser()
+parser.add_argument('folder', type=str, help='path to folder with pre-calculated metrics')
+# 'moc2' is accepted because load_data also picks up results_moc2.npy
+parser.add_argument('--metric', choices=['pesq', 'moc', 'moc2', 'warpq', 'nomad', 'laceloss', 'all'], default='all', help='default: all')
+parser.add_argument('--output', type=str, default=None, help='alternative output folder, default: folder')
+
+def load_data(folder):
+    """Load every available results_<metric>.npy file from folder.
+
+    Returns a dict mapping the metric name to the dict stored in the file.
+    Metrics without a results file are left out.
+    """
+    data = dict()
+
+    for metric in ('moc', 'moc2', 'pesq', 'warpq', 'nomad', 'laceloss'):
+        results_file = os.path.join(folder, f'results_{metric}.npy')
+
+        if os.path.isfile(results_file):
+            # NOTE(review): allow_pickle=True executes pickled content -- only load trusted result files
+            data[metric] = np.load(results_file, allow_pickle=True).item()
+
+    return data
+
+def plot_data(filename, data, title=None):
+    """Draw one notched boxplot per bitrate and system and save the figure to filename.
+
+    data maps a bitrate to an array whose columns 0, 1 and 2 hold the scores
+    for Opus, LACE and NoLACE. The figure is closed after saving so that
+    repeated calls do not accumulate open figures.
+    """
+    compare_dict = dict()
+    for br in data.keys():
+        compare_dict[f'Opus {br/1000:.1f} kb/s'] = data[br][:, 0]
+        compare_dict[f'LACE {br/1000:.1f} kb/s'] = data[br][:, 1]
+        compare_dict[f'NoLACE {br/1000:.1f} kb/s'] = data[br][:, 2]
+
+    # NOTE(review): text.usetex presumably requires a working LaTeX installation -- confirm
+    plt.rcParams.update({
+        "text.usetex": True,
+        "font.family": "Helvetica",
+        "font.size": 32
+    })
+
+    black = '#000000'
+    red = '#ff5745'
+    blue = '#007dbc'
+    colors = [black, red, blue]
+    legend_elements = [Patch(facecolor=colors[0], label='Opus SILK'),
+                       Patch(facecolor=colors[1], label='LACE'),
+                       Patch(facecolor=colors[2], label='NoLACE')]
+
+    fig, ax = plt.subplots()
+    try:
+        fig.set_size_inches(40, 20)
+        bplot = ax.boxplot(compare_dict.values(), showfliers=False, notch=True, patch_artist=True)
+
+        # boxes come in groups of three (Opus, LACE, NoLACE) per bitrate
+        for i, patch in enumerate(bplot['boxes']):
+            patch.set_facecolor(colors[i%3])
+
+        ax.set_xticklabels(compare_dict.keys(), rotation=290)
+
+        if title is not None:
+            ax.set_title(title)
+
+        ax.legend(handles=legend_elements)
+
+        fig.savefig(filename, bbox_inches='tight')
+    finally:
+        # pyplot keeps a figure alive until it is closed explicitly
+        plt.close(fig)
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    data = load_data(args.folder)
+
+    if args.metric == 'all':
+        metrics = list(data.keys())
+    else:
+        # a specific metric can only be plotted if its results file was found
+        if args.metric not in data:
+            parser.error(f"no results for metric '{args.metric}' found in {args.folder}")
+        metrics = [args.metric]
+
+    folder = args.folder if args.output is None else args.output
+    os.makedirs(folder, exist_ok=True)
+
+    for metric in metrics:
+        print(f"Plotting data for {metric} metric...")
+        plot_data(os.path.join(folder, f"boxplot_{metric}.png"), data[metric], title=metric.upper())
+
+    print("Done.")
\ No newline at end of file
diff --git a/dnn/torch/osce/stndrd/evaluation/make_boxplots_moctest.py b/dnn/torch/osce/stndrd/evaluation/make_boxplots_moctest.py
new file mode 100644
index 00000000..ca65aba9
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/make_boxplots_moctest.py
@@ -0,0 +1,109 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import numpy as np
+import matplotlib.pyplot as plt
+from prettytable import PrettyTable
+from matplotlib.patches import Patch
+
+# command line interface; --metric defaults to 'all', which plots every metric found in folder
+parser = argparse.ArgumentParser()
+parser.add_argument('folder', type=str, help='path to folder with pre-calculated metrics')
+parser.add_argument('--metric', choices=['pesq', 'moc', 'warpq', 'nomad', 'laceloss', 'all'], default='all', help='default: all')
+parser.add_argument('--output', type=str, default=None, help='alternative output folder, default: folder')
+
+def load_data(folder):
+    """Load every available results_<metric>.npy file from folder.
+
+    Returns a dict mapping the metric name to the dict stored in the file.
+    Metrics without a results file are left out.
+    """
+    data = dict()
+
+    for metric in ('moc', 'pesq', 'warpq', 'nomad', 'laceloss'):
+        results_file = os.path.join(folder, f'results_{metric}.npy')
+
+        if os.path.isfile(results_file):
+            # NOTE(review): allow_pickle=True executes pickled content -- only load trusted result files
+            data[metric] = np.load(results_file, allow_pickle=True).item()
+
+    return data
+
+def plot_data(filename, data, title=None):
+    """Draw one notched boxplot per bitrate and system and save the figure to filename.
+
+    data maps a bitrate to an array whose columns 0, 1 and 2 hold the scores
+    for Opus, LACE (MOC only) and LACE (MOC + TD). The figure is closed after
+    saving so that repeated calls do not accumulate open figures.
+    """
+    compare_dict = dict()
+    for br in data.keys():
+        compare_dict[f'Opus {br/1000:.1f} kb/s'] = data[br][:, 0]
+        compare_dict[f'LACE (MOC only) {br/1000:.1f} kb/s'] = data[br][:, 1]
+        compare_dict[f'LACE (MOC + TD) {br/1000:.1f} kb/s'] = data[br][:, 2]
+
+    # NOTE(review): text.usetex presumably requires a working LaTeX installation -- confirm
+    plt.rcParams.update({
+        "text.usetex": True,
+        "font.family": "Helvetica",
+        "font.size": 32
+    })
+    colors = ['pink', 'lightblue', 'lightgreen']
+    legend_elements = [Patch(facecolor=colors[0], label='Opus SILK'),
+                       Patch(facecolor=colors[1], label='MOC loss only'),
+                       Patch(facecolor=colors[2], label='MOC + TD loss')]
+
+    fig, ax = plt.subplots()
+    try:
+        fig.set_size_inches(40, 20)
+        bplot = ax.boxplot(compare_dict.values(), showfliers=False, notch=True, patch_artist=True)
+
+        # boxes come in groups of three per bitrate, in the order of the legend
+        for i, patch in enumerate(bplot['boxes']):
+            patch.set_facecolor(colors[i%3])
+
+        ax.set_xticklabels(compare_dict.keys(), rotation=290)
+
+        if title is not None:
+            ax.set_title(title)
+
+        ax.legend(handles=legend_elements)
+
+        fig.savefig(filename, bbox_inches='tight')
+    finally:
+        # pyplot keeps a figure alive until it is closed explicitly
+        plt.close(fig)
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    data = load_data(args.folder)
+
+    if args.metric == 'all':
+        metrics = list(data.keys())
+    else:
+        # a specific metric can only be plotted if its results file was found
+        if args.metric not in data:
+            parser.error(f"no results for metric '{args.metric}' found in {args.folder}")
+        metrics = [args.metric]
+
+    folder = args.folder if args.output is None else args.output
+    os.makedirs(folder, exist_ok=True)
+
+    for metric in metrics:
+        print(f"Plotting data for {metric} metric...")
+        plot_data(os.path.join(folder, f"boxplot_{metric}.png"), data[metric], title=metric.upper())
+
+    print("Done.")
\ No newline at end of file
diff --git a/dnn/torch/osce/stndrd/evaluation/make_tables.py b/dnn/torch/osce/stndrd/evaluation/make_tables.py
new file mode 100644
index 00000000..56080127
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/make_tables.py
@@ -0,0 +1,124 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import numpy as np
+import matplotlib.pyplot as plt
+from prettytable import PrettyTable
+from matplotlib.patches import Patch
+
# command line interface
parser = argparse.ArgumentParser()
parser.add_argument('folder', type=str, help='path to folder with pre-calculated metrics')
# 'moc2' is accepted because load_data() in this file also reads results_moc2.npy
parser.add_argument('--metric', choices=['pesq', 'moc', 'moc2', 'warpq', 'nomad', 'laceloss', 'all'], default='all', help='default: all')
parser.add_argument('--output', type=str, default=None, help='alternative output folder, default: folder')
+
def load_data(folder):
    """Load all pre-calculated metric results found in ``folder``.

    For every metric name ``m`` in (moc, moc2, pesq, warpq, nomad, laceloss)
    the file ``results_<m>.npy`` is loaded if it exists.

    Args:
        folder: directory containing the ``results_*.npy`` files.

    Returns:
        dict: metric name -> object stored in the corresponding file
        (unwrapped with ``.item()``); metrics without a file are absent.
    """
    data = dict()

    for metric in ('moc', 'moc2', 'pesq', 'warpq', 'nomad', 'laceloss'):
        path = os.path.join(folder, f'results_{metric}.npy')
        if os.path.isfile(path):
            # NOTE(security): allow_pickle=True executes pickled code on load;
            # only point this script at result files you created yourself.
            data[metric] = np.load(path, allow_pickle=True).item()

    return data
+
def make_table(filename, data, title=None):
    """Write a 'mean (std)' table of the three conditions per bitrate.

    Args:
        filename: output path without extension; ``.txt``, ``.html`` and
            ``.csv`` versions are written.
        data: mapping from bitrate to a 2-D array whose columns 0, 1, 2 are
            reported under 'Opus', 'LACE' and 'NoLACE'.
        title: accepted but not used.
    """
    table = PrettyTable()
    table.field_names = ['bitrate (bps)', 'Opus', 'LACE', 'NoLACE']

    for bitrate, scores in data.items():
        cells = [
            f"{float(scores[:, col].mean()):.3f} ({float(scores[:, col].std()):.2f})"
            for col in range(3)
        ]
        table.add_row([bitrate] + cells)

    # one output file per rendering of the same table
    renderings = (
        (".txt", lambda: str(table)),
        (".html", table.get_html_string),
        (".csv", table.get_csv_string),
    )
    for extension, render in renderings:
        with open(filename + extension, "w") as f:
            f.write(render())

    print(table)
+
+
def make_diff_table(filename, data, title=None):
    """Write a 'mean (std)' table of score differences relative to column 0.

    Args:
        filename: output path without extension; ``.txt``, ``.html`` and
            ``.csv`` versions are written.
        data: mapping from bitrate to a 2-D array; columns 1 and 2 minus
            column 0 are reported as 'LACE - Opus' and 'NoLACE - Opus'.
        title: accepted but not used.
    """
    table = PrettyTable()
    table.field_names = ['bitrate (bps)', 'LACE - Opus', 'NoLACE - Opus']

    for bitrate, scores in data.items():
        baseline = scores[:, 0]
        cells = []
        for col in (1, 2):
            delta = scores[:, col] - baseline
            cells.append(f"{float(delta.mean()):.3f} ({float(delta.std()):.2f})")
        table.add_row([bitrate] + cells)

    # one output file per rendering of the same table
    renderings = (
        (".txt", lambda: str(table)),
        (".html", table.get_html_string),
        (".csv", table.get_csv_string),
    )
    for extension, render in renderings:
        with open(filename + extension, "w") as f:
            f.write(render())

    print(table)
+
if __name__ == "__main__":
    args = parser.parse_args()
    data = load_data(args.folder)

    # select the metrics to tabulate; fail with a readable message instead of
    # a KeyError when the requested metric has no results file in the folder
    if args.metric == 'all':
        metrics = list(data.keys())
    elif args.metric in data:
        metrics = [args.metric]
    else:
        raise SystemExit(f"error: no pre-calculated results for metric '{args.metric}' found in {args.folder}")

    folder = args.folder if args.output is None else args.output
    os.makedirs(folder, exist_ok=True)

    for metric in metrics:
        print(f"Making tables for {metric} metric...")
        make_table(os.path.join(folder, f"table_{metric}"), data[metric])
        make_diff_table(os.path.join(folder, f"table_diff_{metric}"), data[metric])

    print("Done.")
diff --git a/dnn/torch/osce/stndrd/evaluation/make_tables_moctest.py b/dnn/torch/osce/stndrd/evaluation/make_tables_moctest.py
new file mode 100644
index 00000000..37718068
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/make_tables_moctest.py
@@ -0,0 +1,121 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import numpy as np
+import matplotlib.pyplot as plt
+from prettytable import PrettyTable
+from matplotlib.patches import Patch
+
# command line interface: positional results folder plus two optional switches
parser = argparse.ArgumentParser()
for _flag, _options in (
    ('folder', dict(type=str, help='path to folder with pre-calculated metrics')),
    ('--metric', dict(choices=['pesq', 'moc', 'warpq', 'nomad', 'laceloss', 'all'], default='all', help='default: all')),
    ('--output', dict(type=str, default=None, help='alternative output folder, default: folder')),
):
    parser.add_argument(_flag, **_options)
+
def load_data(folder):
    """Load all pre-calculated metric results found in ``folder``.

    For every metric name ``m`` in (moc, pesq, warpq, nomad, laceloss) the
    file ``results_<m>.npy`` is loaded if it exists.

    Args:
        folder: directory containing the ``results_*.npy`` files.

    Returns:
        dict: metric name -> object stored in the corresponding file
        (unwrapped with ``.item()``); metrics without a file are absent.
    """
    data = dict()

    for metric in ('moc', 'pesq', 'warpq', 'nomad', 'laceloss'):
        path = os.path.join(folder, f'results_{metric}.npy')
        if os.path.isfile(path):
            # NOTE(security): allow_pickle=True executes pickled code on load;
            # only point this script at result files you created yourself.
            data[metric] = np.load(path, allow_pickle=True).item()

    return data
+
def make_table(filename, data, title=None):
    """Write a 'mean (std)' table of the three conditions per bitrate.

    Args:
        filename: output path without extension; ``.txt``, ``.html`` and
            ``.csv`` versions are written.
        data: mapping from bitrate to a 2-D array whose columns 0, 1, 2 are
            reported under 'Opus', 'LACE' and 'NoLACE'.
        title: accepted but not used.
    """
    table = PrettyTable()
    table.field_names = ['bitrate (bps)', 'Opus', 'LACE', 'NoLACE']

    for bitrate, scores in data.items():
        cells = [
            f"{float(scores[:, col].mean()):.3f} ({float(scores[:, col].std()):.2f})"
            for col in range(3)
        ]
        table.add_row([bitrate] + cells)

    # one output file per rendering of the same table
    renderings = (
        (".txt", lambda: str(table)),
        (".html", table.get_html_string),
        (".csv", table.get_csv_string),
    )
    for extension, render in renderings:
        with open(filename + extension, "w") as f:
            f.write(render())

    print(table)
+
+
def make_diff_table(filename, data, title=None):
    """Write a 'mean (std)' table of score differences relative to column 0.

    Args:
        filename: output path without extension; ``.txt``, ``.html`` and
            ``.csv`` versions are written.
        data: mapping from bitrate to a 2-D array; columns 1 and 2 minus
            column 0 are reported as 'LACE - Opus' and 'NoLACE - Opus'.
        title: accepted but not used.
    """
    table = PrettyTable()
    table.field_names = ['bitrate (bps)', 'LACE - Opus', 'NoLACE - Opus']

    for bitrate, scores in data.items():
        baseline = scores[:, 0]
        cells = []
        for col in (1, 2):
            delta = scores[:, col] - baseline
            cells.append(f"{float(delta.mean()):.3f} ({float(delta.std()):.2f})")
        table.add_row([bitrate] + cells)

    # one output file per rendering of the same table
    renderings = (
        (".txt", lambda: str(table)),
        (".html", table.get_html_string),
        (".csv", table.get_csv_string),
    )
    for extension, render in renderings:
        with open(filename + extension, "w") as f:
            f.write(render())

    print(table)
+
if __name__ == "__main__":
    args = parser.parse_args()
    data = load_data(args.folder)

    # select the metrics to tabulate; fail with a readable message instead of
    # a KeyError when the requested metric has no results file in the folder
    if args.metric == 'all':
        metrics = list(data.keys())
    elif args.metric in data:
        metrics = [args.metric]
    else:
        raise SystemExit(f"error: no pre-calculated results for metric '{args.metric}' found in {args.folder}")

    folder = args.folder if args.output is None else args.output
    os.makedirs(folder, exist_ok=True)

    for metric in metrics:
        print(f"Making tables for {metric} metric...")
        make_table(os.path.join(folder, f"table_{metric}"), data[metric])
        make_diff_table(os.path.join(folder, f"table_diff_{metric}"), data[metric])

    print("Done.")
diff --git a/dnn/torch/osce/stndrd/evaluation/moc.py b/dnn/torch/osce/stndrd/evaluation/moc.py
new file mode 100644
index 00000000..bf004de9
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/moc.py
@@ -0,0 +1,182 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+import scipy.signal
+
def compute_vad_mask(x, fs, stop_db=-70):
    """Compute a smooth per-sample activity mask from frame energies.

    Args:
        x (np.ndarray): 1-D input signal.
        fs (int): sampling rate in Hz; frames are ``(fs + 49) // 50`` samples long.
        stop_db (float): frames whose smoothed energy is below the maximum
            frame energy times ``10 ** (stop_db / 20)`` are marked inactive.

    Returns:
        tuple: ``(x, mask)`` with ``x`` truncated to a whole number of frames
        and ``mask`` holding one value per remaining sample. Both are empty if
        the input is shorter than one frame.
    """
    frame_length = (fs + 49) // 50
    num_frames = len(x) // frame_length
    x = x[: frame_length * num_frames]

    if num_frames == 0:
        # nothing to analyse; avoids calling max() on an empty array below
        return x, np.zeros(0)

    frames = x.reshape(-1, frame_length)
    frame_energy = np.sum(frames ** 2, axis=1)

    # 5-frame moving average. np.convolve(mode='same') returns max(M, N)
    # values, i.e. 5 instead of num_frames for very short signals, so take the
    # centred slice of the full convolution (identical for num_frames >= 5).
    frame_energy_smooth = np.convolve(frame_energy, np.ones(5) / 5, mode='full')[2 : 2 + num_frames]

    # NOTE(review): the dB value is converted with /20 although it scales an
    # energy; kept unchanged so that results stay comparable.
    max_threshold = frame_energy.max() * 10 ** (stop_db/20)
    vactive = np.ones_like(frames)
    vactive[frame_energy_smooth < max_threshold, :] = 0
    vactive = vactive.reshape(-1)

    # smooth the binary decision with a normalized half-sine window
    smoothing_window = np.sin(np.arange(frame_length) * np.pi / (frame_length - 1))
    smoothing_window = smoothing_window / smoothing_window.sum()

    mask = np.convolve(vactive, smoothing_window, mode='same')

    return x, mask
+
def convert_mask(mask, num_frames, frame_size=160, hop_size=40):
    """Average a per-sample mask over overlapping analysis frames.

    The mask is zero-padded or truncated to
    ``frame_size + (num_frames - 1) * hop_size`` samples first.

    Returns:
        np.ndarray: ``num_frames`` values, the mean of the mask over each frame.
    """
    needed = frame_size + (num_frames - 1) * hop_size
    missing = needed - len(mask)

    if missing > 0:
        mask = np.concatenate((mask, np.zeros(missing)), dtype=mask.dtype)
    else:
        mask = mask[:needed]

    frame_starts = [k * hop_size for k in range(num_frames)]
    return np.array([np.mean(mask[start : start + frame_size]) for start in frame_starts])
+
def power_spectrum(x, window_size=160, hop_size=40, window='hamming'):
    """Compute windowed short-time power spectra of a 1-D signal.

    Args:
        x (np.ndarray): 1-D input signal.
        window_size (int): analysis window length in samples.
        hop_size (int): shift between consecutive windows in samples.
        window: window specification passed to ``scipy.signal.get_window``.

    Returns:
        np.ndarray: array of shape ``(num_spectra, window_size // 2 + 1)`` with
        ``num_spectra = (len(x) - window_size - hop_size) // hop_size``.

    Raises:
        ValueError: if the signal is too short to yield a single spectrum.
    """
    num_spectra = (len(x) - window_size - hop_size) // hop_size
    if num_spectra < 1:
        # previously surfaced as "need at least one array to concatenate"
        raise ValueError(
            f"signal of length {len(x)} is too short for window_size={window_size} and hop_size={hop_size}"
        )

    window = scipy.signal.get_window(window, window_size)
    N = window_size // 2

    frames = np.stack([x[i * hop_size : i * hop_size + window_size] for i in range(num_spectra)]) * window
    # keep the non-negative frequency bins 0..N only
    psd = np.abs(np.fft.fft(frames, axis=1)[:, :N + 1]) ** 2

    return psd
+
+
def frequency_mask(num_bands, up_factor, down_factor):
    """Build the band-to-band spreading matrix ``down_mask @ up_mask``.

    ``up_mask`` is lower triangular with entries ``up_factor ** (i - j)`` and
    ``down_mask`` is upper triangular with entries ``down_factor ** (j - i)``.

    Returns:
        np.ndarray: matrix of shape ``(num_bands, num_bands)``.
    """
    band = np.arange(num_bands)
    # distance[i, j] = i - j
    distance = band[:, np.newaxis] - band[np.newaxis, :]
    steps = np.abs(distance)

    up_mask = np.where(distance >= 0, up_factor ** steps, 0.0)
    down_mask = np.where(distance <= 0, down_factor ** steps, 0.0)

    return down_mask @ up_mask
+
+
def rect_fb(band_limits, num_bins=None):
    """Build a rectangular filter bank matrix.

    Band ``i`` has weight 1 on bins ``band_limits[i] .. band_limits[i+1] - 1``
    and 0 elsewhere.

    Args:
        band_limits: increasing bin indices delimiting the bands.
        num_bins: number of columns; defaults to ``band_limits[-1]``.

    Returns:
        np.ndarray: matrix of shape ``(len(band_limits) - 1, num_bins)``.
    """
    if num_bins is None:
        num_bins = band_limits[-1]

    num_bands = len(band_limits) - 1
    fb = np.zeros((num_bands, num_bins))

    for band, (lower, upper) in enumerate(zip(band_limits[:-1], band_limits[1:])):
        fb[band, lower:upper] = 1

    return fb
+
+
def compare(x, y, apply_vad=False):
    """ Modified version of opus_compare for 16 kHz mono signals

    Args:
        x (np.ndarray): reference input signal scaled to [-1, 1]
        y (np.ndarray): test signal scaled to [-1, 1]
        apply_vad (bool): if True, only frames with a converted VAD mask
            value above 1e-6 contribute to the error

    Returns:
        float: perceptually weighted error
    """
    # filter bank: bark scale with minimum-2-bin bands and cutoff at 7.5 kHz
    band_edges = [0, 2, 4, 6, 7, 9, 11, 13, 15, 18, 22, 26, 31, 36, 43, 51, 60, 75]
    filterbank = rect_fb(band_edges, num_bins=81)

    # trim both signals to the common length and scale to 16-bit range
    length = min(len(x), len(y))
    x = x[:length] * 2**15
    y = y[:length] * 2**15

    spec_ref = power_spectrum(x) + 100000
    spec_deg = power_spectrum(y) + 100000

    # average band energies of the reference
    band_energy = (spec_ref @ filterbank.T) / np.sum(filterbank, axis=1)

    # frequency masking
    spreading = frequency_mask(len(band_edges) - 1, 0.1, 0.03)
    masking = band_energy @ spreading.T

    # temporal masking: every frame inherits half of the previous frame's mask
    for t in range(1, spec_ref.shape[0]):
        masking[t, :] += 0.5 * masking[t-1, :]

    # add the mask (derived from the reference only) to both spectra
    masked_ref = spec_ref + 0.1 * (masking @ filterbank)
    masked_deg = spec_deg + 0.1 * (masking @ filterbank)

    # combine each pair of neighbouring frames
    masked_ref = masked_ref[1:] + masked_ref[:-1]
    masked_deg = masked_deg[1:] + masked_deg[:-1]

    # distortion metric: squared log spectral ratio, averaged per band, then per frame
    ratio = masked_deg / masked_ref
    log_dist = np.log(ratio) ** 2
    band_err = ((log_dist @ filterbank.T) / np.sum(filterbank, axis=1))
    frame_err = np.mean(band_err, axis=1)

    if apply_vad:
        _, activity = compute_vad_mask(x, 16000)
        activity = convert_mask(activity, frame_err.shape[0])
    else:
        activity = np.ones_like(frame_err)

    err = np.mean(np.abs(frame_err[activity > 1e-6]) ** 3) ** (1/6)

    return float(err)
+
if __name__ == "__main__":
    import argparse
    from scipy.io import wavfile

    parser = argparse.ArgumentParser()
    parser.add_argument('ref', type=str, help='reference wav file')
    parser.add_argument('deg', type=str, help='degraded wav file')
    parser.add_argument('--apply-vad', action='store_true')
    args = parser.parse_args()

    fs1, x = wavfile.read(args.ref)
    fs2, y = wavfile.read(args.deg)

    # both files have to be sampled at 16 kHz; checking max(fs1, fs2) alone
    # would accept e.g. an 8 kHz file next to a 16 kHz one
    if fs1 != 16000 or fs2 != 16000:
        raise ValueError('error: encountered sampling frequency different from 16kHz')

    # compare() is written for mono signals; wavfile.read returns a 2-D array
    # for multi-channel files
    if x.ndim != 1 or y.ndim != 1:
        raise ValueError('error: expected mono input files')

    # scale to [-1, 1]; this assumes 16-bit PCM input
    x = x.astype(np.float32) / 2**15
    y = y.astype(np.float32) / 2**15

    err = compare(x, y, apply_vad=args.apply_vad)

    print(f"MOC: {err}")
diff --git a/dnn/torch/osce/stndrd/evaluation/moc2.py b/dnn/torch/osce/stndrd/evaluation/moc2.py
new file mode 100644
index 00000000..7e155f01
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/moc2.py
@@ -0,0 +1,190 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+import scipy.signal
+
def compute_vad_mask(x, fs, stop_db=-70):
    """Compute a smooth per-sample activity mask from frame energies.

    Args:
        x (np.ndarray): 1-D input signal.
        fs (int): sampling rate in Hz; frames are ``(fs + 49) // 50`` samples long.
        stop_db (float): frames whose smoothed energy is below the maximum
            frame energy times ``10 ** (stop_db / 20)`` are marked inactive.

    Returns:
        tuple: ``(x, mask)`` with ``x`` truncated to a whole number of frames
        and ``mask`` holding one value per remaining sample. Both are empty if
        the input is shorter than one frame.
    """
    frame_length = (fs + 49) // 50
    num_frames = len(x) // frame_length
    x = x[: frame_length * num_frames]

    if num_frames == 0:
        # nothing to analyse; avoids calling max() on an empty array below
        return x, np.zeros(0)

    frames = x.reshape(-1, frame_length)
    frame_energy = np.sum(frames ** 2, axis=1)

    # 5-frame moving average. np.convolve(mode='same') returns max(M, N)
    # values, i.e. 5 instead of num_frames for very short signals, so take the
    # centred slice of the full convolution (identical for num_frames >= 5).
    frame_energy_smooth = np.convolve(frame_energy, np.ones(5) / 5, mode='full')[2 : 2 + num_frames]

    # NOTE(review): the dB value is converted with /20 although it scales an
    # energy; kept unchanged so that results stay comparable.
    max_threshold = frame_energy.max() * 10 ** (stop_db/20)
    vactive = np.ones_like(frames)
    vactive[frame_energy_smooth < max_threshold, :] = 0
    vactive = vactive.reshape(-1)

    # smooth the binary decision with a normalized half-sine window
    smoothing_window = np.sin(np.arange(frame_length) * np.pi / (frame_length - 1))
    smoothing_window = smoothing_window / smoothing_window.sum()

    mask = np.convolve(vactive, smoothing_window, mode='same')

    return x, mask
+
def convert_mask(mask, num_frames, frame_size=160, hop_size=40):
    """Average a per-sample mask over overlapping analysis frames.

    The mask is zero-padded or truncated to
    ``frame_size + (num_frames - 1) * hop_size`` samples first.

    Returns:
        np.ndarray: ``num_frames`` values, the mean of the mask over each frame.
    """
    needed = frame_size + (num_frames - 1) * hop_size
    missing = needed - len(mask)

    if missing > 0:
        mask = np.concatenate((mask, np.zeros(missing)), dtype=mask.dtype)
    else:
        mask = mask[:needed]

    frame_starts = [k * hop_size for k in range(num_frames)]
    return np.array([np.mean(mask[start : start + frame_size]) for start in frame_starts])
+
def power_spectrum(x, window_size=160, hop_size=40, window='hamming'):
    """Compute windowed short-time power spectra of a 1-D signal.

    Args:
        x (np.ndarray): 1-D input signal.
        window_size (int): analysis window length in samples.
        hop_size (int): shift between consecutive windows in samples.
        window: window specification passed to ``scipy.signal.get_window``.

    Returns:
        np.ndarray: array of shape ``(num_spectra, window_size // 2 + 1)`` with
        ``num_spectra = (len(x) - window_size - hop_size) // hop_size``.

    Raises:
        ValueError: if the signal is too short to yield a single spectrum.
    """
    num_spectra = (len(x) - window_size - hop_size) // hop_size
    if num_spectra < 1:
        # previously surfaced as "need at least one array to concatenate"
        raise ValueError(
            f"signal of length {len(x)} is too short for window_size={window_size} and hop_size={hop_size}"
        )

    window = scipy.signal.get_window(window, window_size)
    N = window_size // 2

    frames = np.stack([x[i * hop_size : i * hop_size + window_size] for i in range(num_spectra)]) * window
    # keep the non-negative frequency bins 0..N only
    psd = np.abs(np.fft.fft(frames, axis=1)[:, :N + 1]) ** 2

    return psd
+
+
def frequency_mask(num_bands, up_factor, down_factor):
    """Build the band-to-band spreading matrix ``down_mask @ up_mask``.

    ``up_mask`` is lower triangular with entries ``up_factor ** (i - j)`` and
    ``down_mask`` is upper triangular with entries ``down_factor ** (j - i)``.

    Returns:
        np.ndarray: matrix of shape ``(num_bands, num_bands)``.
    """
    band = np.arange(num_bands)
    # distance[i, j] = i - j
    distance = band[:, np.newaxis] - band[np.newaxis, :]
    steps = np.abs(distance)

    up_mask = np.where(distance >= 0, up_factor ** steps, 0.0)
    down_mask = np.where(distance <= 0, down_factor ** steps, 0.0)

    return down_mask @ up_mask
+
+
def rect_fb(band_limits, num_bins=None):
    """Build a rectangular filter bank matrix.

    Band ``i`` has weight 1 on bins ``band_limits[i] .. band_limits[i+1] - 1``
    and 0 elsewhere.

    Args:
        band_limits: increasing bin indices delimiting the bands.
        num_bins: number of columns; defaults to ``band_limits[-1]``.

    Returns:
        np.ndarray: matrix of shape ``(len(band_limits) - 1, num_bins)``.
    """
    if num_bins is None:
        num_bins = band_limits[-1]

    num_bands = len(band_limits) - 1
    fb = np.zeros((num_bands, num_bins))

    for band, (lower, upper) in enumerate(zip(band_limits[:-1], band_limits[1:])):
        fb[band, lower:upper] = 1

    return fb
+
+
def _compare(x, y, apply_vad=False, factor=1):
    """ Modified version of opus_compare for 16 kHz mono signals

    Args:
        x (np.ndarray): reference input signal scaled to [-1, 1]
        y (np.ndarray): test signal scaled to [-1, 1]
        apply_vad (bool): if True, only frames with a converted VAD mask
            value above 1e-6 contribute to the error
        factor (int): multiplies band limits, window size and hop size and is
            the exponent applied to the 0.5 temporal masking decay

    Returns:
        float: perceptually weighted error
    """
    # filter bank: bark scale with minimum-2-bin bands and cutoff at 7.5 kHz
    base_edges = [0, 2, 4, 6, 7, 9, 11, 13, 15, 18, 22, 26, 31, 36, 43, 51, 60, 75]
    band_edges = [factor * edge for edge in base_edges]
    window_size = factor * 160
    hop_size = factor * 40
    filterbank = rect_fb(band_edges, num_bins=window_size // 2 + 1)

    # trim both signals to the common length and scale to 16-bit range
    length = min(len(x), len(y))
    x = x[:length].copy() * 2**15
    y = y[:length].copy() * 2**15

    spec_ref = power_spectrum(x, window_size=window_size, hop_size=hop_size) + 100000
    spec_deg = power_spectrum(y, window_size=window_size, hop_size=hop_size) + 100000

    # average band energies of the reference
    band_energy = (spec_ref @ filterbank.T) / np.sum(filterbank, axis=1)

    # frequency masking
    spreading = frequency_mask(len(band_edges) - 1, 0.1, 0.03)
    masking = band_energy @ spreading.T

    # temporal masking: every frame inherits a decayed copy of the previous mask
    for t in range(1, spec_ref.shape[0]):
        masking[t, :] += (0.5 ** factor) * masking[t-1, :]

    # add the mask (derived from the reference only) to both spectra
    masked_ref = spec_ref + 0.1 * (masking @ filterbank)
    masked_deg = spec_deg + 0.1 * (masking @ filterbank)

    # combine each pair of neighbouring frames
    masked_ref = masked_ref[1:] + masked_ref[:-1]
    masked_deg = masked_deg[1:] + masked_deg[:-1]

    # distortion metric: squared log spectral ratio, averaged per band, then per frame
    ratio = masked_deg / masked_ref
    log_dist = np.log(ratio) ** 2
    band_err = ((log_dist @ filterbank.T) / np.sum(filterbank, axis=1))
    frame_err = np.mean(band_err ** 1, axis=1)

    if apply_vad:
        _, activity = compute_vad_mask(x, 16000)
        activity = convert_mask(activity, frame_err.shape[0])
    else:
        activity = np.ones_like(frame_err)

    err = np.mean(np.abs(frame_err[activity > 1e-6]) ** 3) ** (1/6)

    return float(err)
+
def compare(x, y, apply_vad=False):
    """Return the 2-norm over the list of ``_compare`` results (currently only factor=1)."""
    partial_errors = [_compare(x, y, apply_vad=apply_vad, factor=1)]
    return np.linalg.norm(partial_errors, ord=2)
+
if __name__ == "__main__":
    import argparse
    from scipy.io import wavfile

    parser = argparse.ArgumentParser()
    parser.add_argument('ref', type=str, help='reference wav file')
    parser.add_argument('deg', type=str, help='degraded wav file')
    parser.add_argument('--apply-vad', action='store_true')
    args = parser.parse_args()

    fs1, x = wavfile.read(args.ref)
    fs2, y = wavfile.read(args.deg)

    # both files have to be sampled at 16 kHz; checking max(fs1, fs2) alone
    # would accept e.g. an 8 kHz file next to a 16 kHz one
    if fs1 != 16000 or fs2 != 16000:
        raise ValueError('error: encountered sampling frequency different from 16kHz')

    # the metric is written for mono signals; wavfile.read returns a 2-D array
    # for multi-channel files
    if x.ndim != 1 or y.ndim != 1:
        raise ValueError('error: expected mono input files')

    # scale to [-1, 1]; this assumes 16-bit PCM input
    x = x.astype(np.float32) / 2**15
    y = y.astype(np.float32) / 2**15

    err = compare(x, y, apply_vad=args.apply_vad)

    print(f"MOC: {err}")
diff --git a/dnn/torch/osce/stndrd/evaluation/process_dataset.sh b/dnn/torch/osce/stndrd/evaluation/process_dataset.sh
new file mode 100755
index 00000000..a490da93
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/process_dataset.sh
@@ -0,0 +1,98 @@
#!/bin/bash
#
# Converts every .wav file found below <input folder> to 16 kHz mono, runs it
# through opus_demo at each bitrate in BITRATES and processes the result with
# the LACE and NoLACE checkpoints via test_model.py.
#
# Required environment variables: PYTHON, TESTMODEL, OPUSDEMO, LACE, NOLACE
# Optional environment variable:  BITRATES (whitespace separated list in bps)

if [ ! -f "$PYTHON" ]
then
    echo "PYTHON variable does not link to a file. Please point it to your python executable."
    exit 1
fi

if [ ! -f "$TESTMODEL" ]
then
    echo "TESTMODEL variable does not link to a file. Please point it to your copy of test_model.py"
    exit 1
fi

if [ ! -f "$OPUSDEMO" ]
then
    echo "OPUSDEMO variable does not link to a file. Please point it to your patched version of opus_demo."
    exit 1
fi

if [ ! -f "$LACE" ]
then
    echo "LACE variable does not link to a file. Please point it to your copy of the LACE checkpoint."
    exit 1
fi

if [ ! -f "$NOLACE" ]
then
    echo "NOLACE variable does not link to a file. Please point it to your copy of the NOLACE checkpoint."
    exit 1
fi

case $# in
    2) INPUT=$1; OUTPUT=$2;;
    *) echo "process_dataset.sh <input folder> <output folder>"; exit 1;;
esac

if [ -d "$OUTPUT" ]
then
    echo "output folder $OUTPUT exists, aborting..."
    exit 1
fi

mkdir -p "$OUTPUT"

if [ -z "$BITRATES" ]
then
    BITRATES=( 6000 7500 9000 12000 15000 18000 24000 32000 )
    echo "BITRATES variable not defined. Proceeding with default bitrates ${BITRATES[*]}."
else
    # a value passed through the environment is a plain string: split it into an array
    read -r -a BITRATES <<< "$BITRATES"
fi


echo "LACE=${LACE}" > "${OUTPUT}/info.txt"
echo "NOLACE=${NOLACE}" >> "${OUTPUT}/info.txt"

ITEMFILE=${OUTPUT}/items.txt
BITRATEFILE=${OUTPUT}/bitrates.txt

FPROCESSING=${OUTPUT}/processing
FCLEAN=${OUTPUT}/clean
FOPUS=${OUTPUT}/opus
FLACE=${OUTPUT}/lace
FNOLACE=${OUTPUT}/nolace

mkdir -p "$FPROCESSING" "$FCLEAN" "$FOPUS" "$FLACE" "$FNOLACE"

echo "${BITRATES[*]}" > "$BITRATEFILE"

# file names are read NUL-separated on fd 3 so that paths containing
# whitespace survive and the tools in the loop cannot consume the list
while IFS= read -r -d '' -u 3 fn
do
    UUID=$(uuid)
    echo "$UUID $fn" >> "$ITEMFILE"
    PIDS=( )
    for br in "${BITRATES[@]}"
    do
        # run opus
        pfolder=${FPROCESSING}/${UUID}_${br}
        mkdir -p "$pfolder"
        sox "$fn" -c 1 -r 16000 -b 16 -e signed-integer "$pfolder/clean.s16"
        (cd "${pfolder}" && "$OPUSDEMO" voip 16000 1 "$br" clean.s16 noisy.s16)

        # copy clean and opus
        sox -c 1 -r 16000 -b 16 -e signed-integer "$pfolder/clean.s16" "$FCLEAN/${UUID}_${br}_clean.wav"
        sox -c 1 -r 16000 -b 16 -e signed-integer "$pfolder/noisy.s16" "$FOPUS/${UUID}_${br}_opus.wav"

        # run LACE
        "$PYTHON" "$TESTMODEL" "$pfolder" "$LACE" "$FLACE/${UUID}_${br}_lace.wav" &
        PIDS+=( "$!" )

        # run NoLACE
        "$PYTHON" "$TESTMODEL" "$pfolder" "$NOLACE" "$FNOLACE/${UUID}_${br}_nolace.wav" &
        PIDS+=( "$!" )
    done
    # wait for all enhancement jobs of this item before starting the next one
    for pid in "${PIDS[@]}"
    do
        wait "$pid"
    done
done 3< <(find "$INPUT" -type f -name "*.wav" -print0)
diff --git a/dnn/torch/osce/stndrd/evaluation/run_nomad.py b/dnn/torch/osce/stndrd/evaluation/run_nomad.py
new file mode 100644
index 00000000..0267bc92
--- /dev/null
+++ b/dnn/torch/osce/stndrd/evaluation/run_nomad.py
@@ -0,0 +1,138 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import tempfile
+import shutil
+
+import pandas as pd
+from scipy.spatial.distance import cdist
+from scipy.io import wavfile
+import numpy as np
+
+from nomad_audio.nomad import Nomad
+
+
# command line interface: processed folder plus NOMAD mode and device switches
parser = argparse.ArgumentParser()
for _flag, _options in (
    ('folder', dict(type=str, help='folder with processed items')),
    ('--full-reference', dict(action='store_true', help='use NOMAD as full-reference metric')),
    ('--device', dict(type=str, default=None, help='device for Nomad')),
):
    parser.add_argument(_flag, **_options)
+
+
def get_bitrates(folder):
    """Read ``bitrates.txt`` in ``folder`` and return its whitespace separated integers."""
    bitrate_file = os.path.join(folder, 'bitrates.txt')
    with open(bitrate_file) as f:
        content = f.read()

    return list(map(int, content.rstrip('\n').split()))
+
def get_itemlist(folder):
    """Return the item ids listed in ``items.txt`` in ``folder``.

    The id is the first whitespace separated token of each line. Blank lines
    are skipped instead of raising an IndexError.
    """
    with open(os.path.join(folder, 'items.txt')) as f:
        lines = f.readlines()

    tokenized = (line.split() for line in lines)
    items = [tokens[0] for tokens in tokenized if tokens]

    return items
+
+
def _embed_folder(model, folder):
    """Return the embeddings of all files in ``folder`` (sorted by name), indexed by file path."""
    files = pd.DataFrame({'filename': [os.path.join(folder, x) for x in sorted(os.listdir(folder))]})
    return model.get_embeddings_csv(model.model, files).set_index('filename')


def nomad_wrapper(ref_folder, deg_folder, full_reference=False, ref_embeddings=None, device=None):
    """Score the files in ``deg_folder`` with NOMAD.

    Args:
        ref_folder: folder with reference files.
        deg_folder: folder with degraded files.
        full_reference: if False, ``Nomad.predict`` is used with ``ref_folder``
            as non-matching references. If True, the distance between the
            embeddings of the i-th reference and the i-th degraded file (both
            folders sorted by file name) is used.
        ref_embeddings: previously computed reference embeddings to reuse in
            full-reference mode, or None to compute them.
        device: device argument passed to ``Nomad``.

    Returns:
        tuple: ``(results, ref_embeddings)``; ``results`` maps a file key to
        its score and ``ref_embeddings`` is None unless ``full_reference``.

    Raises:
        ValueError: in full-reference mode, if the reference and degraded
            embeddings do not have the same shape.
    """
    model = Nomad(device=device)
    if not full_reference:
        results = model.predict(nmr=ref_folder, deg=deg_folder)[0].to_dict()['NOMAD']
        return results, None

    if ref_embeddings is None:
        print(f"Computing reference embeddings from {ref_folder}")
        ref_embeddings = _embed_folder(model, ref_folder)

    print(f"Computing degraded embeddings from {deg_folder}")
    deg_embeddings = _embed_folder(model, deg_folder)

    ref = np.asarray(ref_embeddings, dtype=float)
    deg = np.asarray(deg_embeddings, dtype=float)
    if ref.shape != deg.shape:
        # pairing is positional, so a mismatch would silently mis-align items
        raise ValueError(f"reference and degraded embeddings differ in shape: {ref.shape} vs {deg.shape}")

    # row-wise Euclidean distance; equals the diagonal of the full pairwise
    # distance matrix without computing the off-diagonal entries
    dist = np.linalg.norm(ref - deg, axis=1)
    test_files = [os.path.splitext(os.path.basename(x))[0] for x in deg_embeddings.index]

    results = dict(zip(test_files, dist))

    return results, ref_embeddings
+
+
+
+
def nomad_process_all(folder, full_reference=False, device=None):
    """Compute NOMAD scores for the opus, lace and nolace conditions of all items.

    Files are copied to a temporary directory under condition-independent
    names (``<item>_<bitrate>.wav``) so that the result keys of the three
    conditions coincide.

    Args:
        folder: processed dataset folder containing ``bitrates.txt``,
            ``items.txt`` and the sub-folders clean, opus, lace and nolace.
        full_reference: passed on to ``nomad_wrapper``.
        device: device for the NOMAD model, passed on to ``nomad_wrapper``.

    Returns:
        dict: bitrate -> array of shape ``(num_items, 3)`` with columns
        opus, lace, nolace.
    """
    bitrates = get_bitrates(folder)
    items = get_itemlist(folder)
    conditions = ['clean', 'opus', 'lace', 'nolace']

    with tempfile.TemporaryDirectory() as tmpdir:
        cleandir, opusdir, lacedir, nolacedir = [os.path.join(tmpdir, cond) for cond in conditions]

        # prepare files
        for d in [cleandir, opusdir, lacedir, nolacedir]:
            os.makedirs(d)
        for br in bitrates:
            for item in items:
                for cond in conditions:
                    shutil.copyfile(os.path.join(folder, cond, f"{item}_{br}_{cond}.wav"), os.path.join(tmpdir, cond, f"{item}_{br}.wav"))

        # device was previously dropped here, so --device had no effect
        nomad_opus, ref_embeddings = nomad_wrapper(cleandir, opusdir, full_reference=full_reference, ref_embeddings=None, device=device)
        nomad_lace, ref_embeddings = nomad_wrapper(cleandir, lacedir, full_reference=full_reference, ref_embeddings=ref_embeddings, device=device)
        nomad_nolace, ref_embeddings = nomad_wrapper(cleandir, nolacedir, full_reference=full_reference, ref_embeddings=ref_embeddings, device=device)

    results = dict()
    for br in bitrates:
        results[br] = np.zeros((len(items), 3))
        for i, item in enumerate(items):
            key = f"{item}_{br}"
            results[br][i, 0] = nomad_opus[key]
            results[br][i, 1] = nomad_lace[key]
            results[br][i, 2] = nomad_nolace[key]

    return results
+
+
+
if __name__ == "__main__":
    args = parser.parse_args()

    # item list and bitrates are read inside nomad_process_all
    results = nomad_process_all(args.folder, full_reference=args.full_reference, device=args.device)

    np.save(os.path.join(args.folder, 'results_nomad.npy'), results)

    print("Done.")
diff --git a/dnn/torch/osce/stndrd/presentation/endoscopy.py b/dnn/torch/osce/stndrd/presentation/endoscopy.py
new file mode 100644
index 00000000..05dd4750
--- /dev/null
+++ b/dnn/torch/osce/stndrd/presentation/endoscopy.py
@@ -0,0 +1,205 @@
+""" module for inspecting models during inference """
+
+import os
+
+import yaml
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+
+import torch
+import numpy as np
+
+# stores entries {key : {'fid' : fid, 'fs' : fs, 'dim' : dim, 'dtype' : dtype}}
+_state = dict()
+_folder = 'endoscopy'
+
+def get_gru_gates(gru, input, state):
+    """ recomputes the gate activations of one GRU step
+
+    Uses the layer-0 weights and biases of gru together with the squeezed input and
+    state. Returns a dict with the entries 'reset_gate', 'update_gate' and 'new_gate'.
+    """
+    size = gru.hidden_size
+
+    def rows(gate_index):
+        # rows of the stacked weights/biases belonging to one gate
+        return slice(gate_index * size, (gate_index + 1) * size)
+
+    from_input = torch.matmul(gru.weight_ih_l0, input.squeeze())
+    from_state = torch.matmul(gru.weight_hh_l0, state.squeeze())
+
+    # reset gate: first block
+    r = rows(0)
+    reset_gate = torch.sigmoid(from_input[r] + gru.bias_ih_l0[r] + from_state[r] + gru.bias_hh_l0[r])
+
+    # update gate: second block
+    u = rows(1)
+    update_gate = torch.sigmoid(from_input[u] + gru.bias_ih_l0[u] + from_state[u] + gru.bias_hh_l0[u])
+
+    # new gate: third block, the recurrent part is scaled by the reset gate
+    n = rows(2)
+    new_gate = torch.tanh(from_input[n] + gru.bias_ih_l0[n] + reset_gate * (from_state[n] + gru.bias_hh_l0[n]))
+
+    return {'reset_gate' : reset_gate, 'update_gate' : update_gate, 'new_gate' : new_gate}
+
+
+def init(folder='endoscopy'):
+    """ selects folder as output folder for endoscopy data and creates it if missing """
+
+    global _folder
+    _folder = folder
+
+    # an existing folder is reused as is, only a warning is printed
+    if os.path.exists(folder):
+        print(f"warning: endoscopy folder {folder} exists. Content may be lost or inconsistent results may occur.")
+        return
+
+    os.makedirs(folder)
+
+def write_data(key, data, fs):
+    """ appends data to previous data written under key
+
+    On the first call for a key, <key>.bin is opened for writing in the endoscopy
+    folder and <key>.yml with fs, shape and dtype of data is written next to it.
+    Every later call for the same key has to provide the same fs, dtype and shape.
+
+    Parameters:
+        key: name of the data stream, used as file name stem
+        data: numpy array or torch.Tensor (converted to numpy before writing)
+        fs: value stored as 'fs' for this key
+
+    Raises:
+        ValueError: if fs, dtype or shape differ from the first call for key
+    """
+
+    # convert to numpy if torch.Tensor is given; the tensor is moved to host memory
+    # first, since Tensor.numpy() fails for tensors living on another device
+    if isinstance(data, torch.Tensor):
+        data = data.detach().cpu().numpy()
+
+    dim = tuple(data.shape)
+    dtype = str(data.dtype)
+
+    if key not in _state:
+        _state[key] = {
+            'fid' : open(os.path.join(_folder, key + '.bin'), 'wb'),
+            'fs' : fs,
+            'dim' : dim,
+            'dtype' : dtype
+        }
+
+        # metadata needed to interpret the raw binary file
+        with open(os.path.join(_folder, key + '.yml'), 'w') as f:
+            f.write(yaml.dump({'fs' : fs, 'dim' : dim, 'dtype' : dtype.split('.')[-1]}))
+    else:
+        if _state[key]['fs'] != fs:
+            raise ValueError(f"fs changed for key {key}: {_state[key]['fs']} vs. {fs}")
+        if _state[key]['dtype'] != dtype:
+            raise ValueError(f"dtype changed for key {key}: {_state[key]['dtype']} vs. {dtype}")
+        if _state[key]['dim'] != dim:
+            raise ValueError(f"dim changed for key {key}: {_state[key]['dim']} vs. {dim}")
+
+    _state[key]['fid'].write(data.tobytes())
+
+def close(folder='endoscopy'):
+    """ closes all files opened by write_data and forgets the registered keys
+
+    The folder argument is not used.
+    """
+    for entry in _state.values():
+        entry['fid'].close()
+
+    # drop the closed handles, otherwise a later write_data call for one of
+    # these keys would try to write to a closed file
+    _state.clear()
+
+
+def read_data(folder='endoscopy'):
+    """ retrieves written data as numpy arrays
+
+    For every <key>.yml in folder the metadata is loaded and the content of <key>.bin
+    is reshaped to (-1,) + dim.
+
+    Returns a dict mapping key to the metadata dict extended by the entry 'data'.
+    """
+
+    keys = [name[:-4] for name in os.listdir(folder) if name.endswith('.yml')]
+
+    return_dict = dict()
+
+    for key in keys:
+        # NOTE(review): yaml.FullLoader should only be used on trusted files
+        with open(os.path.join(folder, key + '.yml'), 'r') as f:
+            value = yaml.load(f.read(), yaml.FullLoader)
+
+        with open(os.path.join(folder, key + '.bin'), 'rb') as f:
+            data = np.frombuffer(f.read(), dtype=value['dtype'])
+
+        # dim is coerced to a tuple, since a yml file may provide it as plain list
+        value['data'] = data.reshape((-1,) + tuple(value['dim']))
+
+        return_dict[key] = value
+
+    return return_dict
+
+def get_best_reshape(shape, target_ratio=1):
+    """ calculates the best 2d reshape of shape given the target ratio (rows/cols)
+
+    The number of columns is the largest divisor of the pixel count that does not
+    exceed sqrt(pixel_count / target_ratio) (but is at least 1).
+
+    Returns (num_rows, num_columns), or (1,) if shape describes a single element.
+    """
+
+    # product over all dimensions (1 for an empty shape)
+    pixel_count = 1
+    for s in shape:
+        pixel_count *= s
+
+    if pixel_count == 1:
+        return (1,)
+
+    # at least one column: the estimate truncates to 0 if target_ratio exceeds
+    # pixel_count, which would make the divisor search below divide by zero
+    num_columns = max(1, int((pixel_count / target_ratio)**.5))
+
+    # decrease until the column count divides the pixel count
+    while (pixel_count % num_columns):
+        num_columns -= 1
+
+    num_rows = pixel_count // num_columns
+
+    return (num_rows, num_columns)
+
+def get_type_and_shape(shape):
+    """ decides how data of the given shape is displayed
+
+    Returns ('plot', (1,)) if the shape describes a single element and
+    ('image', display_shape) otherwise.
+    """
+
+    # an empty shape can happen if data is one dimensional
+    dims = shape if len(shape) > 0 else (1,)
+
+    # calculate pixel count
+    pixel_count = 1
+    for d in dims:
+        pixel_count *= d
+
+    if pixel_count == 1:
+        return 'plot', (1, )
+
+    # stay with shape if already 2-dimensional
+    # NOTE(review): this test only fails if both entries equal the pixel count, so every
+    # 2d shape reaching this point is kept as is; 'and' may have been intended -- confirm
+    if len(dims) == 2 and not (dims[0] == pixel_count and dims[1] == pixel_count):
+        return 'image', dims
+
+    return 'image', get_best_reshape(dims)
+
+def make_animation(data, filename, start_index=80, stop_index=-80, interval=20, half_signal_window_length=80):
+    """ renders all entries of data into one animation and saves it as mp4 file
+
+    Every key gets its own subplot. Entries with a single value per frame are drawn
+    as signal window of 2 * half_signal_window_length values around the current
+    index, all other entries are drawn as image.
+
+    Parameters:
+        data: dict mapping key to a dict with the entries 'data', 'dim' and 'fs'
+        filename: output file name, '.mp4' is appended if missing
+        start_index: first frame index (raised to half_signal_window_length if smaller)
+        stop_index: end of the frame range (exclusive); negative values count from the end
+        interval: passed to ArtistAnimation as delay between frames
+        half_signal_window_length: half width of the window shown in signal plots
+    """
+
+    keys = sorted(data.keys())
+    num_keys = len(keys)
+
+    # determine plot setup; at least one row, since the estimate truncates to 0 for a single key
+    num_rows = max(1, int((num_keys * 3/4) ** .5))
+
+    num_cols = (num_keys + num_rows - 1) // num_rows
+
+    # squeeze=False keeps axs two-dimensional also for a single row or column
+    fig, axs = plt.subplots(num_rows, num_cols, squeeze=False)
+    fig.set_size_inches(num_cols * 5, num_rows * 5)
+
+    display = dict()
+
+    fs_max = max([val['fs'] for val in data.values()])
+
+    num_samples = max([val['data'].shape[0] for val in data.values()])
+
+    # inspect data
+    for i, key in enumerate(keys):
+        axs[i // num_cols, i % num_cols].title.set_text(key)
+
+        display[key] = dict()
+
+        display[key]['type'], display[key]['shape'] = get_type_and_shape(data[key]['dim'])
+        # ratio between the rate of this entry and the highest rate
+        display[key]['down_factor'] = data[key]['fs'] / fs_max
+
+    start_index = max(start_index, half_signal_window_length)
+
+    # negative stop indices count from the end (skipped for empty data to avoid an endless loop)
+    while stop_index < 0 and num_samples > 0:
+        stop_index += num_samples
+
+    stop_index = min(stop_index, num_samples - half_signal_window_length)
+
+    # actual plotting
+    frames = []
+    for index in range(start_index, stop_index):
+        ims = []
+        for i, key in enumerate(keys):
+            ax = axs[i // num_cols, i % num_cols]
+
+            if display[key]['type'] == 'plot':
+                # NOTE(review): signal windows are taken at the full-rate index
+                ims.append(ax.plot(data[key]['data'][index - half_signal_window_length : index + half_signal_window_length], marker='P', markevery=[half_signal_window_length], animated=True, color='blue')[0])
+
+            elif display[key]['type'] == 'image':
+                # entries with a lower rate are indexed at their own rate; the index is
+                # clamped since rounding may point one past the last frame
+                feature_index = int(round(index * display[key]['down_factor']))
+                feature_index = min(feature_index, data[key]['data'].shape[0] - 1)
+                ims.append(ax.imshow(data[key]['data'][feature_index].reshape(display[key]['shape']), animated=True))
+
+        frames.append(ims)
+
+    ani = animation.ArtistAnimation(fig, frames, interval=interval, blit=True, repeat_delay=1000)
+
+    if not filename.endswith('.mp4'):
+        filename += '.mp4'
+
+    ani.save(filename)
\ No newline at end of file
diff --git a/dnn/torch/osce/stndrd/presentation/lace_demo.ipynb b/dnn/torch/osce/stndrd/presentation/lace_demo.ipynb
new file mode 100644
index 00000000..5e1820a6
--- /dev/null
+++ b/dnn/torch/osce/stndrd/presentation/lace_demo.ipynb
@@ -0,0 +1,313 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.animation\n",
+ "from scipy.io import wavfile\n",
+ "import scipy.signal\n",
+ "import torch\n",
+ "\n",
+ "from playback import make_playback_animation\n",
+ "from endoscopy import read_data\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.rcParams.update({\n",
+ " \"text.usetex\": True,\n",
+ " \"font.family\": \"Helvetica\",\n",
+ " \"font.size\": 24\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 432x288 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 511.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# comparison opus@6kb/s vs. LACE@6kb/s\n",
+ "for name in ['opus_6kbps', 'lace_6kbps']:\n",
+ " fs, y = wavfile.read(f'lace_demo/{name}.wav')\n",
+ " spec, freqs, t, im = plt.specgram(y, NFFT=512, cmap='inferno', noverlap=256 + 128, pad_to=4096)\n",
+ " spec = 10*np.log10(spec)\n",
+ " \n",
+ " make_playback_animation(f'lace_demo/{name}_spec.mp4', spec, len(y)/16)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = read_data('lace_demo/endoscopy/')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_adaconv_impz(data, prefix, frame_size=80, overlap_size=40):\n",
+ " win1 = .5 + .5 * np.cos((np.arange(overlap_size) + 0.5) * torch.pi / overlap_size)\n",
+ " win1 = win1[:, np.newaxis]\n",
+ " win2 = win1[::-1]\n",
+ " kernels = data[prefix + '_kernels']['data'][0] * data[prefix + '_gains']['data'][0, :, np.newaxis]\n",
+ " kernels = kernels[:, ::-1]\n",
+ " num_frames = len(kernels)\n",
+ " impz = np.repeat(kernels, frame_size, 0)\n",
+ " for i in range(num_frames - 2, 0, -1):\n",
+ " idx = i * frame_size\n",
+ " impz[idx : idx + overlap_size] = win2 * impz[idx : idx + overlap_size] + win1 * impz[idx - overlap_size : idx]\n",
+ " \n",
+ " return impz"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_adacomb_impz(data, prefix, frame_size=80, overlap_size=40):\n",
+ " win1 = .5 + .5 * np.cos((np.arange(overlap_size) + 0.5) * torch.pi / overlap_size)\n",
+ " win1 = win1[:, np.newaxis]\n",
+ " win2 = win1[::-1]\n",
+ " kernels = data[prefix + '_kernels']['data'][0]\n",
+ " gg = data[prefix + '_global_conv_gains']['data'][0]\n",
+ " g = data[prefix + '_gains']['data'][0]\n",
+ " lags = data[prefix + '_lags']['data'][0]\n",
+ " \n",
+ " num_frames = len(kernels)\n",
+ " max_lag = int(lags.max())\n",
+ " kernel_size = kernels.shape[1]\n",
+ " padding = kernel_size // 2\n",
+ " \n",
+ " impz = np.zeros((num_frames, max_lag + padding + 1))\n",
+ " for i in range(num_frames):\n",
+ " p = int(lags[i])\n",
+ " impz[i, 0] = gg[i]\n",
+ " impz[i, p - padding : p - padding + kernel_size] = gg[i] * g[i] * kernels[i, ::-1]\n",
+ " \n",
+ " impz = np.repeat(impz, frame_size, 0)\n",
+ " for i in range(num_frames - 2, 0, -1):\n",
+ " idx = i * frame_size\n",
+ " impz[idx : idx + overlap_size] = win2 * impz[idx : idx + overlap_size] + win1 * impz[idx - overlap_size : idx]\n",
+ " \n",
+ " return impz"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "impz_comb1 = get_adacomb_impz(data, 'limited_adaptive_comb1d_1')\n",
+ "impz_comb2 = get_adacomb_impz(data, 'limited_adaptive_comb1d_2')\n",
+ "impz_conv1 = get_adaconv_impz(data, 'limited_adaptive_conv1d_1')\n",
+ "\n",
+ "phi = data['hidden_features']['data'][0, 0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 864x1296 with 8 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# LACE animation\n",
+ "\n",
+ "fig, axs = plt.subplots(4, 2)\n",
+ "frame_duration=40\n",
+ "fig.set_size_inches(12, 18)\n",
+ "axs[0, 0].set_xlabel('Tap')\n",
+ "axs[0, 0].set_ylabel('Amplitude')\n",
+ "axs[0, 1].set_xlabel('Frequency (kHz)')\n",
+ "axs[0, 1].set_ylabel('Amplitude (dB)')\n",
+ "axs[1, 0].set_xlabel('Tap')\n",
+ "axs[1, 0].set_ylabel('Amplitude')\n",
+ "axs[1, 1].set_xlabel('Frequency (kHz)')\n",
+ "axs[1, 1].set_ylabel('Amplitude (dB)')\n",
+ "axs[2, 0].set_xlabel('Tap')\n",
+ "axs[2, 0].set_ylabel('Amplitude')\n",
+ "axs[2, 1].set_xlabel('Frequency (kHz)')\n",
+ "axs[2, 1].set_ylabel('Amplitude (dB)')\n",
+ "axs[3, 0].set_xlabel('Dim')\n",
+ "axs[3, 1].set_visible(False)\n",
+ "fig.tight_layout()\n",
+ "\n",
+ "frames = []\n",
+ "for i in range(12800, 14000, 4):\n",
+ " f = impz_comb1[i]\n",
+ " w, h = scipy.signal.freqz(f, fs=16000)\n",
+ " frame = axs[0, 0].plot(f, 'b') + axs[0, 1].plot(w/1000, 10 * np.log10(np.abs(h)), 'b')\n",
+ " \n",
+ " f = impz_comb2[i]\n",
+ " w, h = scipy.signal.freqz(f, fs=16000)\n",
+ " frame += axs[1, 0].plot(f, 'b') + axs[1, 1].plot(w/1000, 10 * np.log10(np.abs(h)), 'b')\n",
+ " \n",
+ " f = impz_conv1[i]\n",
+ " w, h = scipy.signal.freqz(f, fs=16000)\n",
+ " frame += axs[2, 0].plot(f, 'b') + axs[2, 1].plot(w/1000, 10 * np.log10(np.abs(h)), 'b')\n",
+ " \n",
+ " frame += axs[3, 0].plot(phi[i//80], 'b')\n",
+ " \n",
+ " frames.append(frame)\n",
+ " \n",
+ "ani = matplotlib.animation.ArtistAnimation(fig, frames, blit=True, interval=frame_duration)\n",
+ "ani.save('lace_demo/responses2.mp4', dpi=720)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 864x1296 with 8 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# static plot for Jean-Marc's on-site presentation\n",
+ "\n",
+ "fig, axs = plt.subplots(4, 2)\n",
+ "frame_duration=40\n",
+ "fig.set_size_inches(12, 18)\n",
+ "axs[0, 0].set_xlabel('Tap')\n",
+ "axs[0, 0].set_ylabel('Amplitude')\n",
+ "axs[0, 1].set_xlabel('Frequency (kHz)')\n",
+ "axs[0, 1].set_ylabel('Amplitude (dB)')\n",
+ "axs[1, 0].set_xlabel('Tap')\n",
+ "axs[1, 0].set_ylabel('Amplitude')\n",
+ "axs[1, 1].set_xlabel('Frequency (kHz)')\n",
+ "axs[1, 1].set_ylabel('Amplitude (dB)')\n",
+ "axs[2, 0].set_xlabel('Tap')\n",
+ "axs[2, 0].set_ylabel('Amplitude')\n",
+ "axs[2, 1].set_xlabel('Frequency (kHz)')\n",
+ "axs[2, 1].set_ylabel('Amplitude (dB)')\n",
+ "axs[3, 0].set_xlabel('Tap')\n",
+ "axs[3, 0].set_ylabel('Amplitude')\n",
+ "axs[3, 1].set_xlabel('Frequency (kHz)')\n",
+ "axs[3, 1].set_ylabel('Amplitude (dB)')\n",
+ "fig.tight_layout()\n",
+ "i=10*80\n",
+ "\n",
+ "f = impz_comb1[i]\n",
+ "w, h = scipy.signal.freqz(f, fs=16000)\n",
+ "axs[0, 0].plot(f, 'b')\n",
+ "axs[0, 1].plot(w/1000, 10 * np.log10(np.abs(h)), 'b')\n",
+ "\n",
+ "f = impz_comb2[i]\n",
+ "w, h = scipy.signal.freqz(f, fs=16000)\n",
+ "axs[1, 0].plot(f, 'b')\n",
+ "axs[1, 1].plot(w/1000, 10 * np.log10(np.abs(h)), 'b')\n",
+ "\n",
+ "f = impz_conv1[i]\n",
+ "w, h = scipy.signal.freqz(f, fs=16000)\n",
+ "axs[2, 0].plot(f, 'b')\n",
+ "axs[2, 1].plot(w/1000, 10 * np.log10(np.abs(h)), 'b')\n",
+ "\n",
+ "f = np.convolve(np.convolve(impz_comb1[i], impz_comb2[i], mode='full'), impz_conv1[i])\n",
+ "w, h = scipy.signal.freqz(f, fs=16000)\n",
+ "axs[3, 0].plot(f, 'b')\n",
+ "axs[3, 1].plot(w/1000, 10 * np.log10(np.abs(h)), 'b')\n",
+ "\n",
+ "fig.savefig('plots/lace_snapshot_unvoiced.png')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "torch",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/dnn/torch/osce/stndrd/presentation/linear_prediction.ipynb b/dnn/torch/osce/stndrd/presentation/linear_prediction.ipynb
new file mode 100644
index 00000000..a465fb9e
--- /dev/null
+++ b/dnn/torch/osce/stndrd/presentation/linear_prediction.ipynb
@@ -0,0 +1,320 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.animation\n",
+ "from scipy.io import wavfile\n",
+ "import scipy.signal\n",
+ "import torch\n",
+ "\n",
+ "from playback import make_playback_animation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_lpcnet_features(feature_file, version=2):\n",
+ " if version == 2 or version == 3:\n",
+ " layout = {\n",
+ " 'cepstrum': [0,18],\n",
+ " 'periods': [18, 19],\n",
+ " 'pitch_corr': [19, 20],\n",
+ " 'lpc': [20, 36]\n",
+ " }\n",
+ " frame_length = 36\n",
+ "\n",
+ " elif version == 1:\n",
+ " layout = {\n",
+ " 'cepstrum': [0,18],\n",
+ " 'periods': [36, 37],\n",
+ " 'pitch_corr': [37, 38],\n",
+ " 'lpc': [39, 55],\n",
+ " }\n",
+ " frame_length = 55\n",
+ " else:\n",
+ " raise ValueError(f'unknown feature version: {version}')\n",
+ "\n",
+ "\n",
+ " raw_features = torch.from_numpy(np.fromfile(feature_file, dtype='float32'))\n",
+ " raw_features = raw_features.reshape((-1, frame_length))\n",
+ "\n",
+ " features = torch.cat(\n",
+ " [\n",
+ " raw_features[:, layout['cepstrum'][0] : layout['cepstrum'][1]],\n",
+ " raw_features[:, layout['pitch_corr'][0] : layout['pitch_corr'][1]]\n",
+ " ],\n",
+ " dim=1\n",
+ " )\n",
+ "\n",
+ " lpcs = raw_features[:, layout['lpc'][0] : layout['lpc'][1]]\n",
+ " if version < 3:\n",
+ " periods = (0.1 + 50 * raw_features[:, layout['periods'][0] : layout['periods'][1]] + 100).long()\n",
+ " else:\n",
+ " periods = torch.round(torch.clip(256./2**(raw_features[:, layout['periods'][0] : layout['periods'][1]] + 1.5), 32, 256)).long()\n",
+ "\n",
+ " return {'features' : features, 'periods' : periods, 'lpcs' : lpcs}\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def run_lpc(signal, lpcs, frame_length=160):\n",
+ " num_frames, lpc_order = lpcs.shape\n",
+ "\n",
+ " prediction = np.concatenate(\n",
+ " [- np.convolve(signal[i * frame_length : (i + 1) * frame_length + lpc_order - 1], lpcs[i], mode='valid') for i in range(num_frames)]\n",
+ " )\n",
+ " error = signal[lpc_order :] - prediction\n",
+ "\n",
+ " return prediction, error"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lpcnet_features = load_lpcnet_features('lp/features.f32')\n",
+ "\n",
+ "features = lpcnet_features['features'].numpy()\n",
+ "periods = lpcnet_features['periods'].squeeze(-1).numpy()\n",
+ "lpcs = lpcnet_features['lpcs'].numpy()\n",
+ "\n",
+ "x = np.fromfile('data/a3_short.pcm', dtype=np.int16).astype(np.float32) / 2**15\n",
+ "x = np.concatenate((np.zeros(80), x, np.zeros(320)))\n",
+ "x_preemph = x.copy()\n",
+ "x_preemph[1:] -= 0.85 * x_preemph[:-1]\n",
+ "\n",
+ "num_frames = features.shape[0]\n",
+ "x = x[:160 * num_frames]\n",
+ "x_preemph = x_preemph[:160 * num_frames]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# short-term prediction\n",
+ "pred, error = run_lpc(np.concatenate((np.zeros(16), x_preemph)), lpcs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# long-term prediction\n",
+ "offset = 256\n",
+ "padded_error = np.concatenate((np.zeros(offset), error))\n",
+ "ltp_error = padded_error.copy()\n",
+ "for i, p in enumerate(list(periods)):\n",
+ " t0 = i * 160 + offset\n",
+ " t1 = t0 + 160\n",
+ " \n",
+ " past = padded_error[t0 - p : t1 - p]\n",
+ " current = padded_error[t0 : t1]\n",
+ " \n",
+ " gain = np.dot(past, current) / (np.dot(past, past) + 1e-6)\n",
+ " ltp_error[t0 : t1] -= gain * past\n",
+ " \n",
+ " \n",
+ "ltp_error = ltp_error[offset:]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 432x288 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "spec_x, freqs, t, im = plt.specgram(x, NFFT=512, cmap='inferno')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 432x288 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "spec_stp_error, freqs, t, im = plt.specgram(error, NFFT=512, cmap='inferno')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 432x288 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "spec_ltp_error, freqs, t, im = plt.specgram(ltp_error, NFFT=512, cmap='inferno')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_in = (x * (2 ** 15)).astype(np.int16)\n",
+ "x_stp = (error * (2 ** 15)).astype(np.int16)\n",
+ "x_ltp = (ltp_error * (2 ** 15)).astype(np.int16)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wavfile.write('lp/x_in.wav', 16000, x_in)\n",
+ "wavfile.write('lp/x_stp.wav', 16000, x_stp)\n",
+ "wavfile.write('lp/x_ltp.wav', 16000, x_ltp)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 432x288 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAEeCAYAAACzJ9OtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVyElEQVR4nO3dS28kWXre8ec9Jy6ZTF6K1aWamsaMpYYg22NJEKTRQjAsbwRo4y9gw/BSn8qAoW+glfbaCtDGlmzAtmSNZjTTM9PqupHMS0Sc83oRLBbbqInIKDlDqMD/12gwWRkkX5JJPDiXeI+5uwAAOJXwT10AAGDZCBoAwEkRNACAkyJoAAAnRdAAAE6KoAEAnFQx9KRZ8Qv3Prt39v+/HADA0jCiAQCcFEEDADgpggYAcFIEDQDgpAgaAMBJETQAgJMiaAAAJ0XQAABOiqABAJwUQQMAOCmCBgBwUgQNAOCkCBoAwEkRNACAkyJoAAAnRdAAAE6KoAEAnBRBAwA4qdGgMavmqAMAsFCjQVPGp3PUAQBYqNGgqYuLOeoAACzUpDUaU3GqOgAACzUaNBfxxRx1AAAWajRonvnn798xRjQAgGlGg+bSNw+Pz+vvnrQYAMDyjO86u78khAvV8fLkBQEAlmU0aNYhvn9sVyctBgCwPKNB81ndr8t43qn29ckLAgAsy2jQVPdXuFyf5c9OXQ8AYGFGg2bzsNEs69JWp60GALA4R9+wGcK57JSVAAAWaXzXWXBJUgwr1YFmzwCAacZ7nQVJiqripSqCBgAw0WhytC6ZRZVhrWD0OwMATDMaNNmlYGtFK3XTJbm6OeoCACzEaNDskyn7Qa48Rz0AgIUZDZq7TnLfa9e9UnKfoyYAwIKMBk2T+3Bp041WbAYAAEw0vr3Z+rtngtWMaAAAk40GzeF+RFPElW4zGwEAANOMBs0u9UGTvdNOzckLAgAsy2jQvOr6cKnCRi6mzgAA04wGTbrf1lyEleLxrdEAAJB0RNC8C5c2b09eDABgecbXaO7XZdq809Z2Jy8IALAs450BbC9Jaro3+pn93ckLAgAsy2jQ2P0lnncqVJ+8IADAsowGzdZuJYlmmgCAjzIaNG/11f2jqEZsCAAATHPEDZuv7h8ludPBGQAwzfjBZ/n9TrMm3560GADA8owGTRU2D4+7fDhpMQCA5Rlvqplu5qgDALBQk3rKcMomAGCqSUFj9DoDAEw03lTz0bpMl2lBAwCYZjRosr8/gybl/UmLAQAsz2jQuLdz1AEAWKgjgubRlmZu2AQATHTE6v77S1yMbgAA0xzRvdmmXA4AwDeMB429PxqA9RoAwFRH7DqjYzMA4OMdMRfmD4++OY0GAMC4iS1oOPwMADANq/sAgJMiaAAAJ3XErrPVHHUAABbqiPto4hx1AAAWanzqzJhdAwB8vPH7aDL30QAAPt4RazTlHHUAABZqNGhi2MxRBwBgoSYdfAYAwFSs9AMATmo0aIJVc9QBAFio0aAp4nqOOgAACzUaNOflt+aoAwCwUKNBU+lsjjoAAAs1GjQ36adz1AEAWKjRoCkDIxoAwMcbDZqzcD1HHQCAhRoNmgsnaAAAH280aBLHNwMA/hHGe52pmKMOAMBCjXcGcLrUAAA+3nhTTctz1AEAWKjxFjTO1BkA4OONb2/2973OjAabAICJxtdoZO8fGw02AQDTjHcGeLTrLARGNACAaUaD5rp4Hy4x1NKjEQ4AAGNGg+ZxrLhnSX66agAAizPpJpkqbk5VBwBgoQaDxlQoPBrSrCN9zwAA0wyPaKyQPZo8q+381PUAABZmMGjc998c0fhGwZg+AwAcb/wo50dJE1XIlU5aEABgWYbXaGylq0e3zmRlmZWnrgkAsCCjI5pVeLed2XSZLxVoQwMAmGCkY2ZW5yZTIVlQrVKBEQ0AYIKREU1QYa4YL+TeqpApeztPZQCARRgMmmC1skxmhSRXlpTzfp7KAACLMBg0
q/KZovl96xmptigZJ24CAI43mBpl6I8FsPtwiWaKYXX6qgAAizEydRYlSaYoujYDAD7G6DxYcru/STPoSRnvjwoAAOA4w1NnttZFkVWEtaSkJxWjGgDANINBs0uvZNav1cRwpWhSyoe5agMALMDgDZtFWKkMrlW8Ulvs5JLcu5lKAwAswejUWXbJFFSEWiY9bHUGAOAYg0Gz0bWCSUmtsmftk5Tz3Vy1AQAWYDBo9rrVPpl23Ws16a02hVSVz+eqDQCwAINB0+kgd1PyfgPAKvrDzZsAABxjMDWavH1oQbMqnqjJpra7mas2AMACDE+dpddKbgpWKFqtIFcInEcDADje6DxYEfrpsjbvFJk1AwBMNHgfTRU2itbfR5O8VZuN7c0AgEkGxyht7m/SzJ5kCqqCq0svZyoNALAEgyOapnujJvU3a0aVqoPLrJY7h58BAI4zOKKJYa19kiqdSZKypCJezFEXAGAhhs+jCYWq+P79Jtt9J2cAAI5z9D6yqFL7ZMo01QQATDAYNKagJkmNtiqtVptpqgkAmGYwaFJu1LopqlTptZosWtAAACYZTA1X0jq6XFkH2+mudQ4+AwBMMhw0nlUHV6FarqwqmoKVc9UGAFiAkaDplNyU1Cqq0CpK6/LpXLUBABZgZHtzpUPud5xJ0tvG1TF1BgCYYHREUwf1O878oG3K6vJurtoAAAswvL3ZClUxKyur0VbBjM0AAIBJRjcDtNkUFB6mz9jeDACYYmREExQkRS8VFNW5y+kMAACYYKQzQFQRXK0dlNTK3RWsnqs2AMACjN6wWQZX54f+4DN3FXE1V20AgAUYDJoqXiq5qbD+hs1oJlMc+hAAAL5hMGia9Faluc68P4OmDmwEAABMM5gcZdzosuwUFGQKMvXTaQAAHGs4aMKZgrmCB8X7HmdNdzNLYQCAZRgMmrvmSx1yUFBQk7dq3ZW9mas2AMACDAZN9k5B/QFo2fvtzQAATDHcVNMKBXMl9TdpliFInLAJAJhgMGiKuFERsjrr5OoDJvvdLIUBAJZhpDNAUMpBrqwm3alj6gwAMNHwiCbUanJQFtNlAICPMzqiMXPVvpJZYDMAAGCykc0ApQpzXfq5otWq6AwAAJhoMDnOwrXaHNSpP1nzqiRoAADTDHcGsFp3XVC6bzsTyRkAwESj0WEmBQUFK5RZogEATHTUGCUrqwxrVYxoAAATjUZHk02ttWrzTizRAACmGu51pqyUTV/bT9UmujYDAKYbDJqD3+quMzXaShIjGgDAZIPRsUuvJEkru5BZMUtBAIBlGQyaNu/UuimqUMp7bbu5ygIALMV4CxpJe7+Ve6sdpzgDACYaXXUJJkWVimF93F5oAAAeGcwOV1aQ60rPFMNaBUkDAJhoOGg8y0xK6uSeRWMAAMBUo0FTB+lgO2U/aJ+IGgDANINB0+U7mbnO/EIxrHVgMwAAYKLhzgD5tr/Ig2KolDj4DAAw0cjyfpC7qbXm0RqNzVAWAGApRoIm6XVr6tTfqbkKhAwAYJrRDcvRpLWfyZX7D7CzkxcFAFiO0aCpQj9hlvJBrbtiJGgAAMcbDZr6/opgpV61rdxpeAYAON4RRzm7OutUxY1cri69nqEsAMBSDDfVtEptNrnyfYNNk+gPAACYYKQzQKOzog+Z5J3OY6Fgm7lqAwAswOjUWWmutZ8pWFAVgjb153PUBQBYiNGguSyT1l6pCufq3HUen89RFwBgIQaDJthGwVxZrmilblOnwuq5agMALMDIGs1Br5tChaI6P+iqKBQ4/gwAMMFwalhQNGllhQqrtY5BpjhTaQCAJRgMmiJe6UnVqrag2s4lSZ0OsxQGAFiG0YPPNkWnzl21r9VlVyHWaAAAxyuGnsze6K4rdOMH1Vqpc6nx7Vy1AQAWYHiNxvuOzaX6TQB1lKKVc9QFAFiI0V1n66JTbVG111pH05lfzFUbAGABBqfOYrzSZdVoEzeydKYySHf2Zq7aAAALMHLDZqnz6qB3
B2tml5K3c9QFAFiI4aAJ/YCntD5pysAaDQBgmtHtzW/2K62iySVtCo4IAABMM7K9uVWb+04AW2/1pDKmzgAAkwwffKao69VOZlJWlrvUaj9XbQCABRgMmrPymarYSZKiorJMUazRAACONxw04Vq3Ta3kUqnwcKwzAADHGgyaTgftulLbzhXM9LoJrNEAACYZDJomb3VeHpTcdesHFeYy4zwaAMDxBlOjzTtdrXf3F97fS6PV6asCACzGcNCkO0lSGUylCt12JuOETQDABMNrNOlGL7cb7VNWlGmXpGCcsAkAON5wZwC1/WaAnLQOUXdd5uAzAMAkIy1oki6rvfbe6eBZJlNwps4AAMcbTY266BQVtPdOZ4WpZjMAAGCC4RY0VqrLUWehUKNOyV21EzQAgOONjGiyspuy912bD0kKbjOUBQBYitGps5T7YHG5krvK4YbPAAB8w8hmgEYxuMoQ1FnSPueHGzcBADjGyBrNSsFc5zEoK2sV2HEGAJhmNDmKkBSDVHmpizIwdQYAmGRk6qyVu+ndf9GkFZ0BAAATjEydRe27Qi7X1ra661xJPldtAIAFGO0MEIOrCqadbWmnCQCYbPSGzbpoFU0KCupcD/fUAABwjJERzUH7tlRyKSurZnkGADDRYNCEcK59KhVNStZqHU0lJ2wCACYYTI2quNK7mbKkTi6pMG7YBAAcbzBocu5UxaSLUooqtO1cea7KAACLMBw03ipaVhlc5/lKyaVDTnPVBgBYgNHuzU0qFE3KllUG6TzSGQAAcLyRXWdZ+1SozdLOtlpFU0W/MwDABIOpEcNaTer3NCe1Kkwq2AsAAJhgMGjW5VOZuYJJn6fvKJhURZIGAHC84c4ACnI3ZZdq9ffTrLlpEwAwwVG7zpKbbrWTJK0IGgDABEdtIcsutdaqDi7u1wQATDE4oinCSsmDCnOVXuq6zlpFmmoCAI43OKKp7VySdNOZWmt1USQlZ+4MAHC84RGN1cqSkvf30WxTEDNnAIApBkc0pddKuc+ioKBXTSRoAACTDO86s6wvd7XaLJVeSZJeN0QNAOB4g0GzyefapqB9ctVeax1dO3pqAgAmGAyaUpUuiqx9yuos6fP1QU1i1xkA4HiDQdM30sxaxaCt3WpTdLSgAQBMMhg0e7tTctNFaeqsU5sDIxoAwCTDmwGUH9ZoOh0UQ9Zt4oxNAMDxhtdovFZy6abNetP9WKURMgCAaUZPMXtet7oog7JnXa92ukvtHHUBABZiMGhaO2hTtrosTdfld3Wx2uu1beeqDQCwAKMjmn2KCiZ9O31XklTQ6wwAMMFg0CRv1aSobSe5XCkHZbFOAwA43mDQRCu1in0rgIMdVBWdohjRAACONxg0QUHrotNFKdVeqypaXdt6rtoAAAswukZTh6SLwvUsbHR1+VbryIgGAHC80ftoivh+TaZeHTgmAAAwyWDQHGynJkXddKZ/yHfa71Z6Wo8OggAAeDAcNH6rIFd26Vy1bu/O9aym1xkA4Hijw5On6602hevzVa19U4mYAQBMMRg0pqAs0yq6qmAqQtK2Y5UGAHC84c0AtlLKQbtk+vqQdHlxq7OCMQ0A4HjDRzn7lSRpFVz7nFWUrdaRoAEAHG8waGpf6dn5Wz1ftdrEqLu7M+0SU2cAgOONBE2l58+/0vP1Tk+qoJQKXZb0OgMAHG8waCoVKupG0bLuOlfOps8qzqMBABxvMGhWVsjM1eao7NL19Wu2NwMAJhkMmuyuty+vVVjWrz9xXT57pWhEDQDgeMXQk2+0Vc6mJ+udflNSsTroW2ecsAkAON7IUc6trq5fK1pW50Fvf/pM64I1GgDA8caPCTjfadtW+qvXF/rqq2dqEscEAACONxg0nXU63K5VhKxoUoxJT8/uZFbNVR8A4BM3GDRf6Yf6yY8/19V6q+9/9lIvvvOlnpzfKIaLueoDAHziBoPmpvlS7qZV1eii3mvz4mvt9qu5agMALMBg0MRQ6frJa0nS19uN
Di8v9XZ3pi69nqE0AMASDAbNOl5rc3WjGDs929zq7//PP9Om3s9VGwBgAQaD5ixc67BbabPZ6re+/1/VdYWKmFSX35qrPgDAJ244aPxC/+sHX8izqX5yo+/8yo906EpdVN+eqz4AwCdueDOAXur67Fbupjc/fKHr3/obPXvyWm3ezVUfAOATNxg0waL++e/8lTYXt/rxD78ji1nnlzc6pLdz1QcA+MQN9jo78wutf/nnWn3+D7r+3g/UfPlE+91KNt5QAAAASaNrNOfa/d1zhYuk4ouk2y9/SWcXt/pW9b256gMAfOIGRzSFR/3kf36hX768U7zZaXV1o9WLl/qX/m/nqg8A8IkbHNF8O57rV37vv6m7PdPP/+J72vxBI0k6i0ydAQCOM5gYL9ZR3X/6Q63+3Vov/uNPtP3X/17tm3M9qQgaAMBxBqfO1lHKT35Vu994LoVCam+1+u2tvv+U7gAAgOMMBk2TpbM/+8/Kv/RC6clzhT/9c/lvP9d1dZirPgDAJ24waG5a6W//y7/Qar1XvfmpfvS3v69/tftz/e+b87nqAwB84gaD5rbL+tHPXujn23O5SzddqR/+yWf66xtO2QQAHMfc/Rc++W/O/sj/6IusH9yt9fIQ9XvP3vQfZFIw16ZstLmfRnM3dTnqzX6lNgdtylZV7PR0c6vrq/7juq5Q05b68cvPtOtKJQ9K2VTFpFXRqYpJF6u+vc2uqZQ86HK11ao+aFU3qtc7lWWn1dWNYtWq/uytwmavcJWlTf3hbyJY/7bppC7174fQv63K/rHUv61X8rN+tOYh3F93H6o5STlLRSkvyvf/Jj1cY82+vyYEeVHKmoPC9rb/fEX/tWy/k7pOKor+83eddHNz//ny+1qC9deU1fsaH1/z7rqikFe1FGJf87tfbM59ffdfy1dn/fPF/emoXSPL+f77jLJmL2sOUs6yru2/Ttf1b/3R1+x/kdLtTkoubyQ1pvbrCx1eXqrbV9rfbJRSVM5Bnk3u/f9tW2p/qB9eL4+Z9a/DGJOKmB7+PeWgpqnU5ag2RaUc9Gtf/EDPf/+/y9YmXW76C9/eSa3321uiSVUhrer+ZxSLb/7s/t/vJ3/g/bc7+Y2U3q61+9lTpUOpZruWuz3U+qHv4+Hf77/v/Oj5GLIsfPjvzbMp5aCuKZVzUNuWSjk8/OxSiupSVBGT1qu91mc7fft3/4fii7b/XqtSerVV9/OVlIN0/7XC+UFW3X/LWbJCUmlS8e71/+7v4f5/C+9fm49/NkUhD1H27u8gBPm7v43i/ufbdf1r57HwgY1Dv+i11TQf/ricpX0jdS6/c3kblO9WyodS7e1aze2ZzFzl2V6hTKqev1JYt/JDlLeFupu1mrfn8i4qdVE5RTXbldq2VE5BOUeZZYXQ1xJi7n939899SM4f/r33z/W1v3udvPsdmrlCyArBZZZl5g/XFFWrokgPr4V3v/N3dTz+PI//jkJw5WzKOSi76e6w0q4tdUiFtl3Zf57712C8/3rupqz+T+VxjY+ZuYJcZlJh/c+l8yB3KcsePubx5wly/Ye//OMP/mAGgwYAgH8s9ikDAE6KoAEAnBRBAwA4KYIGAHBSBA0A4KQIGgDASf1fiAXkpjnSckYAAAAASUVORK5CYII=",
+ "text/plain": [
+ "<Figure size 511.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 511.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 511.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "for y, name in [(x_in, 'x_in'), (x_stp, 'x_stp'), (x_ltp, 'x_ltp')]:\n",
+ " spec, freqs, t, im = plt.specgram(y, NFFT=512, cmap='inferno', noverlap=256 + 128, pad_to=4096)\n",
+ " spec = 10*np.log10(spec)\n",
+ " \n",
+ " make_playback_animation(f'lp/{name}_spec.mp4', spec, len(y)/16)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "torch",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/dnn/torch/osce/stndrd/presentation/playback.py b/dnn/torch/osce/stndrd/presentation/playback.py
new file mode 100644
index 00000000..4d7e6c78
--- /dev/null
+++ b/dnn/torch/osce/stndrd/presentation/playback.py
@@ -0,0 +1,25 @@
+import matplotlib
+import matplotlib.pyplot as plt
+import matplotlib.animation
+
+def make_playback_animation(savepath, spec, duration_ms, vmin=20, vmax=90):
+    """ renders spec with a moving playback cursor and saves the animation to savepath
+
+    Parameters:
+        savepath: output file name passed to the save method of the animation
+        spec: two-dimensional array (height x width) displayed via imshow
+        duration_ms: determines the figure width and the number of frames (one per 20 units)
+        vmin, vmax: color scale limits passed to imshow
+    """
+    fig, axs = plt.subplots()
+    axs.set_axis_off()
+    fig.set_size_inches((duration_ms / 1000 * 5, 5))
+    frames = []
+    frame_duration=20
+    num_frames = int(duration_ms / frame_duration + .99)
+
+    # first and last frame carry no cursor; in between the cursor moves from the left
+    # to the right edge. The divisor is guarded since num_frames - 3 is not positive
+    # for less than four frames (division by zero for exactly three frames)
+    cursor_steps = max(num_frames - 3, 1)
+
+    spec_height, spec_width = spec.shape
+    for i in range(num_frames):
+        new_frame = axs.imshow(spec, cmap='inferno', origin='lower', aspect='auto', vmin=vmin, vmax=vmax)
+        if i in {0, num_frames - 1}:
+            frames.append([new_frame])
+        else:
+            xpos = (i - 1) / cursor_steps * (spec_width - 1)
+            line = axs.plot([xpos, xpos], [0, spec_height-1], color='white', alpha=0.8)[0]
+            frames.append([new_frame, line])
+
+
+    ani = matplotlib.animation.ArtistAnimation(fig, frames, blit=True, interval=frame_duration)
+    ani.save(savepath, dpi=720)
\ No newline at end of file
diff --git a/dnn/torch/osce/stndrd/presentation/postfilter.ipynb b/dnn/torch/osce/stndrd/presentation/postfilter.ipynb
new file mode 100644
index 00000000..3a225aee
--- /dev/null
+++ b/dnn/torch/osce/stndrd/presentation/postfilter.ipynb
@@ -0,0 +1,275 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.animation\n",
+ "from scipy.io import wavfile\n",
+ "import scipy.signal\n",
+ "import torch\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plt.rcParams.update({\n",
+ " \"text.usetex\": True,\n",
+ " \"font.family\": \"Helvetica\",\n",
+ " \"font.size\": 20\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_lpcnet_features(feature_file, version=2):\n",
+ " if version == 2 or version == 3:\n",
+ " layout = {\n",
+ " 'cepstrum': [0,18],\n",
+ " 'periods': [18, 19],\n",
+ " 'pitch_corr': [19, 20],\n",
+ " 'lpc': [20, 36]\n",
+ " }\n",
+ " frame_length = 36\n",
+ "\n",
+ " elif version == 1:\n",
+ " layout = {\n",
+ " 'cepstrum': [0,18],\n",
+ " 'periods': [36, 37],\n",
+ " 'pitch_corr': [37, 38],\n",
+ " 'lpc': [39, 55],\n",
+ " }\n",
+ " frame_length = 55\n",
+ " else:\n",
+ " raise ValueError(f'unknown feature version: {version}')\n",
+ "\n",
+ "\n",
+ " raw_features = torch.from_numpy(np.fromfile(feature_file, dtype='float32'))\n",
+ " raw_features = raw_features.reshape((-1, frame_length))\n",
+ "\n",
+ " features = torch.cat(\n",
+ " [\n",
+ " raw_features[:, layout['cepstrum'][0] : layout['cepstrum'][1]],\n",
+ " raw_features[:, layout['pitch_corr'][0] : layout['pitch_corr'][1]]\n",
+ " ],\n",
+ " dim=1\n",
+ " )\n",
+ "\n",
+ " lpcs = raw_features[:, layout['lpc'][0] : layout['lpc'][1]]\n",
+ " if version < 3:\n",
+ " periods = (0.1 + 50 * raw_features[:, layout['periods'][0] : layout['periods'][1]] + 100).long()\n",
+ " else:\n",
+ " periods = torch.round(torch.clip(256./2**(raw_features[:, layout['periods'][0] : layout['periods'][1]] + 1.5), 32, 256)).long()\n",
+ "\n",
+ " return {'features' : features, 'periods' : periods, 'lpcs' : lpcs}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lpcnet_features = load_lpcnet_features('lp/features.f32')\n",
+ "\n",
+ "features = lpcnet_features['features'].numpy()\n",
+ "periods = lpcnet_features['periods'].squeeze(-1).numpy()\n",
+ "lpcs = lpcnet_features['lpcs'].numpy()\n",
+ "\n",
+ "x = np.fromfile('data/critical_3.pcm', dtype=np.int16).astype(np.float32) / 2**15\n",
+ "x = np.concatenate((np.zeros(80), x, np.zeros(320)))\n",
+ "x_preemph = x.copy()\n",
+ "x_preemph[1:] -= 0.85 * x_preemph[:-1]\n",
+ "\n",
+ "num_frames = features.shape[0]\n",
+ "x = x[:160 * num_frames]\n",
+ "x_preemph = x_preemph[:160 * num_frames]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stp(A, alpha=0.8, beta=0.5):\n",
+ " A_num = A * (beta ** np.arange(len(A)))\n",
+ " A_den = A * (alpha ** np.arange(len(A)))\n",
+ " \n",
+ " return A_num, A_den"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "frame_idx = 31"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[<matplotlib.lines.Line2D at 0x7ff3342887f0>]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 432x288 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "A = np.concatenate((np.ones(1), lpcs[frame_idx]))\n",
+ "A_num, A_den = stp(A)\n",
+ "w, h = scipy.signal.freqz([1], A, fs=16000)\n",
+ "w, h_stp = scipy.signal.freqz(A_num, A_den, fs=16000)\n",
+ "plt.plot(w/1000, 10*np.log10(np.abs(h)))\n",
+ "plt.plot(w/1000, 10*np.log10(np.abs(h_stp)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_frame = x_preemph[frame_idx * 160 - 80: (frame_idx + 1) * 160 + 80]\n",
+ "\n",
+ "# taper the 320-sample frame with the Hamming window before the zero-padded FFT\n",
+ "window = scipy.signal.get_window('hamming', 320)\n",
+ "spec = np.fft.fft(x_frame * window, n=1024)\n",
+ "\n",
+ "log_mag = 10*np.log10(np.abs(spec[:512]) + 1e-6)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 648x432 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fig = plt.figure(figsize=(9, 6))\n",
+ "\n",
+ "plt.plot(w/1000, log_mag - log_mag.mean(), alpha=0.7, label='spectrum')\n",
+ "plt.plot(w/1000, 10*np.log10(np.abs(h)), \"r--\", label='LPC spectrum', linewidth=3, alpha=0.9)\n",
+ "plt.plot(w/1000, 10*np.log10(np.abs(h_stp)), \"k--\", label='short-term postfilter', linewidth=3, alpha=0.9)\n",
+ "plt.legend()\n",
+ "plt.xlabel('kHz')\n",
+ "plt.ylabel('Amplitude (dB)')\n",
+ "plt.show()\n",
+ "fig.savefig('plots/short_term_postfilter.png')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "periods[frame_idx]\n",
+ "\n",
+ "p = int(periods[frame_idx])\n",
+ "ltp_num = np.zeros(p+1)\n",
+ "ltp_den = np.zeros(p+1)\n",
+ "\n",
+ "ltp_num[0] = ltp_den[0] = 1\n",
+ "ltp_num[p] = 0.25\n",
+ "ltp_den[p] = -0.25\n",
+ "\n",
+ "w, h_ltp = scipy.signal.freqz(ltp_num, ltp_den, fs=16000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 648x432 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fig = plt.figure(figsize=(9, 6))\n",
+ "plt.plot(w/1000, log_mag - log_mag.mean(), alpha=0.7, label='spectrum')\n",
+ "plt.plot(w/1000, 10*np.log10(np.abs(h_ltp)), \"k\", alpha=1, label='long-term postfilter', linewidth=1.5)\n",
+ "plt.legend()\n",
+ "plt.xlabel('kHz')\n",
+ "plt.ylabel('Amplitude (dB)')\n",
+ "plt.show()\n",
+ "fig.savefig('plots/long_term_postfilter.png')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "torch",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/dnn/torch/osce/stndrd/presentation/spectrogram.ipynb b/dnn/torch/osce/stndrd/presentation/spectrogram.ipynb
new file mode 100644
index 00000000..9d3d96f1
--- /dev/null
+++ b/dnn/torch/osce/stndrd/presentation/spectrogram.ipynb
@@ -0,0 +1,173 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "04cba77c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_252797/4199785080.py:9: WavFileWarning: Chunk (non-data) not understood, skipping it.\n",
+ " fs, x = wavfile.read(f'data/a3_short_opus_{br}bps.wav')\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 432x288 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEeCAYAAAC+OaPqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWDElEQVR4nO3dO48kWXre8ec9Jy55qazq2/bs7oy4JCRKWEEguYAkyJBsegQEyJAhS4a+inw5cvkFCJr0+AEIOqIgR9ASQ2lmZ7Rz666uyktEnPPKyLr1zGxEZcxWCAj+f1Z2Z1b1m1U58+Dc3mPuLgAAnlr4/10AAODvBwIHADAJAgcAMAkCBwAwCQIHADAJAgcAMImi70mz4jfumXbv7LdfDgBgrhjhAAAmQeAAACZB4AAAJkHgAAAmQeAAACZB4AAAJkHgAAAmQeAAACZB4AAAJkHgAAAmQeAAACZB4AAAJkHgAAAmQeAAACZB4AAAJkHgAAAmQeAAACZB4AAAJvGIwIlPXwUAYPYGA6eI51PUAQCYucHAWZavpqgDADBzJ63hmIqnqgMAMHODgXMWX09RBwBg5gYD55X/9O6xWf2kxQAA5mt4hOOru8fr+qc9rwQA4DcbDJyFyuMLba1lfP7kBQEA5ml4l1q4P4ezMgIHADDOQOBEPa+OO9PcD6q8mqAkAMAcDU+p3QxwXK6X+cVT1wMAmKmBwElaF/ePN+xSAwCM9OiDnyFsVJg9ZS0AgBkbDJwyuCQphoUigQMAGGkwcOogSVFVPNcicpsBAGCcwQRpXTKLKsNSJXkDABhpMEKyS8GWChb1pklT1AQAmKHhEU42Zd9NUQsAYMYGA+eqk9wb7bpv1LlPURMAYIYGA+dwM4vWpmstI9dNAwDGGQyccLMTuohLZUY4AICRHrGGcwyZMiy1z/nJCwIAzNNg4OzSMXC6fNDW2ycvCAAwT4OBc9kdQ6YMS3ViWzQAYJzhKTUdp9GClQqitQ0AYJzhTQM3IdPmnTqxhgMAGGd4W7SOU2rJD2qte/KCAADzNBg4Wzt2GTi0X+pX9ssnLwgAME+DgWM3L3FvVdnqyQsCAMzTcGsbu7x5xA41AMB4w4GjryRJpkKNb5+8IADAPA0Gzj69lSS5OmVGOQCAkR7RvPPdg8eXPa8EAOA3G75iOm7uHida2wAARjpphGPiegIAwDiDgfNQZoQDABhpMHDc79vZBCuftBgAwHwNBs7DUU3yw5MWAwCYr0dMqd2PcA7tl09YCgBgzgYDJ+Xd/YutftJiAADz9Yg1HKbRAAA/3COm1O5f4tyHAwAY6RHdoh/c8ukEDgBgnOHACcu7xy7O4QAAxhlew3mwacCd5p0AgHGGA0f310qb0doGADDOIzYN3IeMe/OEpQAA5uwRgcM0GgDghzupeScAAGMNBk6w9RR1AABmbniEYwyCAAA/3CMOfhI4AIAf7hHNO6+mqAMAMHPDazhhNUUdAICZGwycGBZT1AEAmLnhGz8zhz0BAD/cI1rb0CEaAPDDDQZOGTdT1AEAmLlHrOFUU9QBAJi5wcC5KD6cog4AwMxx8BMAMInBNLnOX01RBwBg5gYDp7B6ijoAADM3GDi1nU1RBwBg5gYDZ6MXU9QBAJi54ead6qaoAwAwc8MHP8U5HADAD8eeZwDAJIabd9JLDQDwWzC8LdqLKeoAAMzcYOAsfXn32Iz1HADAOMPncFTePY5h/aTFAADma7hb9IOXmDG9BgAYZyBwTJv4cIRDmxsAwDi9gWNWqjS7/zO7qAEAIw0kSFByv/tTwQgHADBSb+CEb3WKruP5kxYDAJivgSm1oDLcT6ktbPPkBQEA5qk3cHJuFB+s4VS+lImdagCA0/UGjqvVIt4HTq2FZGwcAACcrjc9Yljr/H5XNH3VAACjDazhFCrvXhF1ns8VbNn3JQAAfK/BNZx0syvaLKpWKWNKDQAwwvAuNXOFsJF7q0Imd6bVAACnGzyH
k2UKVklyRQty58ppAMDpegNnWb5QkEs3mwWiTK40RV0AgJnpDZxFeL+zQDD7TvcBAAAe45E7AIJMhaIZmwYAAKMMpkeW3a3bXJSBEQ4AYJT+KTU707rIKuJark7nJWs4AIBxegNn55eqgquKa8VwoWhSStupagMAzEj/ORwFBZPKsFJZbJRcjHAAAKP0Bk5lK2WXCqsVrdaDPp4AAJykN3BWfrz/pvODXEltltz3kxQGAJiX3sBJ6tRk0677Rk33VqtCCoFL2AAAp+sNnIPt1GZT9k7BalXBZY89ugMAwAO96dH4VmU4toteFBdqsinlq0kKAwDMS2/gvOs+lyS5Z5VhpSBXsMUkhQEA5mVgW3SUJMVQqclXKphNAwCMVPQ9uSyeKZpUhuMtn12W3A+TFAYAmJfewGnzTtLxts9opWKQXNyHAwA4Xf8utXSpfTJFK1XbmRY3GwgAADjVwI2fhZpsqu1MpqDWjXM4AIBRegMnWqlFPI5qXFltPoYQAACn6u804K2yH5t4ZiXt0vEQKAAApxoc4SSXOh20sDN1WQpWTVUbAGBGegOnywftkykrKXqpzsUV0wCAUXrTw5W1KlzuWclaXbd+d900AACnGByulHa8FyepUxVN7nmKugAAM9M/pZZ2ypKSWhWqtYzSonw1UWkAgDnp3zQQaiWXCtVKarVLXDENABinfw3H87GXmtVK3urrJqlN11PVBgCYkd5TnCEUqoKr8Z2SWtWBNRwAwDgD1xMEuUuF1cpKSrRSAwCM1DvCcR1HM6XX6rRQdlfO+0kKAwDMy2Dzziq6WjsoqVWWFAI3fgIATjd4Dieaq/GtmrxVm7OKuJyiLgDAzPQGThlWSm6qbCVJKoxNAwCAcXoDp81bleaqfSlX1iLSRw0AMM5gt+hN2SmqUGG1TKbs7VS1AQBmpDdwqnCmImQVXihaKZerS2+nqg0AMCO926Kvuy/UpKigcNw0YC53WtsAAE430LzzWtFcpqDkByV3mcWpagMAzMhA886lXFLS8Q6cMgSJXWoAgBF6A2dRPFMVsg62V5cPMkkuLmADAJxucJdaclNQUJd3Sk4zNQDAOIPNO7fpfs0mmj15QQCAeXrEFdOu2hcKVorxDQBgrMEpNUla+ELBChWMcAAAI/UGzsqeK0tKSuryThcVgQMAGKe/04BX+upwHOUEKxXJGwDASP1TaipUh6ygoGC9TQkAAOjVGzhJncykrKwi1GKAAwAYa3BbdJtNBzuoSdeq6WoDABip/z4cNeqy6Rv7tdruLSMcAMBovYGz06W2ybT3S5kVWkRO4gAAxukNnINfKblpFZ5LEgc/AQCj9QZO9qTkx91q2Q+67phUAwCM0xs4rqzs0t6vJO+0pVE0AGCkwV5qdXBFlYrxQoEBDgBgpP4Rzs110md6piIuVQ3GEwAA36//4KcfD35KUs6dMrsGAAAjDQTOQYvo2tm1unytQ5qqLADA3PQHTj5Ikpa+VhHWOjDEAQCM1B84aaug47Zos6A2T1QVAGB2+rcBWFDrplaN3LOco58AgJH6D37md+8d9qzYFw0AGOlRG51LVYqhkrsk0TIaAHC6/usJrFJpruBBbbpW664QVlPVBgCYkYGDn42WhSsoqIhLbVOWnJ0DAIDTDY5wJKmzTsEKHXJS9utJCgMAzEtv4MSwVnJTVla0kl1qAIDRBg5+XmsZ76fQLopK4t5PAMAIg2s4dXCt81rZs6pgKuKLqWoDAMxI/xqOCm3KTlFR0Qold13UvzNVbQCAGelfw4kXym4KMhVhoX3Kqoxt0QCA0w1MqXV61x0PenZ5r3URZY87KwoAwHsGtkUHBUlnVmsRL7SKbBgAAIzTGzhl3OhZ1aq2oEK1Opc6HaaqDQAwI4MjnGXRKUuqbKXkzhoOAGCU3sBpundqUtQ2d1r6SsGkvb+bqjYAwIwM7ADIavNxHefYbYA1HADAOP334XinTdkqWtAmb7SM0pm9mqo2AMCMFL1PhrXW
ZaNNXCuklcogHfxqqtoAADPSO8IJodCmPtxNpSWXWu0nKQwAMC/9nQaslrspmNR5VhWkwI2fAIAR+kc4FrRrS1XhOMI5K1zJ20kKAwDMS+8ajiQlP4bNXq0uqkDgAABGGegWHXRR7xVMynJ12ZScTgMAgNP1Bs46vlIRk4KkcHPxWhXOpqgLADAzvYFT25muDrXMpJVVOmSm1AAA4/QGzsGvdNXWumyzWs+6bIMygQMAGKE3cJJanVd7HbJrp0ZBPlVdAICZ6Q2czg86X+5UmKlSoSxTsHKq2gAAM9IbOG3eSpIKM0UFbTspEjgAgBH6rydI1/r11bn2OR//nI935AAAcKr+KbV0rX1X6Dq1WoVC152r1GKq2gAAM9IbOO6dntU7HdSp9TxVTQCAGeoPHCVVMalWoSs/aBVNS51PVRsAYEb6W9tYqTZHVRa1s4M6l2qm1AAAIwx0i64k6e70TZNdwdk0AAA43UB6ZHU5qPOsoKA2u2qvp6kMADAr/Z0G8k5FyKpDVFZW664wlFEAAHyP/hs/w1LBXJsiypW1ikF20zUaAIBTDAxXgsrYqQxS6aU2pd1dUwAAwCl6Ayd7oy5HZZeioqJJtWhtAwA43eCCTMpB7tJluNRl42rVTVEXAGBmBgPHXYpB2to7GbNpAICRBs/hrKpGpZnizVRaJ1rcAABON7At+lpNKmQmBQ+qw/FeHAAATtUbOEXcaNceRzbZstalFDmHAwAYoTc9lsULJQ8KJh38Sof0iEUfAAC+x+AV02VIWkapsFpNlrq7zmoAADzewDmcTkXIKoN0np+rza6kNFVtAIAZGbiALevQFYrmypZVBtPqpoM0AACnGFyS2adCTTZt7Z2WUTqL7FIDAJxuoHlnpSZFSVJWVsGOAQDASL0RsipeyswVTfpp+kjRpDNSBwAwQn+nAUUlP/azKW8OfNbx6YsCAMxPf6cBbxXN1WZpr0YmqaCfGgBghN4dANFKRfPjwU87aBH9bsQDAMApegPHFJT8eMdnVKFnVZKLOTUAwOl6p9QWdiZJumylVo02RVIV6DQAADhdb+DUvpS7yV062F77FBSZUQMAjDB4ivOQj5kUVehNG7XrSBwAwOn6D36q0Ge7hVp3FV7I3fTVgcABAJyuN3DO8pl2XdA+uWqvtS6SDpk1HADA6fovYFPQeXkMmc6SfrI8qCFwAAAj9AbOzhotYtYiBO1tq2XR6bhJGgCA0/QGzpVdqs2ms8J0sJ2aHNTkPFVtAIAZGeilFrRNxzWcrKzSst6ldqraAAAzMrhLrcumt13S2+5TVTEpGt2iAQCn62/eqU6vF63WMSp5p4vFXm99N1VtAIAZ6T342VijTdnqRV3rRf6Zzuq9vojvpqoNADAj/Ws4HrTtCpVB+nH6UCZX6fVUtQEAZmRwQabJQdc3+wS6HJXFLjUAwOkGA2dZdOpcOlijMnZT1AQAmKHewMmWdV62WhfS0mst6oM+yK+nqg0AMCP9rW28UBmSLirXq7jSZnOllVVT1QYAmJHewClVKd5cuObuqupGgdY2AIARegPnoL32XaHrzvRN3uuwr/WiHLxCBwCA7+gNnL2uVIakNksrq3R1tdYHS0Y4AIDT9be2sVIXi702heujRaX9oVbidgIAwAi9gePKipa1LlxFkOqq0TU7owEAIzyieWc8ruE0WevVVmuWcAAAI/QGzkV+qeymRZTedUlF2WkRmVMDAJxu4BxO1I82b/Vq0WoVo7bXK+0SmwYAAKcbOIdT6PXrL/R6sdfzKijloGclIxwAwOl6A6dWqaJuFOTaJ1cw16sFN34CAE7XGzibWEqS9qlQk13nF5dqM1NqAIDT9QZOm7PefXOhRez0z55Jm9dfqyBvAAAj9G5yfut7uQe9WO70R0WnWLf66Wo7VW0AgBnp76Vmrc5ffKMYstoc9faT11oVrOEAAE43cMW0qd5sddXU+h9vzvX2zYWaFKeqDQAwIwMXsLn2b85UhiRJMnO9
Wl9NUhgAYF5613C+DJ/ps1/9RM+WW/3i5Td6/dFnartC1v9lAAB8R+8I5+v2Y+UctKwOuqh3Wn3wta73S8l6vwwAgO/ob21jtV6+/Fo5B31+tdH+63O92a7k3kxVHwBgJnoD5yy+1ur8SmXZ6oP1lT795c+0Weynqg0AMCP9nQb0QvvrpTabK/3RL/5GbVuqLloV8flU9QEAZqI3cJa+0v/8+PckSfWLt/rwZ59o31Y6qz6cpDgAwHz0Bs6lvdHF8lqeTVef/UjP//CXenn+Vk1mazQA4DT9Bz8V9PN/8TeqFwf9309/LCuz1mfX2jWfT1UfAGAmeg/UnPm5Fj/7teoff6VXu1rNp8/UNqWC1VPVBwCYif4L2LzQ/u9eK1wkFf+w1btPX2u9udYHyz+Yqj4AwEwMXsD26//1D9T+aq30ibR8fqkX//Rv9Y/yP56qPgDATPQGzuuy1kf/6r/r8OUzffXffl/rP85SDqqNBp4AgNP0Bs6Hq6DmP/xbrf9Eev3vP9f2X/47Nd9s9KyklxoA4DS9ybEpXH7+u9r9wVoqFrLDGy3/+df6xZ+nqeoDAMxEb+Dsk7T6y/+q/MGHSmfPFP7ir6U/fKkXFZewAQBO03/FdGv6uz/9fdWLg6rVF/rk43+tn+/+Sh9fL6aqDwAwE72Bc9m6/vfnP9Gvrs4V5PrqsNDf/tlrfUyjAQDAiczdf+OT/2b1n/w//m7WL6+WkqRox7+vQ1Y0KZqrDK7kpn0yZZmSH1/3O+utntd7RcuKwXXoCr1pajUp6uumVJdNZsddC5uy01n5/jSdu8nM9aw6aFU1qotWy7JRUSQt6r2KotPqbKtQdIplpxCzPJtyisopKLWlPJvcTdlNno/7I2JMikWSWZbFLAsuM5eFfPz74v52UwtZMj++JiZZcHk2ye/3Wng2eQq3P01ZcOW2UE5BnqJSW9y9n4dunz/+W/n4l+HB7yKbLObj+3JTTkHKJvcgd1MISQquompVLJq7Wo5fe1NPOH79e+/j5t/IXZD8+PPybFIOx5rc1DXl3c9A0t338Hx8/vbfcTddv92oaSp9dXmhN7uVmhR13ZV3P3dJCjffJ7spS+pyUJuDsqR085o6ZJmkMmRF87vn3E1tDorm+vnLL/Sjizdarbdanl0fa0nx/ucjqVrtVVStLCaFmN/7PN3/vuL9ZyNFhZvPhLup3VfyFLS9WqtpKl1enenL643STc0P38/x83v8/Kcc1HlQd/u+bj9vIX9nZ85tVbevuZXclG4/r9/+mpv6Pzq70j/5ySeqFwctV1spuLpDpa4r1DaV9odKRUyq64MsuELI773/ouiOn/Obn8/te1fID/6xcPd6M5e7fedzHItOoeyOr7n5THkXv/M5v3X7fW5/7qkt7v67yTne/W4eattSXVuo6wo1bamUovZtpZSD9m2lNgXtulL7VGhVtLqo9woh3/1+tk2lJhXKMrlLbY5qUlSW1N68x9t3ffx/2fGzV9/ccHz8jL5fU7j5zcSb9xwe/KZu3+Pt13z7udu6bn+fwVzRbr/fsZLiQf3Zj3XvUyl3aduVd5+127qDpNZNV22pzqVdCkr5+L7cTdFcRXj/0+QPapCk5Pf/HeabJ4tv/RrtQe3pW7/j7MfvaZLCzVP/+f/8l+/9IPQGDgAAvy1c3QkAmASBAwCYBIEDAJgEgQMAmASBAwCYBIEDAJjE/wOWgxUFc1j2fQAAAABJRU5ErkJggg==",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEeCAYAAAC+OaPqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVy0lEQVR4nO3dTY8kWZbW8efca2b+GpGRWVnVSauYXgyC0bRoMSMkJCS+AJoFOxZ8GDZ8BCQkPgRbdqxYIVatkUaoRTMw09PVXdmVL5Hu4W5m9x4W5hnhOV1pFm5TYUjG/7fo8si0yDwR4dmPrtm555q7CwCApxb+XxcAAPj/A4EDAJgEgQMAmASBAwCYBIEDAJgEgQMAmETR95tmxWd7pt1b++HLAQDMFSscAMAkCBwAwCQIHADAJAgcAMAkCBwAwCQIHADAJAgcAMAkCBwAwCQIHADAJAgcAMAkCBwAwCQIHADAJAgcAMAkCBwAwCQIHADAJAgcAMAkHhE4nLMGAPi7GwycqvjRFHUAAGZuMHDavJuiDgDAzA0GTrDq7CNurwEAxhkMnFX54v61KT5pMQCA+XrECqe8f+1KT1oMAGC+BgNnHZ6ffeRPWAoAYM4GAyeerXBMxZMWAwCYr8HAsbNLYrx60mIAAPM13DTgm/vXRdz0XAkAwOcNBs7S1/evF/H6SYsBAMzXYOB84Q+30T5tIAAA4PEGA+d5+dA0cKUXPVcCAPB5w89wYneJqdB1fvbkBQEA5mkwcBan4QJmi6euBQAwY4/Yh9P915UUOT4HADDSI/bhdGJY6dqWT1wOAGCuho8ncEmKkoKWgRUOAGCcwQTJLplMRVzpXWqmqAkAMEODgbNrXa5WbdrprX2YoiYAwAwNBs5t2x1J4J517Yy2AQCMMxg4ybsjCVxZK2NaNABgnMHAabxb4cSwVOY8HADASI9oi+4ao3Ou9c7vnrwgAMA8DQZOfTpWOvtRu7B78oIAAPM0GDjfhbeSpDLeqHCe4QAAxhkMnA/2rrswFMrKT14QAGCehod3+kqSlHOr1tonLwgAME+DgRNOl7R5p0b1kxcEAJinwcA5WteZlvOt3vrfPHlBAIB5esQ+nOP967v2zZMWAwCYr+FJA3oY2FmG1ZMWAwCYr+Eutfbb+9dm8UmLAQDM1/DGz/T+/rVx4icAYKTh83DyQ2easw8HADDS8Cy1swnR2TmADQAwziPukT2saprM8E4AwDjDGz9tcf+6TQQOAGCcwcDx07RoSSojJ34CAMZ5xAqnvH9dhEXPlQAAfN7wCscfnuHcNa+ftBgAwHwNt0WfjbY5b5EGAOASw6NtzjrTQqietBgAwHwNt0X7+Rk4TBoAAIzziC41v39tRuAAAMYZTJAYtvevUz48aTEAgPl6xGibs0ucWWoAgHEecY+M22gAgL+7i2apnU8dAADgEhctX5xp0QCAkR6x8fO8LZpnOACAcR4x2uZ4/tETlgIAmLNHDO9cnX1kT1gKAGDOLmqLtrPJ0QAAXOKiZzjudKkBAMZ5xC214uwjmgYAAOMMBk4Zr84+omkAADDOYOBwyicA4IcwfB4Omz0BAD+A4S41xSnqAADM3PDxBGdNA2bLJy0GADBfw00DYX32EV1qAIBxLnqGwz4cAMBYj7il9jBdgEkDAICxLjtd7ZPJ0QAAPN5g4CxsO0UdAICZe8Rom4e26PjJ1AEAAB5vMHAqfzieIBhTBwAA4wwGzsof2qJddKkBAMYZDJyjHe5f51w/aTEAgPkaPg+HzZ4AgB/A8LRonY+2KXquBADg84ZH23h1/7qIq54rAQD4vOHA0UPg5MzGTwDAOMMbP/1hnE2bbp+0GADAfA23ReshcELgeAIAwDiDgdPK71+XcfOkxQAA5mswcNZnnWlloGkAADDOI2ap2f1rM46bBgCMMxw4Z685gA0AMNYjDmCzoUsAABg0vA8nPAROFTkbBwAwzmDgbIqHS6I4YhoAMM4j
bqk9vH6mL5+yFgDAjA0GTvKhKwAAGPaIZzhnr88GeQIAcInewAnhStXZFStn4ycAYJzewFmVXym7ZNatbLZaTFIUAGB+egPHlU7PcLrLog3egQMA4Hv1JkjKtZJL7gdJ0t7rSYoCAMxPb+CYhU+61K5sIYnJAwCAy/UGThlWui6lsuj233y5KIc+BQCA79WbHk2+UzAphoWkqDIwMRoAME5/W7QVqkJ3T+0haFjhAAAu15se0UolN5mCpKB1IZlY4QAALtcbOFfxlYJcwUpJWYckZd9NVBoAYE4GVziSlPwos4WqIJmKvk8BAOB79QbO0T9on0zuWWW80qZwic2fAIAR+jd+eiNJcs+q4kZ3yeRs/gQAjNAbOLv2WyWXYqgUrPzkbBwAAC4x8AynG9ZZhpWO6f0kBQEA5mkgcAqVQSrDWm260zFNVRYAYG4GnuG0KqybqbYqX2jJFhwAwEi9gXNoXqvOUvZWVdiqDJw3DQAYp3dTTfajDqkbcRPVTR0AAGCM3sAxK7UupDYf5dZNGgAAYIzeW2pF2Cia5MqKVhI4AIDR+qdFh24BZAqqbC3nEQ4AYKT+83DaWx2TtAhbFVqocRcnfgIAxugNHPdWRZDu0ht98NfatS6JZQ4A4HIDt9SWqoJUhKXafDyNtmGFAwC43MDo56zsXVv0Imy1ikF2GncDAMAlegPHrFCwblq0K+suZflpgjQAAJfoH22T9pKkdXiuOu/1pmkk0RsNALhc/y01C/fjbLI3WkdO+wQAjDPQpXZUk02urDKs1eTMEdMAgFEGAicp+8PJn0kuVztJYQCAeelvi7al1oXLFFTnD4q0RAMARhrYh1MpSEpq1OajghE4AIBxBtuiJam0pYrA/hsAwHi9gdOmW62KrKhCVdhqGQb2iQIA8BkDK5xS12WrtV+pyfvT8E4AAC43eB5OYa7gQcFKJXdJcaLSAABz0j9pIN/pkKKCgoJFBUlGpxoAYITeXZyuLKk7gC15o4Z9OACAkfqf4SjIzNVYrWN6r03kdhoAYJzewCmLK0VzlV4p2kKRfTgAgJEGRtt0t9SSWiU/qs55kqIAAPPTGzgxVNq1UY3VyrnVkcABAIzUHzhW3t9Sk6QFGz8BACMNDO8sVQZXqUohFFpGAgcAME5vgizDtdxN6dQKXQWaBgAA4wwuWQ7JVFstU1BB3gAARhoMnH0KalSrzXeq2IYDABipv2lApdy7ZY07EwYAAOMNdqllScm6I6ZLegYAACP1b/w8zVKr/U7ZW0We4QAARuoNnLv8TvvWlNTIvVFpnIcDABinP3DSG7mkQgu5H9Q6SxwAwDgD5+HUyi5VtpYk1Uy2AQCM1Bs4VdwomBS8u+y2maQmAMAMDZ+Ho49dapGNnwCA0XoDp847HdLDxwVt0QCAkfoDp30nl5SVFWwpetQAAGMNHMB2VBmk2vdyJe1bIgcAMM7AM5xSJmllzyRJu5Y2NQDAOL2Bk32vMjx0qXHiJwBgrME2gHx2F20dGRcNABhnIHBc72opKKiMN1pwABsAYKTBFc4iSlGFzOiJBgCMN5giV6Wr8ELuWV2TGqscAMDlhlc4oVvhhFBol5LEbhwAwAiDgdO6dLSDoi2UnbABAIwzEDhdV1qjWmVYsbYBAIw2EDhJZXCVquTK2sQoUzFNZQCAWRm8pbaOWZu8UZP3imaK8dkUdQEAZmYwcIrg3T6csNYhZ63Ll1PUBQCYmd7ACba5PwMnWqlDTn2XAwDwWf3TopV02wZFBQUrdFOUU9UFAJiZgWnRUauYtVKpqFLLaDJjnhoA4HL9gWOFnpWtlqFQaUu12RWNVQ4A4HIDB7C1Wsbuuc3SN2pdKmwxSWEAgHkZPPFz3xZKpwkDhUnJm0kKAwDMy0DTgCu5ydTNU6uiMTUaADDKYHqsirb7r6+1ilIUz3AAAJfrDZwYtnqxOGgRgr7QdqqaAAAz1Bs4RdzoanFUGUylBbmk1o8TlQYAmJPBW2opB1XB1HhW
pGkAADBSb+CkfNSbw0rJpeyuF5UrsPETADBC71kDMSwUzJXc9UFHPasCKxwAwCgDwzsLPV/eKbsUZKpzkDsDPAEAl+tvGggLZTcld2W5mmxT1QUAmJmBFU6pN4eVljGoUiF3KXk7VW0AgBkZGG2TtGsLHVJWVtZ3dZCLW2oAgMsNjLbJerXe6ZCzWmWVQUyLBgCM0hs4x/ZWN6u9NjGqUFB2qQzrqWoDAMxIb+C06VZmrkUwRUXdJSnTpQYAGGFw0sBvbq912yYFmW4bDmADAIwzGDj7ttQ+t8pyvW0SgQMAGKU3cLIfdF0dZTIFmdYxqBAnfgIALtcbOGZRq7JRYUEfdKdNEVQZTQMAgMv1B45KtSnIJO3Crhtx45z4CQC4XO/wTj+dfdN4VqFCdXYV/Z8CAMD36l+uWKHspsK6y7JLC19OURcAYGYGRts0isFVhaCgoNZdWXmq2gAAM9IbODFsFcy1DEFZWatoCsOd1AAA/J7B9OhuqUkLX2hdmJbcUgMAjDBwS607iiAGKajrVuv+FwCAywxMi25Up6iUu7bo940r8QwHADDCYNNA8iAz6dbeKJpkrHAAACMMnPi51qastQim7El3ybmlBgAYZbBpoE5RMUiVrVUFI3AAAKM8KnBMkivpi0V3Lg4AAJfqv6UWKkmSS0pqlV0qWOEAAEYYXOFEc1VBiip0TKJHDQAwykDgBBUhaVO41n6lY3bdqZ6mMgDArAwcwFbrmAqVQSq8UDTTRtVUtQEAZqR/heNZu6bSvjUd7aCrUqqMpgEAwOV6A6cqn6vJQcGkVq2iScFoGgAAXK5/WrQtVMWk7NJNfi6XdFWwwgEAXK43cBZxK0kqzO/331SBFQ4A4HK9gdPkvdxNrZuOVqvNEnkDABij6PvNYKWiZbXebfy8Kl3JSRwAwOV6A8cUlLxbBEUVelElNbn3UwAA+F4DTQOlzFz7VjrYXpsiqww+VW0AgBnpDZxVeCZ3U3IpK2ufAs9wAACj9O/D8ZV2baFoUqlKuzZo15I4AIDLDQ7v/PZYqclS6d1Im++O3FIDAFyuN3Cu/UZ1Mn1os4KC1jHLyRsAwAi9LWeFRy1jVpO7ZzhfLBod8+CiCACA39ObHkc7alVkrWI8dam1WtA1AAAYoTdwdvZBKZuW0ZRPR69xABsAYIzB+2O3bVQ6Pbgxc71t2icvCgAwP/0HsFm3nrltsw62UxlY3wAAxukNnKRGXy4aPSu7y54tDmqd0AEAXK43cFxZq6LVtjBVvtLV8k4NT3EAACMMPsOpU1QZpK1fKxibcAAA4wwHTg7atdLCK2U3JaUp6gIAzEx/04AnLWNScsnlKkJSGM4oAAB+T296BIt6vjiqClJU0HJx1NI4DwcAcLn+83BUqAxJm0K6tqW2250WxgoHAHC5/sDxUmZSGVylmZarg5aRwAEAXK43PRo76q4pVGfT+9yoPla6KggcAMDletPjzt8pBld2aWWF9ru1ni+mKg0AMCf9TQOKerHaa124frws9WG/VmYrDgBghN7AqfNe0bK2RVYVpUVVa8fsTgDACL2Bswhb1anQXQp6W7uurj5oGacqDQAwJ72bapa2VZuDquB61yTFotUicE8NAHC5/kkDynq5udWzMmkdo3a3Wx0zJ34CAC7XGzilL/Sjr77VV8uDXq2C7g5LbQpWOACAy/UGzsa3WqzvFC3rkKSiaPWjJV0DAIDL9QbOc7+WJDUedNe6bp6/VcsCBwAwwsA+HOn2zY1Ky/pHz1zXX32nZeQANgDA5Xq71N7aXikFvVjd6WcxKVaNvlwcpqoNADAjvSucvX3QzYs3KkNS8qDbb15qWfAMBwBwud7AWftWi+uddk2ln7+51u9ev9AxcR4OAOBy/c9w3HR8v1EVk+Jp+83L9W6KugAAM9O7XPk2fqPffPNKz1Z7/eObd3r19a/VNOVUtQEAZqR3hfO2/SvlbFpWtW5Wd9q8eq19vZDEtAEAwGX6T/y0hb54+Tu1
bdQ3H650fHul94eVJDbjAAAu0xs4q/BMq+1eVVXrx9v3+uZ//X1d0RYNABihN3AqW2n/fqvtdqd/8ic/V91UWhSNrP/RDwAAv2dweOcv/s9PJEnV81v9+A/+Wse21KJ6NUlxAID56N/4qfd6vv6g1EbtvvlCNz/7pV4+eyt3xtsAAC7TGzhb3egf/umfa7W+029+9Uq2yNpcfVCb2IsDALhM78OYZ/lGq5/8VotX3+llE1X/1Y3qw0LujLcBAFymd4VTKOjuf3+l+LxR8ZNWt7/6SpurnV6s/2iq+gAAM9EbOJL07S+/VvOrrdLfmJbXO73441/qD/NPp6gNADAjvYHzVbHW1//sz1W/udJ3P/9Dbf9lI+Wg52E1VX0AgJnoDZyXi6Dm3/yZ1n8W9OW//q32//RfqX5zpXUcXBgBAPCJ3qaBTSHlm3+gu589k0Ihaz5o9ae/05+8YLQNAOAyvYGza6X1f/kPyl++Urr+QuE//3fppze6KelSAwBcpjdw3tRZv/iPP9V6u9Ny81p//Zf/Qn+8/2/6i/eLqeoDAMxEb+D8on2t//Hrr/W+rlRa1vum0v/8T1/pv96+nqo+AMBMmPvnn8fcrH/m/+7rf65f31WSpOZ06Sq6yuCK5irNldx0zA9n5AST/t7qoKuyURmSorkOqdD7ulKdg76rSzXZFKz7Azcx6+p0my579+eUIcvMdVMdtSoaLYtWi7JRGVutlgeVRavNdqeibBSrViEm5RSV2ihPUfWxkvtDTe4mz6ZYJFWLWmZZCi4zl4Wujli0Cqc6Qvx0fI+FLJlLH//M0+dZcMny/Z8hSe1+qfZQnf7OrsEip4dGC3eTsinn2H18+t797b/zY/1m3tX7PUKZFIu2qyUmSVKqS+UUFWKSmSuUrWLZymLurvGg3ER5DspN8Ult539vW5fSx5/r2denbHIPyilov9uoaQrt7lba1wu1Oapuo5IHHdtCWaaUTVmmNgc1OSi5qTl9X4K57PTzjuaKIau0rOTdddlNdQ4yc/3Ri9d6fvVe2+1Oq+1O7qbcFvJsappS7qbl6qBQtCqq5v5nacHl2eQpyrMppyh3k9nD12Qhy3NQe6yUU9Dhbqm6rvR+t9XvdltlNyUPcu/eo+ef725qT7+XPCiffi37w3vcrPv34m6ffF3Jrfu6ze8/77yuaK5w+j5J0svVXn/w8rf3738zV113Ndd1paYtFELWomo++ZnG2CrErKJoFYv0yfs/xCwL+f7959nu3wM5xYf37+m94B4Uiu499fFrk6TUFveff86C378X799Cp59BTkGpLj953338N9nUpXIOaptSd4eFUoqq21JNijo0leoUdWgL1SlqVbbaVkdFy4ohK7vp0JRqcjy976Kyd+/B85/P+fc8nN6H1enfUfdzluzsy/n48cefR1D3PXTv3uPh7OiW/LfODYunf8Ph7PuQTu/tZdHK5Ipn/84+vp8ObdH9N0Wl03vmY93JTa2bDqn79UM6vUfPvp8fv6dl8PuKgklNNiX/tHPs49+f/fz/z/3+e9H9WQ/1ZXWf//HXPn5t//Yv//33HprWGzgAAPxQ6G8GAEyCwAEATILAAQBMgsABAEyCwAEATILAAQBM4v8CWnHL48HuOe8AAAAASUVORK5CYII=",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEeCAYAAAC+OaPqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWBklEQVR4nO3dy24kSZbe8e+Y+SUieK3MqupS9/SgIcxAEuYKSIAWWmopYbTSSg8jQK+glRZ6DEE7PcAAAgSoly2NemZ60KhLZ2WSjJu72dHCI0nWVKVbhnfTBbj+vw2dpJM8GRHkl+Z+zMzcXQAAvLTw/7oAAMD/HwgcAMAsCBwAwCwIHADALAgcAMAsCBwAwCyqsU+aVR/smXbv7XdfDgBgqRjhAABmQeAAAGZB4AAAZkHgAABmQeAAAGZB4AAAZkHgAABmUQwcG5+qAwDARykHTljPUQcAYOG4pAYAmAWBAwCYBYEDAJhFMXBiWM1RBwBg4YqB497PUQcAYOGKgZPS3Rx1AAAWjrZoAMAsioETrJmjDgDAwhUDJ+f9HHUAABau3DSg9Ow9dpUGAEzzEW3RF8+Or1+0GADAchUDp61uH4+zH16yFgDAghUDp37WpfZ8tAMAwDnKl9Ssfjr52TEAAOcoBs5leP14nPLuRYsBACzXWYt3hsCcHADANB+x42f8wWMAAM5RDJzKn7aYjoxwAAATFQPndX66h7OpXo+cCQDAhxUD59qe9sO5Cp+/aDEAgOUqBs5t/XRJbe2bFy0GALBcHzHx8/1RVHB2pAYATPMREz+HBTureK3W2xcvCACwTB85whlCZ6PV6LkAAHxIeT8clyRX9l6B7QkAABMVAye5JJliWOmg7uUrAgAsUjFwHvosKci9V1KeoSQAwBIVA+fr7igpKeUHGZfUAAATFQPn/WU098Q9HADAZOWmAbkkyWSPxwAAnKsYOHvbDwcWdLDjS9cDAFio8sRPDUvbuCftbfviBQEAlql8D+c0wgm2egwfAADOVQycO3sznBhYZQAAMN1HTPwcutSyH9WJezgAgGk+okstPR7v7eFFiwEALNdHNA3UkiTPO+397sULAgAsU3lpm/yNJMnVP4YPAADnOmtHtcTinQCAiYqBY89OcWfxTgDANGeNcCpjx08AwDTFwPFnWxL0fnjRYgAAy1VeS63/9vH4of/qJWsBACxYMXCqsH48jlxSAwBMVAycY3r3eNzn3YsWAwBYrvJKA/lpOZvstEUDAKYpt0Xb0wrRTbx+0WIAAMtVDJzr9vcej6OxPQEAYJpy08CzRoHk/YsWAwBYrnLTQGaXTwDAb++slQb82VYFAACcoxg4weLjsSmOnAkAwIedNcJhHg4AYKryPBx/uozWJ3b8BABM8xGB8zTZ0+ysAREAAI8+4h7O0y6f8dm6agAAnKMYOIktCQAAvwPFwGmfLWeTaBoAAExUDJyop0tqmZUGAAATle/h6Pk8HJoGAADTFBNkm988nRyaFy0GALBcxcDpnq2l5lxSAwBMVAycVbx5PKZpAAAw1VkjHCm/YCkAgCX7iHk4TysNxHDxosUAAJbrvO0JnBEOAGCaYuA04fLx2NhiGgAwUXmL6dA+e48RDgBgmmLgPL+MlvPxRYsBACxXMXCeb0nAJTUAwFRnNQ1UkS41AMA05YmfdvV4HFnaBgAwUblLzZ82XWO1aADAVMXAqfU0qgncwwEATFQMnIPt56gDALBwH3FJ7WmEwyU1AMBUxcC5zE9NAyxtAwCYqhg4u8CWBACA3175Ho6e7uHQNAAAmKoYOGvfPB4/X3UAAIBzFBOk9afFO2kaAABMVQ4c1Y/HDRuwAQAm+ojA4b4NAOC39xFNA0+X0Zp4OXImAAAfVgycqPjs5DhyJgAAH1YMnJv4tNJAfHY/BwCAc5Tv4QR7PG5sPXImAAAfVggcU/Kn9zZ+9eFTAQAYccZMTlNwJn4CAKYZTRBT1OrxkpprZ9sZSgIA
LNH4kMWCYpDsNBcnnDMgAgDgmdEEieFC2SU/zcV5vvsnAADnGA2clHfaVPY4wqmcVQcAANOMBk6wVibJbFjAM7LMDQBgovHACY1WUQphJUmqnJUGAADTFLvU6iDlPGzCtuIeDgBgotHAyX7QPkmuTqZKl4GlbQAA0xT6nIOiDd1qsoqmaADAZKMZUscLRZOqeCF5r1UkcgAA04wmSJ93CiZ1/Z1kQbXZ2OkAAHzQaOB0/VcKcsWwljyriQQOAGCawkoDNzpkUx0vFMKFVlGSCB0AwPlGA6epbtRlqQqtqnhxihru4wAAzjeaHq4sSYpWyyyoCpKUZigLALA0hbXUDqeQkUxB6+hjpwMA8EGFwNkru9TnvYLVomcAADDV+CW1vFM+DWr6vNN9T+IAAKYZX0strIeTrFbKBzX0CwAAJhqNkLb6RMmHLrV1/UrHPFdZAIClKXapbU5b4FzFL5iBAwCYbDRwjv1bHZJUa6VgUYkmNQDARKNbeAarlFxK6nTMWz30c5UFAFia8UtqPty0MQW5MmsMAAAmK8zDuX88rqzVIXNNDQAwzegltbp6pU0lBUVVtlGgawAAMNH4PBxFmaS1rpW8O300vnxVAIDFGQ2c7J2CSck6PeRv1GXJaI4GAExQuIfzoGjDfZvsnY7Z5awWDQCYYPySmtXDCEe91vETVWYyLqkBACYotEUf1GWTKaj3g1yMcAAA0xQCp9M+DUvc9Hmv+z5JojUaAHC+wlxOV5el7EnH/KCts9QAAGCaQuCYXreu2lrVYa17281TFQBgcYpNA3VwZWVFa3XpazEPBwAwRWGEExRNqr3VIb3TJ7GRGYEDADjfaOBU8UrhNA/n0H0jSTLVL18VAGBxikvbHFPQXvcyG5Zdc3VjXwIAwA8aDZw+P6gKrk/8c100Xyi5l74EAIAfVJz4WQfXxjdK3itLimE9U2kAgCUZH654VnZpb3tl73RVRaVMazQA4HzjIxwltWFoGqhCq+Qu9/0shQEAlqU4D2cVs0xB0drT0jYAAJxvNHDq+EqrOIRMl3faRObgAACmGQ2cGBolN7XeSGLZTgDAdIUdP3vt09OoZpe4pAYAmGY0cFbVrbps2tvQKFAZ20sDAKYZDZwubyVJtdfq0oPqwKRPAMA0xaVtsqSNVpIY4QAAphsNnHV1q9qGjaXreKGrmsABAExTWNoma5+HU+qwViRvAAATjQbOPr3TXRd0VK/snZJLEqkDADjfaOAEC+qz1KhS7wcds4vZOACAKcYnflqrKkg7HSRJa66pAQAmKlxS+1aVSbuwkylqU81VFgBgaUYDJ+WDtsmUlZW9UxNc3MMBAEwxvrRNPuqhl1xZfd7poTdxDwcAMEVhLbXh3k1UpWC1HvpZagIALFBxP5zbxpVObdHHxOgGADBNoS26UhNctTcKVmufuYcDAJimuNLAXRdUq1H27rSWGqMcAMD5xkc4Ydh4LamXe9YqmiR2/QQAnK/QFr1XG10Pdq9g9fuPzlAWAGBpihvcHNLTPZs9TQMAgImKTQPBpFYruZJ6J3AAANMURzh1eAqZTOAAACYaDZw+vdO2N3U6KlitOphoiwYATFEY4SS5pLVvZBZkoi0aADBNIXBMlUmmIFNQciZ+AgCmKd7D2VSug+3lyjrmrBAu56gLALAwhcAZLp9t7U6moINned7NUBYAYGmKW6oFSUFB1/WPdRkqOfdwAAATFC+pmbk2fqWoWpdVUOSSGgBggmLgBEmtr+TKcrnM2GcaAHC+YuDcNkkXvlK0Wvvk2tSfzlEXAGBhim3R0Vy9krInve17mbFaNADgfIXACdqmoCBTUqerWKmydp7KAACLMr7FtExtcLWqVNtKdTB1eTtXbQCABRkf4VjQOiZFC2p8rSApeT9PZQCARSlsT7B+PK7VKAapjbRFAwDONxo47r2OOZxODMrM+QQATFQInIPqkLUKQ2faRWWycic1AADfMx446nXTHNUGU+2NbhunSw0AMEnh
Hs6FrpuDVtH0hW7UZVMQ83AAAOcbDZwqXqmJSV0e3r/vpW1+M0ddAICFGZ+HY0GHvlIThk3XmiAd0rtZCgMALMto4MTQaNvX2lTDZtMXleui+myu2gAACzIaOHXYKJjrmKSDJ902maYBAMAk4yMcq3Xd7BWDlJR0TKbeD3PVBgBYkNHA2ae3kqRoUqekzm2WogAAy1OY+Jn17WGtLktH65RdSt7NVRsAYEHG26JDq11fade71t7qXWd0qQEAJhkNnOxZ181B+9Miak2QgtWzFAYAWJbRwOnSg15vHnRdB21tp8AtHADARMWVOIO5okkH20uS6rAufAUAAN9XXGng291GD70rqlKXpTawHw4A4HzFwElueuiTbvO1Hnqpsc1ctQEAFmQ0cPq006bu1LmrPY1wjr6dqzYAwIKMd6nlvZrYqzZTp6w6SK48V20AgAUZbxqwoGOqtI5RB3UKJrXGPRwAwPkKG7BV2na1enf1lpSyFMU8HADA+Qr3cN4o5aBoUlbWNrmCFzupAQD4nmJ6XJ62mF57q+SuUP4SAAC+p5geMbiaYLq3B9Vm2tvDHHUBABamGDgpm7osRVVqorFaNABgkmLgHFMlSWq9UTTpWp++eFEAgOUpBE5UlqkyaW/70zYFrDQAADhfIXCyomVlSXfhnXqXKo+zFAYAWJbCPJyNbtdbraLUeKNgUm9prtoAAAsyvsW0OvVpGNFEVaqDdOGrWQoDACzLeOB40v2xlUna2VbraKqZhwMAmGB8ewKZkofhUpoOiiZ1LN4JAJigcEnNVYek7NLKL7RL0p2xPQEA4HzF62PZTVe1q/WV9snpUgMATFLY8bPWMUXVYWgaCBJrqQEAJikGzkNfK/nw/k1j+oQtpgEAExT3w0l5OCWpl0taBUY4AIDzjaZHHa8ejze+GRbxNHvxogAAy/MR2xNkpSwFmZogtYHAAQCcr9AWneVu6nzYDyeYtIoEDgDgfNXYJzfVa/U+BMza17qqXdkJHADA+UZHOId0L0mKNqw6cF1ntUzDAQBMMBo40YYBUHKpUtBFlcT4BgAwxWjgrOKNJKnL0r1ttU/hcU4OAADnGA2cjX2i5KZjlo521K4ncAAA0xS61JLeHBodklR5pc6NwAEATDIaOK2v9a6LOmZXrUaftb06dicAAEwwfg/HN7quh+0JJOnT1Z7AAQBMMho478K3uqiSKpO2dq/aspJzTQ0AcL7RwNn5W+1T0Cqaam+UnIU7AQDTjC/eaSttU1DWsA9OlrSjawAAMEFxyFKZa9u7euu1ib1YSg0AMMVo4Oz9Tq/bTle1qfJKn1/eyVhrAAAwwWjgZO+1ikmbSmq9VVt16mkaAABMMBo4Xd5q21eqTLpUq+RBmcABAEww3jQQNurctE/DatHupk2kUw0AcL7R9DAF3dSddkm6014xZLHDNABgivHAsaDL+qiLahjh1LGnLRoAMMlo4ARFNTHponLd2lq3N2/nqgsAsDDFGzLBXO7SKgStL3aquKYGAJhgNHAqtfr2sBoaB3LWYd9qxcxPAMAEo4Fz1FZtSIomNSFov1vpqp6rNADAkoxvwOZZr9ZbXVVZn7VRb++v2J4AADBJYcfPIV2a6KqDVMdePYEDAJigOA9n31d66IPeHV0Xm63aOFdpAIAlqcY+uQ43Sh60jlnbJLWrgzajXwEAwA8rLN6Z9NnlnV41vS6qoPv7i7nqAgAszGjgrHSpLz7/UhdVr1U0dV2tTWSlAQDA+UYD59Kv1W52iiFrf1rSpol0DQAAzjcaONc+XEJLOSiY9NlnXys7Ez8BAOcb3/FTR7395hOtq15/cOW6+vw32jDCAQBMMNpzdrCD3E237U5/cptlMemL9X6u2gAACzI6wumt1+2rN2pikpnr4atXamOaqzYAwIKMb0/gQc3FXru+1i/eXevtmxv1mR0/AQDnG5+HY1nHh5U29VFZkrvp1Wo7U2kAgCUprhvw9Vef6qLd649uv9XrL77Su4fL
OeoCACzM6Ajnrb5WSpWautdVu9fm0zfaHtq5agMALMh404AOev36G6UU9Paw1vHdpd4dV3PVBgBYkOKOn6urB9V1p5t2py//5h/o9eZ+rtoAAAsyvrSNbnXcrnV9fac//bOf63Bs1cReVr71AwDAdxQuqfX6P3/9U1nIal+9049//2916GtZWM9VHwBgIUYD517faN0c1B9rPfz6tW7/9H/r9fVbtdUnc9UHAFiI0cBp7VJ/+E//p1abvb78ux/J2qzL6ztVgU41AMB5Rm/GbPxKq9//Ws1nb/VpCup+daPjvlWww1z1AQAWorhOzf6XnyvedIo/TXr3Nz/SerPTJnBJDQBwntHAucpX+vIXP9XxV9dKfxvUXOz0+p/8lX6W/2Cu+gAACzEaOJ/YRr/3z3+u45sr/ebn/1DX/2onC66bwORPAMB5RgPnJ+ta3b/717r4C+mzf/trbf/ZX+j45ko3VZyrPgDAQow2DVxUUr7+mXZ/ciHFlezwVus//0r/6Mbnqg8AsBCjgbNP0ua//SflH/1Y+fJW9l//h/RHt3rV9DOVBwBYitHA+fqQ9Yv//MdaX2y1unijX/3yX+gfb/+7/td9M1d9AICFGA2cv0q/0S+//EJ3h1ZmruRBv/4v/1J/+fZurvoAAAth7h++H/OHl//G//3PXuubQ6t9CnrXDc0CTcyKJtXmamJWyqZjNmWZkkvRpJ+s97pt94rmiiFr31f65rDSMUX95lipzyYzKZrrssq6rDtJw66ikpTdFMx10xy1qTq1Va9Nc1Adk9btXlVM2lxsVdedYtMpVknuppyClE19Vyu7yXOQu6nvKvV9paY5an2xG/7xloe3cXgbYlaIafhYcJm5FLIsuEJMspjl2Z4+H4bHLp8eF9nwsX7fKHeVcopDPafv/VxOQZ6C3L/ft5HTULOdvp+Zf69Wd5OyyWJWiFkWTm/t9HyGZz/vtC34+8f27/+snL7bBOLZHr//8HXhO1/7/vMpRW0fNur6SrvDSvuu1qGvtX3/2PvwmggaajrkqGOKSm5KbsM5p+9ZhawgqQ5ZTUin19LwPXo3Veb66dVb3WwedHX5oMvru8fnO7sp9cP/ndrVQfH0WvrO8/jscU3HWikHhdPHQ8wK1ekycTa5B+13Kx0Pje4fLvRme6mUTV2Ocjd1p9dU1vCYpGxKp+cx/b3HOJorWpaZVIekYK5oriwbHots6jyoy2F4XE7PVR2G57Kyof4gycz1erXVT15/rbbptN5sFWLWYd8q9VFdV+vYNYqx16odJmeH8Oz1cnrrHoZzNnuFkFSvD995fb5/TeQUlPtqeA76KM9BVdUrxKzYdKqaTp7t8fVj739Wfnq9DK/h4XxJ8hTl2dQf68fXv7sNH+u/+//f1EflHB7/XSkFHfpafYq6O/0tOaRKfQ66qI+6afcKISuGLHfToa8fn7c+B3U5ap+iuhx0SFHZTU1MiqfHOT4+1t/9mxhPv+dBw+efP/fvz80aXs/vX7Pvv8/z5/3978T786Plx+c5nn6/3/+s96+p5EH7vlJy066v1OWnvxfJTVlSn4Pu+6GSdPqdff+592dHc1XBFU4vz+x6/FcOP8sez7Pv/5k4vXakLKk7/YzK9Ph49D68H0+/U//hr//jD36X0cABAOB3pbjSAAAAvwsEDgBgFgQOAGAWBA4AYBYEDgBgFgQOAGAW/xdANvsp+hiflwAAAABJRU5ErkJggg==",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEeCAYAAAC+OaPqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAV50lEQVR4nO3dy44kSXbe8e+Yucc1L3XpmmbPUAIHJAgShERAEEBoozW14VKAXomANtppp9eQFnoEaaFZ8j4DTnOmq+uWmRHh7mZHC4+8FKfbLcN7wgW4/r9FZVSVR+XJzIj6YObHzMzdBQDAuYX/1wUAAP7/QOAAACZB4AAAJkHgAAAmQeAAACZB4AAAJlEN/aVZ9b090+6d/fbLAQDMFSMcAMAkCBwAwCSeETjx/FUAAGavGDhmBA4A4IcrBk6w9RR1AABmrhg4rjxFHQCAmSsGTs6fpqgDADBzdKkBACZB4AAAJkHgAAAmQeAAACZB4AAAJlFe+Dm8vycAAM9SDJwYr6eoAwAwc+WFn95NUQcAYOYIHADAJMp7qYXVFHUAAGauGDhVZPNOAMAPVwycOjwGjhmjHQDAOM8InM2T37FzNABgnHLgPDkPx709azEAgPk6cacBP08VAIDZKwZO0uOoJtj2rMUAAOarGDhX+tEUdQAAZq4YOEs9dqaZsa8aAGCcYuCs82PTwKJiXzUAwDjlnQaeXLKIF2ctBgAwX8XAeWmP63Ci1WctBgAwX8XAWYX+ErPFZ2tyAAA4RTFwFg+Bszx7MQCA+XrG1jb9+ptglV7oyylqAgDMUHlKLZqkviV67ZvC1QAAfLdyl5pJrqQqrLV1dosGAIxTDJzb1mWKSrlRZrdoAMBIxcD51CW5krIfdGN3U9QEAJihYuDkh18DIxwAwGjP2Gmgb4mu4/azXQcAADhFMUEOnuXeKlitzropagIAzFAxcO68kXvbd6o5u0UDAMZ51uadZrWyM7oBAIxXDJxvwrdyb9V0H5hSAwCMVgycvd1K6u/j3NmnCUoCAMxRMXBaP0hyiZZoAMAPUAycja76B55ZhwMAGO3ZJ35aWKvzw9kLAgDMU7kt2j5J6neMvsvvzl0PAGCmioHzrv25JFfOn+SeJigJADBHxcBJT6bRmFIDAIz1jPNwHncXSLk5azEAgPkqBo77Y2faIm7PWgwAYL5O2v65y0ypAQDGKQbOMl49PGY/NQDAWOUptSeLPZ9OrwEAcIpi4FS2fLw4cDwBAGCckwLnov7yrMUAAObr2U0DIVzSNAAAGK18PEH++PB413171mIAAPNVDJxotSTJ805mJ3VRAwDw4Nk7Dbg62WnLdgAAeFBMkF33/uExW9sAAMYqn/iZbh8eZ2/PWgwAYL7KJ37WXzw87tL7c9YCAJixE2/KsNMAAGCcclv0k1FNFV+csRQAwJydNMJZVNfnqgMAMHPFwNlWbx4em+JZiwEAzNdJu0U/7VgDAOAUz7iH8+HhcXb2UgMAjHPSbtEAAIx10pQaJ34CAMZ6xl5q9cNj9lIDAIxVTJBluHh47IxwAAAjle/h6PEejiudtRgAwHwVA6fTY2daDOuzFgMAmK9y4HzWCs09HADAOMUEqe1xVBMDLdIAgHHKR0zrsUuN83AAAGOVA+dJW3SXPp21GADAfJ208LOKl2ctBgAwX8XAebrYM+X9WYsBAMzXSSOcGFZnLQYAMF/FwFn4Y5daHbdnLQYAMF/FwMn2OMJxzwNXAgDw/YqBk/TYCm3Gwk8AwDjlhZ/+uNjzaYs0AACnKO8WrcdGgS5z4icAYJyT5sjadHuuOgAAM1duGvhs4Se7RQMAximfh+PVw+Nt9easxQAA5usZm3dWpUsAACgqBs7al5KiJNqiAQDjlduiVUnHo6UP6eO56wEAzFS5Ldriw2PjxE8AwEjFBDl4eni8jV+ctRgAwHydNGRZ6+pcdQAAZq4YOK+rx61tknHENABgnPLxBMEeHje+O2sx
AID5KgbOKprMFlPUAgCYscFVnVV8rSDJrJ9WO/jNFDUBAGZocIQTw1Jmknsr96SV0TQAABhnMHC6dKvkkryTlLTSxTRVAQBmZzBwUr5RdsnVSZIq9lUDAIw0GDhmUasoxXAtyY5HFdjQUwAA+E6FwOmbBdw7Sa7X+bUkn6AsAMDcPGungRD6Y6ZfBg5gAwCMMxg47gfF4wxaCJds3QkAGG0wQ+r4SssoZW/kfr+tDfdwAACnK9zDCTL1xxK4H/RyUSnYZqLSAABz8qxZshjWMluqyS4XG3gCAE43GDjr6pXqcOxK807xuOsAAACnGgycYFFtNsWwkKzSZc39GwDAOIOBU9lSyaU6rBWOj1mHAwAYYzBw9unDQ1v0fQMBXWoAgDGKTQN1cLmyJGkZJUY4AIAxhhd+KiuY1KTb0qUAAAwanlLr3ut9Y6rCWmZBFbNpAICRBs8bMEXVQbqufqy7sFaTpyoLADA3gyOcy8VXkqROBy3DhYJJNA0AAMYYHuFYn0eHfCNTP6Vmig8HsgEA8FyDI5y77q3a3O+lVoXj2Th0qQEARhgMnKb7pLtOilb3v8+SHR8DAHCKwSm1Kq61qaR1e63krVqaBgAAIw2OcOLxiOns6WGbG1OcpDAAwLwUFn4mLY5XrH0r9/7PAAA41eCUWpf2CibVtlSjRh39AgCAkQZHOF2+VTQpeq07f6cuu9z3U9UGAJiRwnk4Cy2Cq7GdKi1FzwAAYKzCjpxZrr5pwJW1CCbRNAAAGGEwcHJulF1a6UJJrZrsMiNwAACnK545sE+mnT4qK+m2S3JvpqgLADAzw/dwwkJ16I+aDoqKxsadAIBxCk0DS0VzBQXl4/qbKr6epDAAwLwMBk7KOy2DtPKtsnfa56zMlBoAYIThpgHfaRmzgoLafKdF4JhpAMA4hQQJcknB+8tWIcj9cP6qAACzM7yXmre67fpLoi31qevoUgMAjFI48bPWVZ2VLSsoqmZKDQAw0mCCmKI2MSt40CHdaBWCQricqjYAwIwMB45Vyq6+LdpbNTlzDwcAMErhPJysLKmzTq6sYCZ3zsMBAJxuMHAW1bU2MWvpK0Vb6pCzxAFsAIARntUFUHnUMl6cuxYAwIwNBk60pW67qM76Uc2SLjUAwEiFnQZa9ZNonbInRZMkNvAEAJyuEDjdw+M2746P/Jz1AABmajBwlvFS0Vy115KkRNYAAEYaDJw271SZFBXV5Z12iQ41AMA4heMJGkVzJVqhAQA/UGGngaB9CuosaV290o/X9VR1AQBmZjBw1tUrfWyjlr5QsKhVnKosAMDcDAZOFZbKx8emoEBbNABgpGJbdHLpYI2afKPs/Q7SAACcqnA8QVBt/SaeXT6oyZKrG3oKAADfaTBwmnyjLGlnd5KYTAMAjFdch7NPpqysYLWWzKYBAEYqTqk1Wf0R0xbU5v5PAQA4VbFpYBGk4EFt3im5ZMZaHADA6Yq7RSeXKlVK+aD3TZZ7O1VtAIAZGT5i2jvtUz+1Zopqs4vdogEAYxQCJytafx6OJK0iB7ABAMYpjnCyS3u7lbOBJwDgByjcw7lTHaRKS9Vxq33KQ5cDAPC9njVHtvCFTFH7TOAAAMYpBI7Ldb9xZ9AicA8HADBOMUG63O+lJkmrEMTCTwDAGM8asmRl7bv3/RPCxTnrAQDMVDFwLirXxvuQcTkLPwEAoxQDZ3HcsPP+HBzOwwEAjFEInP5+zcH2quJKycV6HADAKMUuteR9W/Q6vpSJEQ4AYJziCKc2qfZ+h+hVNJlVE5QFAJib4fNwbKllzAoKMusvreJ2ksIAAPNS2EutVR1ctaK6fNAu+UPwAABwisH5MbOoVcy6sKWCojp32fOW7gAA8JniCKdJ/SWmcNxpAACA0xVGOH2zQDTTQhttKmNKDQAwynDTgGqtq6QgKVqt5K54DCEAAE5RHK5sYr/QM6rSXedKbG0DABihcA/noDYHRTOZooJJq3g9VW0AgBkZHuFYpWCuaKbg
QdsqKLDwEwAwwmDgxLDWRd1PoV35C13V0kKbSQoDAMzLYOBk77SpWrmkK98omHSX301UGgBgTorrcKqQFU1aWlSbpSbfTFUbAGBGBgMn2FLpeO/m/mDpBSd+AgBGKN7D2XeVVlFKcm2iT1UXAGBmBgOniitVlpVd+uh7bSuXK09VGwBgRgYDp0t7retW0aSkpH22ocsBAPhew00DSjK5gknZ/HjiJ3upAQBONzylFtZqcqXk0tJrdVnspQYAGKUQOEvdtrXeHrJqVTpkI3AAAKMMBs6+e69V7GQyterUZrF5JwBglOGdBnKj69Vel7UpHbvT6FIDAIwxGDgpf1SQ9zsNqFYVpOxpqtoAADNSaDlzvduvtUv9gs89WQMAGKkQOP26m0OSLsNCbZY24eUEZQEA5mb4iGmrta5aNTkrSEruutCLaSoDAMxKYfPOtTaLRsFMd7nTOrLTAABgnMIIp9K+7dfdfNRO24qdBgAA4xS61D7pplnqkLOyuZJLSd1UtQEAZqRwAFvfllabKSvrtpOiqkkKAwDMS7FpYFl1WsWgvd0d91RbTVUbAGBGCiOcg65XO22rfoSTmVIDAIxUXPjZpEp16Ec2dWBKDQAwTrHlrOmisksb36gOjHAAAOMUAidq19VqsrS3vSRp7esJygIAzE0hcJJWVaerWnoXfq02SyYWfwIATlfcS21TNwrHjGk5mQAAMNLw1jbhQndtv2ln9qQt/QIAgJGG26LzTvuuUjQpWNSmkpI4owAAcLrhhZ9hreSPl0RzRcWzFwUAmJ/iws82h/4cHL/ULpkWrMMBAIxQaBoIiua6rF1Rld7uXQtGOACAEQojnFaVZW2i684+qXWfqi4AwMwUdxq4S5Va7/uit1XQOjDCAQCcbjBwlvWX2qeo2850ka91VUuryAFsAIDTDaZHFdaK5qrN9dpfKJq0COw0AAA4XXG4UplrdZxFCyatIoEDADjdYOBkbyVJd8l0p4MSPQMAgJEKJ34GHXLQp1a6DbcySTSqAQDGGAycOmxUm/db23jQi0XWgiY1AMAIwyMcBcWQFUx66dd6tei0oEkNADDCYHwswkbupuzSQa32KYieAQDAGMPrcOxC+/Q4h3bIpo57OACAEQYDJ6rWPkUl748lCCYdOJ0AADDC8AFsHtRmU5OlqKhVyLRGAwBGGQ4cBS1i1qc2687u9HLZaE/iAABGKAZOl4M2lSlb1vLYsQYAwKkGA+dG79Vm0yKY1nmt5KbaSBwAwOmKq2oO2bTrXGst1bmJvAEAjDEYOEmtgkkxSCbTtuqmqgsAMDPDJ34q60XdKbt0p71+tLlhLzUAwCiDgdP5QYuYdFmbalW6Wt+x8BMAMErxHk7KQesoXdtKXY50qQEARhlui7ZKn7qoJkvBTG2KSsypAQBGKI5wlsH17uD6kPdKOSiTNwCAEQYDp807XdWtqmCKirpc7RTpiwYAjDAYOJUtdVk3+nLl2lilly/fa8kBbACAEQYD5yK8lpk/nPIZQma3aADAKMPHE3itfaqOxxO4urZSxYwaAGCEYtPApmolSddVpd1+xZQaAGCUwcDZ60YXi0Ym6at10G6/Ul2MKAAAftNgfBz8RnXsVJkUTVouGgIHADBKsUuty1GdS/+0cy0XjTaRhTgAgNMNBs5WL9WloMqku5S1XB0UjcABAJxuMHCWWunF5k7Xi06vl1G7u7XyVJUBAGaleEfmJ1/9Uq+Xjd6sXO6mFVNqAIARhu/heKXldqfKXB+afgHOJjLGAQCcbjBwVr6SJC1C0i5JL169V5tZ+QkAON3wOhzb6+PbF7pe7vXTi6zV5S33cAAAowyvw7G9mmahy9Vef3T1SRaTfrLZTVUbAGBGBgMnK+vl62+VclDyoE/fvNT2uNUNAACnGAycWgstL3a6axb6m5uN3n37Sk1iMzUAwOmKI5zD3UoxuOrgirHTyxVTagCA01VDf/lR3+jbX32hF+tb/dF1rS9/8rVud5upagMA
zMjgCOdj97V2+5WqmHS5PGj7O9+o6QYzCgCA7zS8tU280Jsf/VopB73frdV8vNCnw2qq2gAAMzIYOC/sx9pc3WhRd/pie6Nv/uErXXEPBwAwQnEvtdv3V7rY3uhf/+nP1HWV6pgksdsAAOA0w11qlvX27SstVwctX33Q69/5tdoUtay/mqo+AMBMDAZOp4MWi0aH/VK3X3+hl3/613p99UHLeDlVfQCAmRg+gM2v9S//+K+0XB309S++khau7eWN6kBrNADgNIM9ztu81fr3fqXVl+/0potqfv5Ch91aC+MeDgDgNIWtbSrt/+GN4quDqp82uvnlG20vb7TW1VT1AQBmYvgANkX9+m9+V+0vL5R+EbS8uNOrP/x7vc5fTFUfAGAmhg9gs0q/+2c/0+Httd7+nz/QxZ8fpqoLADAzg4HzL9YLtf/pP2j7F9KP/uM/6u7f/oWa9xd6Q9MAAOBEg00DV7WUX/y+dv/qSrJK1t5q829+pT/7gqYBAMBpBgNnn6TN//yvyl/+WPnihey//2/pT17puk5T1QcAmInBwPnmkPW3/+2Ptd7sVK/f6etf/Dv94d3/0l/fLKaqDwAwE4OB8/P2Rl+//ULvfr5Vm4PWVasP/+Pf62cfaB4AAJzG3P17//L3Lv/c//Pvv9Hbw0ofm1p/d1tLkuogRXMtg1SFz58fzRVMuqySFiErhqwgqclBH9pKbTbtUlByqbb++lXMWsX+38kuBZPWMSmGrG3VaRmSNnWji+VBVUxa1Y2qqtPl5Y3qulWIWWZZqavUdZXcTamL/cfUf8y574+o6071opGZK8bPpwZjlRSqTu6P96j6f9sV605mLquSQkzy/NhvYSH3H2P/sbtbqT0slNuopulHg/F4jbspuynY59+3/ORzxpBlx++r/bPr7mszc5nlz3+YsX9etWj75x0/p3JQTkEWXHb8mj1FebbP/s18/LN8/B62Ta2cw0MN7ibPJguuELLcTfvdSm1XabdfadcutG9rfWxWym7qclCWVFuWmR5+n44f3U3pyffDzBXNVdn9a8GU1T/PzPXTqw+6Xt/qcnuri8ubz753XdO/NlebveLxNRHrrq87m/z+mmzK+Te/9vuvsesq5Rx0e7tR0yz0ab/Wx/1ayU0pf3ePTedB3fHv7r+uzvvPaeYKkhYxaVO1x8/VP69JUdlN92/B1oPaHB6uD3r82ScPSm56tdrpJy++VV232m7vFEJW29RKKSrnoJSDqpi0WDT981Kl/ORrrapOsUrH10//HqiWzW+8ziQpd5Waw6J/zab+6wsxK8SkqkqKddu/Ju7fY+nz70+IWaHqHt4/T19jnoLcg8yyLGZ5CmqahXKKD+/drqvUpaiUKh2aWsmDmq5Sl6M+7Ffap0rJTW0OWsWkbd0qWlYdkswk9+NrI0e1OSgdv7/3z3E3xePXHUPu/++SKwZ/+Llk9T/HrL7DKh7fc0+vuf/905/XZ99H2cP7+/59H+Tf++dm/vielPWvEz2+b7oclN3UuqlJQa2bPrZR+cmnD/bYEXb/Grz/v/r+sqf/57j3r8tgLpPUZXt4LWfp4d8Ox/+zn/6kD9nU5fvP1V/zl7/4L995o38wcAAA+G0pHk8AAMBvA4EDAJgEgQMAmASBAwCYBIEDAJgEgQMAmMT/BWRO9iqh3BsvAAAAAElFTkSuQmCC",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEeCAYAAAC+OaPqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWkElEQVR4nO3dy44kSXbe8e+Y+S0ueamqnu6hhiNxKAEkQVBXrrXRRoLWWumZtNETCFroGQRttNEbEAIBCYQu7GkO2d2srMpLXNzNjhYemZWlbrpn+Ey6ANf/t+mozsjqU1He+cHMjpmZuwsAgNcW/l8XAAD4/wOBAwCYBYEDAJgFgQMAmAWBAwCYBYEDAJhFMfRFs+Jv7Jl27+w3Xw4AYKlGRzjBNnPUAQBYuNHAyX4/Rx0AgIVjDQcAMAsCBwAwi9HAseG+AgAAXmQ0cFwc7gkA+PWNj3CsnKMOAMDCvaAtup6jDgDAwtE0AACYxQv24ezmqAMAsHDjTQN+nKMOAMDCMaUGAJgF+3AAALNgHw4AYBbjbdFhPUcdAICFe8E+nGqOOgAACzcaOFVxMUcdAICFGw+csJ2jDgDAwr2gaSDPUQcAYOFe0BbNVh0AwK9vNE0ip0UDAH4DRgOn+Oy06PiKpQAAlmw0cFZ29fTaZK9aDABguV7QNJCeXht34wAAJhoNnNpXn94cmlctBgCwXC8InE8hUxfXr1kLAGDBXhA4j9NopiIwpQYAmObFm2yCrdWEy9esBQCwYKOB81Xsj7YJgUM8AQDTjQbORREkmbJ36vwwQ0kAgCUaDZwmmoKtFazQRfhyjpoAAAv0gsCRZEFmhS4yazgAgGlGAyefbpg2Ra2f7ckBAOAco4FzzJJ7q+ytShVz1AQAWKDRwLnvXO6tirhS4Cw1AMBEo4Hz7fEgKSlyjhoA4NfwggvY+lFNl3dq1b16QQCAZRoNnFZZkinlnQ5qZygJALBEo4Gz11HB1irjhQLXTQMAJhpNkJvwXu4HmQVl5TlqAgAs0GjgBAW5OmXv9Kvw53PUBABYoNHAiae9N+5ZRz28ekEAgGUaP2ng2TTaxt68ajEAgOUaDZyD7SVJ2elQAwBMN360jffTaMFKtVxPAACYaDRwOvUhk/JBNx1NAwCAacYD5zSq6dKN3GmLBgBMc8ZOzixnHw4AYKIXB45ZKeOkAQDARKMJkk7dae7p1YsBACzXC2787APHLL56MQCA5XrxCMdUqgjciQMAmGY0cKqwlSRlf1Cbd69eEABgmUYD51NnWlCw4pXLAQAs1fg+nNzvw+m71FjHAQBMMxo4j+s2psh5agCAycan1J61Q6d8fNViAADLNbooU4a1JMn9oC6z8RMAMM1o4Dw2Dbg6idOiAQATnTVkqYovXqsOAMDCjQbOrnsvs0pSlBlTagCAaV6UIO5HmZU6tN+9dj0AgIU6Y+OnFMPqVYsBACzXi+fI3Pcq4uY1awEALNho4DTxWpJkKpRz99r1AAAWajRwVuFKkuRyJQ7vBABMdEbbWVLKd69XCQBg0UYDp9X+2a+49RMAMM0LzlL71KVm4wcTAADwo8YvYLP16VWU2PgJAJjojCm1JDOumAYATDMaOMf0qVEghuZViwEALNeLTxro9+FwHw4AYJrRwHm8D0f6/JgbAADOMRo4wfrONFenYNWrFwQAWKbxwFF8eh0DTQMAgGlGA8ee3mKvXAoAYMlGA2ejN6dX/sqlAACWbPzwTv/UNMCNnwCAqUYTJFvW43RalzgtGgAwzWjgfNCna6UZ4QAAphpNkKRWrN8AAH5d4zd+2vbpdRFWr1oMAGC5RgPnTf7J0+vsXDENAJjmBV1qn04XyN6+ajEAgOUaDZydPR7YGWXPTh0AAOAcL2ga6CRFSVl1cfH6FQEAFml8H46yYthKclVhO/Z2AAB+1GjgrH2t7Ic5agEALNh404AqmZWSojqCBwAw0Wjg1PbYKJCenRwNAMB5RhOk9SQ/
jWyC0aUGAJhmNHAO6iTvr5ZmhAMAmGokQUxXoZHLJZlqo0sNADDNSOC4NjGemgZca13OUhQAYHkGA8dUKJjkvj+9OYirpgEAUwwGjispPsuXg+3EVQUAgCmGRzhWqw594gTbaO0cbQMAmGYkcErFIJk1kgUFDzIVc9UGAFiQwcAJVqkKUgwb5fygUpVcaa7aAAALMhg4XbpR+fSOx6BhDQcAcL6REU6j0lzBSplV2ngzV10AgIUZDJyyuJKZFEMls1obK+eqCwCwMIMdAGZByU1lWCmFowozmQq5urnqAwAsxMgazk4fW+mY7yVJd7kjbAAAkwyOcFK+lSQduw8KVijTMAAAmGhwhFPEKzVRiqFvFrgMrOEAAKYZ6VIr1QRXEVaKYaVNERVsM1dtAIAFGQyclHcqwuPrg7Lr6TI2AADOMRo4u2QqQq1gpdaFcdIAAGCS0Ss8S3Ot4zuFUHx2cjQAAOcYDJy6/EJmUmmNghVqs8TRNgCAKQYDJ1qt5CZTULRSTZS4gA0AMMVg4DTxUkGuoKAqbE8HeTLCAQCcbzBw2vygu87U+kHb8E7RJLNqrtoAAAsyeNJAm3eSpMrWKrxQGST3dpbCAADLMrrxc1u4Nnmrzjpll5hSAwBMMTyllu6fXlf+OJVG0wAA4HzDXWqh0iGbDrbv32yScScOAGCC4cM7Qy2XlJUlSfed5H6coy4AwMIMBk72rCa4SlXq1GnPqTYAgIlG13BaNxX+vJmNNRwAwPkGA+fxoM6P4b2StUruMqtnKQwAsCzDgeOdTJIrK3rfLEDTAABgiuE1nHzULklrv+h/7Z9u/wQA4BwjU2qfThUoVMi9H/UAAHCuwaNtpL5FYGf3Cgpq3fWCK3QAAPiBwcAxlQomRfXrNvuUZUbgAADONzqlVgbpIl9Kkjp3uedZCgMALMtIl1q/hnOwvY7+oCRXoEsNADDByPyYK5p0sJ0e8ntFGVNqAIBJRtMjyFV6reytklzdsxOkAQB4qdHAKYIUVagIjYJM2Q9z1AUAWJiRwDE10VV5JVNQaSaxDwcAMMHoGo77aYRjtZoYJBvdugMAwA8MBo6pUBFcq9Ntn8klibZoAMD5RvbhdCrM1SopqlTnLhNt0QCA8402DTTRZTLt/aPcXYHDOwEAE4w2DeyTqVBQ8lZNDOzDAQBMMpgeRbzWIQd1ylqHN1rFoJyPc9UGAFiQkaNtstYxy+WSpOSu7LtZCgMALMvojZ+XZadSsX+ziSumAQCTvHhBJlqp7FLOd69ZDwBgoQYDJ8Z1/09FNb6R+yw1AQAWaDBwVsVb3XdRB7UqVamKJonUAQCcbzBwknfa5yCX68FuVdERDQCYaOSkgaQqZF3ZSqaoOs5VFgBgaYbXcKxUkBRkuvPvlL0/Xw0AgHMNBk72rGM23fhOXT6ozeK0aADAJCNNA9fap6AHe1D2tv8G9uEAACYYPWngIZm2vpFZVDTJGOEAACYYDJzOD3KXCkV1+aDkUpe+n6s2AMCCjKzhtLqssjol1XF7uoDN5qkMALAowyOcfFCb+4Cpbav7zse+BQCAHzWYHm261X0X1KrTLn8gagAAk402DQS5OksqQq0YJGNKDQAwwWDgVMWVJOlge7lnpSyJGz8BABOMTKndq3VT4X0rdJbkzo2fAIDzjXSpHZVcypZ1332r5C6zaq7aAAALMnyWWmhkkkqvVIWNqmCK4WKm0gAASzLaNNBEV+FRwUq1ub92GgCAcw0GTrBSZehv/KzCWp07gQMAmGQwcMyCquAqTm9rc56lKADA8oxOqe2T6aPdqfODyhBoiwYATDLSpXaQS9rZgySpMFOgSw0AMMHIlFqhJriu8xslb7liGgAw2ej82LZMutRqjloAAAs2GDgpPajLpoM6RSu1T66qYB8OAOB8I2s49/rQRh3UKlihKphMzKsBAM43OqUWrN+HI0nRTJErpgEAE4wGzir2e2/cswqTzBjhAADONxo4F2VSrVLRSrHtEwAw
1WjglOa6DKVWdqmPbVZh9Rx1AQAWZmQfTqVjDrrLnUqvZZKilTOVBgBYkpERTlB26aBWUYXqYNqnD/NUBgBYlJHAybooO61UKpzeWoRmhrIAAEvzgpMGWr0r+3WbIpgau3z1ogAAyzNyH85KQa4ySJf5QruU5fSqAQAmGB3h3Hf9TZ+Fog7Ztcus4QAAzjd6AdsxB60LU1TQF1VUHbZz1QYAWJDRC9iym6JJWa7LSqpsPVdtAIAFGQ4cZf101V++tg2lskudH2YpDACwLCNNA5VWZasm9m9MLrXaz1QaAGBJRpsGbo+1gklmpjZLgesJAAATjDYNJDd1uX/jpmANBwAwzUjTQKc6JBVBSu66KrOSt3PVBgBYkPH7cMpWpn5KTeLwTgDANCNNA7WOKcolHXPWPpuyp5lKAwAsyegazsdjrS5LpZn2yZTElBoA4HyDgZO9VZuDXFI0U3Ipiik1AMD5Rk8aqGLSQydlSYXNVBUAYHFGRjhHrWOnJvZrOC6p5IppAMAEI00DhYqYVQbp4EmHJDW+mas2AMCCjF4xve8KfWxdktS51Oo4Q1kAgKUZ3YcTzJVdKi2ozVItrpgGAJxvMHC6dKMmdopmyu7aFFLtBA4A4HwvOmkgmJTkWkdXUjdHXQCAhRluGghrZTcldz3oIDPpaKzhAADON7IPp9WHQ6PbNus+3Ou+M1VezVUbAGBBRqbU8tMIp/BCxzxPUQCA5RkZ4Rz11eZWVQjqrFPKUkPTAABggpGNnxutq4Ouq6DCC2VJrdE0AAA432DgVOUb3R/6Ec2lb9VE6c4+zlIYAGBZBgOn7W5131ZKLnVKyi6F8U5qAAB+YOQ+nELJgzZF/+vkkhE4AIAJBtMjpQ+qQr9mk+XaJ2eEAwCYZHiEE1ZaV8fTSQNJx+wq2YcDAJhgMHBiaNSmQqZ+7aaJpg1t0QCACYZHOIrataWiSQc76Loy9dewAQBwnsHAKWKjh658ipjCpFrlDGUBAJZm5KSB/iybKvhTdxotAwCAKQbz49h90KY8qg5S7ZV2STLZXLUBABZkpGlgpWhZRegn1W5b1yrEWQoDACxLMfjF2OjDoVEwl8t1XZmSEzgAgPMNjnCaeKUs0zEFlYp6W7kyTWoAgAkGA6e0lSTJzHVhtSSpZoADAJhgMHA6P8jd5G4KZtrTNAAAmGi4LVpZWdIhSw+5U5apIG8AABMMb/y0WqVlJTe16tRE12qwzQAAgB83fFq0tzrmqOxSqUIXRZqrLgDAwoyOcKK59kmqLaoKdKkBAKYZPakmWn9c543vdMws4AAAphk+aUClWg9qovRgD7rtojpGOACACQYDZ61L/WrX33+z8tXpEE8AAM43HDi+1V0b1WZpq1rrmNn4CQCYZDBwgoIuy74zrZPrump1oFENADDBYODs7EHv6mN/46daFSFrl1jEAQCcbzBwbvSXkiTXp5s+U371mgAACzR4bkBjW912hdoslQo6JloGAADTDAZO6bWSm45JapUVTKoioQMAON/glFpW1jomPc6iXVX9eg4AAOcaPWmgCFkXpXQZSl1We7Ws4QAAJhhtiw6STNKbqtC6OrIPBwAwyWDgtHbQLvUbP4/Z5c58GgBgmsHAucvfqwr9HNpN2ymGzBoOAGCSwcA55js1Melt3W/2rIuWwAEATDIYOFfFz9TETu+qTtdloXdv3hM4AIBJBgOn8Y2q2B+eto6m1WanQOAAACYYbRpIbrrtolp37e5X2nN4JwBgguE1HN+pjp2CuUymh92K+3AAAJMMBk5Sq219UBVc72ppf6hVjG4VBQDgh8ZPGohJpbnq6Grqg5rA9QQAgPMNBk5la6Uc1Lrp5miqylYNJw0AACYYDJy93+rjfqXkpm/3WUXRqYocpgYAON/IWWpR6/KoTZH0kyZot2/mqgsAsDCDgbPVO/38q1/py2avL5usw6GmSw0AMMlg4LzN71Q3B7mb7rug7KaGKTUAwASDgWOn8YyZ6+YoXV/e
qqRLDQAwwcg+nKSbmytFc/18nbW5uOOKAgDAJMXQFz+GD3I3vVs96PfcFIpOTeRsGwDA+QZHOJ11evPmRmXs5G66u7nUisABAEwwGDibvFXZHNSmQn92t9bt3Vabsp2rNgDAggwGTlSh7lBJkprgWq92T9cVAABwjuErpu2jvvvuC0XL+oPrG331i691We3nqg0AsCCDgfMX7X9VSkFFTNpWB61+6zslD5I4UA0AcJ7ho22s1Nu371UVnf7qYavj91dK2SQxrQYAOM9g4PxW8fvaXN1qVe/15fpO77/+Spv6IHHADQDgTIP7cFa+1v2HC2039/r53/1f+vjdG0nSuv47sxQHAFiO0X04X//qp1pv79V8caOLdzeSpCZezVEbAGBBBgMnK+t6e6vjsdLtL7/U9hff6GpzN1dtAIAFGb7x0yv9zh/9NxVF0l9985Ws6rTd3svGb6YGAOAzg2s4pZeqf/s7ffHug94eSh2/vZZn0za8m6s+AMBCDA5VVqp1+PoLxbcHlb/7oNtf/kTryzttM2s4AIDzDI5wNlbqL//739bfWu8V13utru5Uv/ugrf/xXPUBABZicIRTh6Cf/eM/1eGvL/X9n/w9bf/ZrbyLurRmrvoAAAsxGDgXZVD+1/9UF//yqC//1dfa/fG/UHu71nU5ODACAOAHBpPjujKlN7/Q7h+8kYdC8e5brf7+X+gPr/Nc9QEAFmIwcG6OrvV//nfyn/xU6fKt7D/+ifQHl7osOUsNAHCewcD5Zn/Q//j3f6j19kFlc6Obb/+hfnv3p/qfd9Vc9QEAFmIwcL63j/p4e6FffvulDl2pi2anu//yT/S/75lSAwCcx9z9b/zi71z8c/8P/6jS/aHRx0Ot//TNW0lSHaVoUhlcVXAFSXY6QNq9f72OWVXMiuYqzLVPUR/aKHfpkE2u/vcIkqqY1YS+jnT6/jq4ork2RacmJjWxU110WpWt3qzvVJatNusHxSKpKDqFmJVTkHtfSHaT56CuLZRzVNdFpdzf7VM3h08fgLlCTIohKxadQplk1tdiIT97/cPPybN99jWLSRZc7UOj7lApp6CUg4K5ivooM5enIPfw9DXPn2oOMSnYp/9OSlGpi7Lgn+o4/TPGvs5YtSqqz6/9zik+1db/vlnhdFOrnz6Xx//m4/s8BeXc33MUQpJ7UOqiUorq2kJdVyg8+wxyNrmb2q7/fO/3jR6OtY4pateW6jxo1xVKbv1nLCm5KZ9qSG7Kp19LUmnef04hqzB/+lpy064rZOb6+eZOF/VBX16919u3758+95yCdg/r/u6mopOZqyg7lVUrM5dZPv3Zw7PPwJ4+/6e/TzflFJRz0P5Q63is9HCsdbtv5OrrlaRjiv17T/8uuanLQdlN7ek9KQe5pCJkleaqYtKmbBUtP92a+9CWOqRChxR1yP3zcMyhr99cJqk5PZt++rzeNTv97Pqvn57/ELNSF5+e8bYrnv15go5tIXdTPD3L69VOq/Xu6c9r5k+fU3j+/9Hpeei6qNQVur9fK6VCIfTPXV21quqDLOT+9372bISQpGfP7OP/I55NuSuUU1DXFcq5f8batvzs2X7UdsXT+46pUJeiHo6V2hz18fSsPT4jdciqY1IZkpqie/p7Sjmo86B0+ntqnz37j89lMH96beYqQ376e/XT96T/63vKkJWlp9+vfPaz4vFzlfqfb9GyzPqfjVl2enY+V4WsYK4yJEXzvt5T7V0OypL2pz9P+/RnMXW5f+buu6CUdXompWBSkCuG/ues1P9slT4/698lZX98Xvrvl6RormCfv//xvY+REUP/tfTs+x/9m2/+7Y9eKTAYOAAA/KZwKBoAYBYEDgBgFgQOAGAWBA4AYBYEDgBgFgQOAGAW/wfaiFfP4XPeHwAAAABJRU5ErkJggg==",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEeCAYAAAC+OaPqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWnUlEQVR4nO3dTY8kWZbW8efcay/uHm+ZlVVF1jAaEGpAzdBiwxohFrBErFnxvViwY82GLXvEEs1IwwLNTA/dVVNVWZkR4e72cu9hYR6Rkd3ZZuHWHYZk/H9SqSIjzCNOunvkI7t27FxzdwEA8NLC/+sCAAD/fyBwAACLIHAAAIsgcAAAiyBwAACLIHAAAIsoxr5oVvzOnmn33v7w5QAA1oozHADAIiYDx6xaog4AwMpNBo57t0QdAICVe8YZTr1EHQCAlZu+huP9AmUAANZueklNaYk6AAAr94wuNaZJAwB+f7RFAwAWQeAAABZB4AAAFkHgAAAWQeAAABYxGTgx3CxRBwBg5Z5xH05eog4AwMpNBk6w0R0MAAB4lmcEDrPUAAC/v8nASfmwRB0AgJWbDJyqoGkAAPD7e8aSWrlEHQCAlZsMnJ4lNQDAH8Bk4BRhu0QdAICVmwyci+LNEnUAAFZuMnC2dr1EHQCAlWOWGgBgEZOBU/vHazghXL1oMQCA9ZoMnI3vTh9FFeHihcsBAKzV9DUcH0bbmEWVkcABAMwzfeOnTJIUw4Wydy9eEABgnSYDZ2fV6doN/QUAgPkmU+QqFgpWqIhb7bgnBwAw0zNmqQ1bFKTcqLbLJWoCAKzQZOCUZo8fv/G3L1oMAGC9JrfzrKLJLEgu1c5mbACAeSYDJ9owwDNYUFZeoiYAwApNLql96LKSNyrDww2gNno8AACfMxk433etUm50TO8VFCT5AmUBANZmMnCSsvp0q+y9jLMbAMBMk4HzEDKmoGr6kg8AAJ81GTiNOpmVMgtqxGgbAMA8000D4YPkWVfFW/WWlqgJALBCk4FztHu5OgVFdWqXqAkAsEKTgdN5I3nWPr/TwfZL1AQAWKHpDdh0KVlQ9k7JuIYDAJhnMnCyZcmz+tzIFJeoCQCwQpOB40py9erzQXf+/RI1AQBW6HnXcE6y9y9aDABgvZ6xPcEwIdq9V2FMiwYAzDMZOLVvhwOtVhb34QAA5pkMnL3dShqu5QAAMNdk4LQ+3HuT8lHu7IcDAJhnMnAeeD7omD68ZC0AgBWbDJzeG5kK2WnXTwAA5nheglhQES7U5cMLlwMAWKvpM5x8lHurPt3Knr8CBwDAJyYTpAgbSVL2vVw0DQAA5pmepeZJUpTkyplJAwCAeSYDJ3kjKcmsUn4y5gYAgHNMBs7DdZsYrrgPBwAw27O7AIKVqoqbl6wFALBiz24aiKFST1s0AGCmZ5/hdOn2JesAAKzcM7vUpD69e/FiAADrNRk40cqHj9T1f/vC5QAA1uoZbdHdw0cy27xwOQCAtZreYvpJo0Bgx08AwEzP6FL7GDKMtgEAzDUZOJtw/fixM2kAADDT9BbT6Ycl6gAArNwZ13CixGgbAMBMk4FThQtJUgyXMpoGAAAzTQZOGXaShoYBs+LFCwIArNOzb/zM+ZYuNQDAbNNt0XrSFs3wTgDATJOB0+uhFdrkYsdPAMA8z96ATfIXLgUAsGbTTQNP5qeZaBoAAMzzzGs4Nhx8apEGAOBck4HT+l4Py2m0RQMA5prenkDd48fZ2xctBgCwXtP34ah8/DhY9aLFAADWazJwniri9qXqAACs3PT2BLpcog4AwMpNBk44HRLsQmXgDAcAMM+zA0cW1Gc2YAMAzDMZOLUPN366d+qZpQYAmOnZTQPunap4PX0gAACfMRk4l747fZQU7KymNgAAHj3/Go7ENRwAwGyTgdOok51u+AxWThwNAMDnPWPSQJB82OkzezdxNAAAnzcZODtVMht2/UyZWWoAgHmmAycWinFoHCjiZuJoAAA+
bzJw9mnYVtpsIz8trQEAcK5n9Tn3/TuZlYy2AQDMNhE4ptKCXL3cO5VhJykuUhgAYF1GA8esVmF2Wk7rtAuvFYzrOACA800ETqlopjK+kpQVFOSiNRoAcL7RwKmKG5XBlH2YMBC9lLPNNABghtHAybnXqyrIrJAkFSpkLKkBAGYYDxzvVAUphlrBdmrsIInWaADA+UYDZwgayT0rhEpJPUtqAIBZRgMnnTZcS7lR9l6JWWoAgJkmznC2yi5lb5XzrYrTTDUAAM413qUWL5T8459f569euh4AwEpNNA1kmaQiXEiSLpwONQDAPKOB40oqgxRDJbNKWT52OAAAv9No4BRWK9qwD05dfq1SxVJ1AQBWZjRBunxQYa7L6u8oeadCJrOK1mgAwNkmruEMe+HUdin3pItYPu7+CQDAOUYDZ1O8Uu8mOx22CUE53y5SGABgXUYDp8+N2iz5aZxNYWKWGgBglonAOWgTJVOQWVQRTEW8Wqo2AMCKjHepnSYNbHSpMmy1KyQ/XdcBAOAco4FzXbxVcilbfmwgeJivBgDAOUYDR5KSS0mdCqs/GXMDAMA5RgOn9b2qIAUF7ey1ujxsOw0AwLlGb/ys7VJ1dG38Qkm9giRnSQ0AMMP4GY722vempF6lKkmSi6YBAMD5RgMnKCq51Fuv/HAvTny9SGEAgHUZDZwm3ymYVHmlre/Uu5Qzc9QAAOcb357AkySp9ErBTX2mTQ0AMM/4pAFv1KThPpxDOKh3KQRG2wAAzjc5LXpXSAfbP36OSQMAgDlGA8cUVNqwjHawvTbRFikKALA+k1t4dm5yJWVlHVJ+nBwNAMA5Rs9wdsUbZZeyhqC569Pj3jgAAJxjND2Sd4omRZVqfa8yBIVQLVUbAGBFRpfU2nynOroqrxT0RoWZ3FlSAwCcbzRwUm61Ca7rfKPWOjU506UGAJhlfEktH5QlNdbox/CtopnMuIYDADjfs9KjsaOyJ5bUAACzTd74GW0YbbPTtfYpKTvbEwAAzjd5huMu7Xyn3np1nmRiAzYAwPnGA+fJ8pkr6SqWEtdwAAAzjKeHBcXgugt36rzRLgYF4z4cAMD5xjdgs0JdHnb8DBbVuyRG2wAAZphYHwsqg6u1VlGFjjkrcx8OAGCGyQsyr8qkV/kLtX5QacaSGgBglmd1ANRe6i59J0nc+AkAmGU0PWKoJUmForbxtdqcldJ+7CEAAHzW5GgbM5fJVNvlabTN5BY6AAD8lvHASbcKklzDrp9lMLYnAADMMho4ZfHl8H8F7fxqkYIAAOs0cQ2nUpuDerlcWdsYGN4JAJhl4sbPUnd9UJCptXaInXxcqjYAwIpMBM7w5axh188uS65ukcIAAOsyeYZTBldS0sHuVQbJvV2qNgDAioxfw7FhK4KNSvXeaBNNZptFCgMArMto4DTpTm02dcrKSqqCVBVfLFUbAGBFJufUlOaqLarzgw7M7QQAzDR+H07YqnOTJNV2KUnq0u3LVwUAWJ3RwNmGG912QcmzKtvKTHKnSw0AcL7x0TbqFE06qFPrh6Et2rkPBwBwvslrOHVwNdbJFNRnX6ImAMAKjQZO742abOrUKlqpbWGSbKHSAABrMh44+agyuA62V++N3CWJsxwAwPlGA6fLB0lSawd1Oqp3iTMcAMAc44GT7tVm08YvhoNNKuKrJeoCAKzMRNNAVnapVCX3rCAp5ftFCgMArMvkFtO3nSl4kGvYB4fhnQCAOSbPcK7KoUkgWKHbPstULFAWAGBtRgPHPcldqr1WVKljyoqRraYBAOcbDRyTqYpSqUK9N6pCUAzbpWoDAKzI5KSBdLrtxiyc9sOZfAgAAL9lfElNvZpk6tRrazdKLtl0RgEA8Fsm0yOYVKtU1LD7Z3Y2xQEAnO9ZS2pBQY3fLVEPAGClJgLHZJLu7ajW92qzcw0HADDLs5bUpKFpoMlZ0eqXrgkAsEITgePaRNelb7WxK9UhKHCGAwCYYTI9roqkyzA0DET7OEEa
AIBzTAaOPSypKajJrpzpUgMAnG8ycI7p4yFdzoqhetGCAADr9KwLMp1nRZUqQ1AVL1+6JgDACk3MUitUh6wg09Z3uoxB0cqlagMArMh44IStquB6VRaqfaMqmkpjeCcA4HyjgROs0HXVPR50TP64ERsAAOcYDZycW0VzmUnXvlOQHmeqAQBwjolp0Z36HBRMijKZSUndUrUBAFZkYkltqypkVaf5NlelcYYDAJhlPHBCpVf1UVWQCgsqg9TpuFRtAIAVmWiLjirC0CQQzdRn9sMBAMwzGjh9utWhLx8PbJJUGzd+AgDONzlpoMtB971kZspii2kAwDyj6VGVryVJdZTcXdelVNlukcIAAOsycQ0n6LpqdFFIh5x0U7pa3y9VGwBgRUYDpwi1Xm33ijZ0qUVzlez4CQCYYTRwunxQn6KOSdqE4dCdXy1SGABgXYqxL3b9re7aWh86VxWC9kk6GEtqAIDzjQaOWaG7ttJ97zJJt13Q3t8tVBoAYE0me5yzTPG0zXQZpJ29fumaAAArNHHj5096XR9UBVMwKbnkSkvVBgBYkYnhnRuVMavNruRSl6V7ltQAADOMt0XHKx26Utml3l19loLiUrUBAFZkfAM2b9SmqGBSm7OOybW166VqAwCsyGjgpHSry6rRRRGU5ZKk0rnxEwBwvvHRNlarimmYFO1JF6UpjndSAwDwWZPXcNoU1bmrkMkkFU7gAADONxo4rqQfjsN06E0o1GepZkkNADDD5I6fyYe7PpucdNe7apWLFAYAWJfJSQNVyIpm6jRsNd2f/g8AwDnGp0Wnn3RVNdpEqVGnXWFL1QUAWJnJpoFoWV2WXK5oUhChAwA438R9OHuZScF06lGTEktqAIAZJiYN3Ou2qR+nRR96V28M7wQAnG+yaaCIWWWQkpJu+6StV0vUBQBYmYlp0Re6rBpdFq5ShaKZSiYNAABmGA2cGHe6a2uVwRVkui7j9CkRAACfMT5pwHu1KcrdFBRUkjYAgJkmhncOy2dNlnolVUH0qAEAZpm4hlNqW3SKNrRD3/dSbWzABgA438SSWtZF2Sqc2qJ/6pIuIoEDADjfaMtZGS+UTzd8loqqgykakwYAAOcbH20Tar0/biRJtQp9WdM4AACY51nxEc1VWtQmSlXgDAcAcL7xMxyrFcx1TKbOk1yS+0KVAQBWZfIMJ9qQMHdqdNtJBUtqAIAZRuOjsp3uu1JdHpbRgulxkCcAAOcYnxatpPt+2FK6VNSr0tVy5ycAYIbJBbIsqc3SpdXaFq6KJTUAwAyj8VHaRhdFr+xSaaZNdNXc9wkAmGHyfCXItYnDmc59H5ToUgMAzDDeNOBbdR7Uu7TPvbosdVzDAQDMMHmG866p1WWpUSdJatlhGgAww2jgJOvUZFOXpa1K7SLraQCAeSbPcF5Vw/YEWdJ1mcTsTgDAHKOB8z79WqU9NA24NjGLUWoAgDnGb/z0Tp2byiCVCjqmQNMAAGCW0cDZxte67wsll7YhKtgzx0sDAPAbnpUfXZYOOemy7BRJHADADKPxERT1quy0icO06OuyY3gnAGCW0cBp/E4XZaeLIqtSoTr2OnIfDgBghvEzHCvU5aA2m65CpSJkruEAAGYZbxqwGx36Qk0y7WKQuzG8EwAwy2jg9N4oS+pdQ6da2bHjJwBgltH4aH2vIKkKUu+uTdmKngEAwByjgWMWdF21ui6zdjHo7ZvvmTQAAJhlsi06u8ldqoKp2jS0RQMAZhmfFu2dkps6H1KmPdYqAxOjAQDnm2wBiObKLvXZ9f7DtUrOcAAAM4wGzo291dvLD/qiSnqzMR26SgVnOACAGSbPcMqYdFX2ui5dNxd3CkbgAADONxo493qnQ1epd9NtZ6qYpQYAmGk0cCrbad9WOqagHxo/PYAzHADA+UYDp/atLqtGl0XSH22ltit1TIwaAACcrxj7YqlKf/z1t3rfbHTXR93uL7SJbPkJADjf6OlK6ZUur29VxaRDCko56KIg
cAAA5xsNnMKjzLK6HPVjE/X16x+1LfqlagMArMho4BzCQT/9+Fopm77aJF3d3LIfDgBglvHRNhrOZq7rRn93e1Qoel0U3SKFAQDWZfKE5er6VkVISm766fsvVMUkG+81AADgt4wGznW+UX25lyR9d6z07vZa7iYL20WKAwCsx/iOn5bUHysVMWkTXXXRKYasnG+Xqg8AsBKjgfPBftKP37/R1fagn12/19tvvtVl1S5VGwBgRUYD504/KMakENIwceCrHyVJRXyzSHEAgPUYDZydvdbNm3fq+0LfHy7U7TeSpDJeLFIcAGA9RgPnKl+rrFtt6kbfXL3Xu2+/1M32XmbcjQMAOM/4GY5vdfhwqZvX7/Wnv/gzdV2pIiZFK5eqDwCwEqOBs7eDvvvuK5m5Nl/+pK//+FfKOSgQOACAM40GTmNHlWWn42Gj27/5Wtc//0tdXtyrDpdL1QcAWInRwHmdX+vv/ZP/pXrT6Nu/eSuLrsurO9VG4AAAzjOxH06h7Z98p+r1rb74R3+p419/pbatFMWSGgDgPBPDO5OOv/xK8eag8h/sdfurL7Xd7fVF4j4cAMB5xm/8tL1+/Ktv1H9/pfR/Cm1ff9Crn/1SX4erpeoDAKzE+DUcv9bbf/YX6u62evfnf19X/3KYNBDNFikOALAeo4Hzs4uN+n//r3Txb7K+/He/1vGf/2t1Hy60Cdz4CQA4z2jTwEUh5Zs/0eEXF/KiVrz9lXY//1Z/+sqXqg8AsBKjgfNjI+3+23+Sf/VW6foL6b/+T+nnl3pTs+snAOA8o4Hz66bVX//nf6x606iof9KHH/6p/qj5C/3VfbVUfQCAlRgNnA9+1P5+p2//9isdu1JfXH3Q//7vv9Av75cqDwCwFub+u6/H/MPLf+v/5V980If7S7V9of/x7TeSpC6bsqRNzNrFJDP/pPvAzPWqanRRtjKTYshq+6gP7UZ9DmpyUHKTuym7KYasaEMdyU3RXDdlqyJmXVaN6qJTXXTabo6KIauuG8UiaXOxVyx7mbksfPx7eDblFOVuH//cF8opKMSsWHWnx+THeiUpxCyFLAv++PVQJsmyrMgyOx0fXJ5NnuLwA08/22x4bO4KeYrDMfnTjr7Ulh+/9lhfGH5W/LSeh1qefi6nIOXw+LVYdQqnJU6zLPcg7wrlPii1pXJXqNi0KnbH4ZiY5B7U322Vu+Lx+4SiVyj70xMYhuesK4bXqCuUuuLx+Xp4rjybukOtnKP2dzsdjht1XammL5VyUMrh8e8oSW0q1OegLkf1aXjHxNP3fHj9i5AUn7yWKZuaVMhM+ubqJ13u9rp59V67L95/8lq091u5m2KRZGF4jYvNx80CPQX1x1qe7fF1zl2h1Bcy88f30UO97f1GfVcOf59jLXdTn6Jyjjq2lbrTa//wHvbTf12OctfwOZnqolcRkqqiV10M77sYkyTp0GzUpaimL9X28fT+D4qWVRVJQa4QsmLISjko56DLzUFvXr9TLJKKYni92rZS6qPC0/euffp7XVadQkiy+PGYcKrj4c9x0ygU+ePvTFcMr2+Kw/vW7fH72un33sLH75Mffh9O79ei6hQ3zfD9T8c8vLf6/UZ9WyqnoNwXsphU1q0s+OPn+6ZS15Vqm0rHZqOUwuN7a9/W6nNQfXpey5hUxv6T5zed/g3oUqEuxcfnMMuU8qeNT1XsVZ2ez6e/b5KG191N4cnzGkOW+8fv8/Q9kGWKllXE9Mljnv4uPH2vPWXmCuaP3+/p9276UvnJY1I2JQ/qc9BtVynloO43HhdDVvkb74X45Pf4oa7u9Pd4+m/5Q+0Pf354VPfkuQunzz/8zIfj/8Of/cfPtjKPBg4AAH8o9DcDABZB4AAAFkHgAAAWQeAAABZB4AAAFkHgAAAW8X8BkZ5wvzeYanEAAAAASUVORK5CYII=",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "<Figure size 513.2x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "from scipy.io import wavfile\n",
+ "import scipy.signal\n",
+ "from playback import make_playback_animation\n",
+ "\n",
+ "# coding noise demonstration\n",
+ "for br in [\"inf\", 24000, 15000, 12000, 9000, 8000, 7000, 6000]:\n",
+ " fs, x = wavfile.read(f'data/a3_short_opus_{br}bps.wav')\n",
+ " spec, freqs, t, im = plt.specgram(x, NFFT=512, cmap='inferno', noverlap=256 + 128, pad_to=4096)\n",
+ " spec = 10*np.log10(spec)\n",
+ " \n",
+ " make_playback_animation(f'animations/opus_{br}bps.mp4', spec, len(x)/16)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8408ca1",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dnn/torch/osce/test_model.py b/dnn/torch/osce/test_model.py
new file mode 100644
index 00000000..616a0ec5
--- /dev/null
+++ b/dnn/torch/osce/test_model.py
@@ -0,0 +1,96 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+
+import torch
+
+from scipy.io import wavfile
+
+
+from models import model_dict
+from utils.silk_features import load_inference_data
+from utils import endoscopy
+
+debug = False
+if debug:
+ args = type('dummy', (object,),
+ {
+ 'input' : 'testitems/all_0_orig.se',
+ 'checkpoint' : 'testout/checkpoints/checkpoint_epoch_5.pth',
+ 'output' : 'out.wav',
+ })()
+else:
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument('input', type=str, help='path to folder with features and signals')
+ parser.add_argument('checkpoint', type=str, help='checkpoint file')
+ parser.add_argument('output', type=str, help='output file')
+ parser.add_argument('--debug', action='store_true', help='enables debug output')
+
+
+ args = parser.parse_args()
+
+
+torch.set_num_threads(2)
+
+input_folder = args.input
+checkpoint_file = args.checkpoint
+
+
+output_file = args.output
+if not output_file.endswith('.wav'):
+ output_file += '.wav'
+
+checkpoint = torch.load(checkpoint_file, map_location="cpu")
+
+# check model
+if not 'name' in checkpoint['setup']['model']:
+ print(f'warning: did not find model name entry in setup, using pitchpostfilter per default')
+ model_name = 'pitchpostfilter'
+else:
+ model_name = checkpoint['setup']['model']['name']
+
+model = model_dict[model_name](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs'])
+
+model.load_state_dict(checkpoint['state_dict'])
+
+# generate model input
+setup = checkpoint['setup']
+signal, features, periods, numbits = load_inference_data(input_folder, **setup['data'])
+
+if args.debug:
+ endoscopy.init()
+
+output = model.process(signal, features, periods, numbits, debug=args.debug)
+
+wavfile.write(output_file, 16000, output.cpu().numpy())
+
+if args.debug:
+ endoscopy.close()
diff --git a/dnn/torch/osce/test_vocoder.py b/dnn/torch/osce/test_vocoder.py
new file mode 100644
index 00000000..e71a5c37
--- /dev/null
+++ b/dnn/torch/osce/test_vocoder.py
@@ -0,0 +1,103 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import argparse
+
+import torch
+
+from scipy.io import wavfile
+
+from time import time
+
+
+from models import model_dict
+from utils.lpcnet_features import load_lpcnet_features
+from utils import endoscopy
+
+debug = False
+if debug:
+ args = type('dummy', (object,),
+ {
+ 'input' : 'testitems/all_0_orig.se',
+ 'checkpoint' : 'testout/checkpoints/checkpoint_epoch_5.pth',
+ 'output' : 'out.wav',
+ })()
+else:
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument('input', type=str, help='path to input features')
+ parser.add_argument('checkpoint', type=str, help='checkpoint file')
+ parser.add_argument('output', type=str, help='output file')
+ parser.add_argument('--debug', action='store_true', help='enables debug output')
+
+
+ args = parser.parse_args()
+
+
+torch.set_num_threads(2)
+
+input_folder = args.input
+checkpoint_file = args.checkpoint
+
+
+output_file = args.output
+if not output_file.endswith('.wav'):
+ output_file += '.wav'
+
+checkpoint = torch.load(checkpoint_file, map_location="cpu")
+
+# check model
+if not 'name' in checkpoint['setup']['model']:
+ print(f'warning: did not find model name entry in setup, using pitchpostfilter per default')
+ model_name = 'pitchpostfilter'
+else:
+ model_name = checkpoint['setup']['model']['name']
+
+model = model_dict[model_name](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs'])
+
+model.load_state_dict(checkpoint['state_dict'])
+
+# generate model input
+setup = checkpoint['setup']
+testdata = load_lpcnet_features(input_folder)
+features = testdata['features']
+periods = testdata['periods']
+
+if args.debug:
+ endoscopy.init()
+
+start = time()
+output = model.process(features, periods, debug=args.debug)
+elapsed = time() - start
+print(f"[timing] inference took {elapsed * 1000} ms")
+
+wavfile.write(output_file, 16000, output.cpu().numpy())
+
+if args.debug:
+ endoscopy.close()
diff --git a/dnn/torch/osce/train_model.py b/dnn/torch/osce/train_model.py
new file mode 100644
index 00000000..e8f94dcc
--- /dev/null
+++ b/dnn/torch/osce/train_model.py
@@ -0,0 +1,307 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+seed=1888
+
+import os
+import argparse
+import sys
+import random
+random.seed(seed)
+
+import yaml
+
+try:
+ import git
+ has_git = True
+except:
+ has_git = False
+
+import torch
+torch.manual_seed(seed)
+torch.backends.cudnn.benchmark = False
+from torch.optim.lr_scheduler import LambdaLR
+
+import numpy as np
+np.random.seed(seed)
+
+from scipy.io import wavfile
+
+import pesq
+
+from data import SilkEnhancementSet
+from models import model_dict
+from engine.engine import train_one_epoch, evaluate
+
+
+from utils.silk_features import load_inference_data
+from utils.misc import count_parameters, count_nonzero_parameters
+
+from losses.stft_loss import MRSTFTLoss, MRLogMelLoss
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('setup', type=str, help='setup yaml file')
+parser.add_argument('output', type=str, help='output path')
+parser.add_argument('--device', type=str, help='compute device', default=None)
+parser.add_argument('--initial-checkpoint', type=str, help='initial checkpoint', default=None)
+parser.add_argument('--testdata', type=str, help='path to features and signal for testing', default=None)
+parser.add_argument('--no-redirect', action='store_true', help='disables re-direction of stdout')
+
+args = parser.parse_args()
+
+
+
+torch.set_num_threads(4)
+
+with open(args.setup, 'r') as f:
+ setup = yaml.load(f.read(), yaml.FullLoader)
+
+checkpoint_prefix = 'checkpoint'
+output_prefix = 'output'
+setup_name = 'setup.yml'
+output_file='out.txt'
+
+
+# check model
+if not 'name' in setup['model']:
+ print(f'warning: did not find model entry in setup, using default PitchPostFilter')
+ model_name = 'pitchpostfilter'
+else:
+ model_name = setup['model']['name']
+
+# prepare output folder
+if os.path.exists(args.output):
+ print("warning: output folder exists")
+
+ reply = input('continue? (y/n): ')
+ while reply not in {'y', 'n'}:
+ reply = input('continue? (y/n): ')
+
+ if reply == 'n':
+ os._exit(0)
+else:
+ os.makedirs(args.output, exist_ok=True)
+
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+# add repo info to setup
+if has_git:
+ working_dir = os.path.split(__file__)[0]
+ try:
+ repo = git.Repo(working_dir, search_parent_directories=True)
+ setup['repo'] = dict()
+ hash = repo.head.object.hexsha
+ urls = list(repo.remote().urls)
+ is_dirty = repo.is_dirty()
+
+ if is_dirty:
+ print("warning: repo is dirty")
+ with open(os.path.join(args.output, 'repo.diff'), "w") as f:
+ f.write(repo.git.execute(["git", "diff"]))
+
+ setup['repo']['hash'] = hash
+ setup['repo']['urls'] = urls
+ setup['repo']['dirty'] = is_dirty
+ except:
+ has_git = False
+
+# dump setup
+with open(os.path.join(args.output, setup_name), 'w') as f:
+ yaml.dump(setup, f)
+
+ref = None
+if args.testdata is not None:
+
+ testsignal, features, periods, numbits = load_inference_data(args.testdata, **setup['data'])
+
+ inference_test = True
+ inference_folder = os.path.join(args.output, 'inference_test')
+ os.makedirs(os.path.join(args.output, 'inference_test'), exist_ok=True)
+
+ try:
+ ref = np.fromfile(os.path.join(args.testdata, 'clean.s16'), dtype=np.int16)
+ except:
+ pass
+else:
+ inference_test = False
+
+# training parameters
+batch_size = setup['training']['batch_size']
+epochs = setup['training']['epochs']
+lr = setup['training']['lr']
+lr_decay_factor = setup['training']['lr_decay_factor']
+
+# load training dataset
+data_config = setup['data']
+data = SilkEnhancementSet(setup['dataset'], **data_config)
+
+# load validation dataset if given
+if 'validation_dataset' in setup:
+ validation_data = SilkEnhancementSet(setup['validation_dataset'], **data_config)
+
+ validation_dataloader = torch.utils.data.DataLoader(validation_data, batch_size=batch_size, drop_last=True, num_workers=8)
+
+ run_validation = True
+else:
+ run_validation = False
+
+# create model
+model = model_dict[model_name](*setup['model']['args'], **setup['model']['kwargs'])
+
+if args.initial_checkpoint is not None:
+ print(f"loading state dict from {args.initial_checkpoint}...")
+ chkpt = torch.load(args.initial_checkpoint, map_location='cpu')
+ model.load_state_dict(chkpt['state_dict'])
+
+# set compute device
+if type(args.device) == type(None):
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+else:
+ device = torch.device(args.device)
+
+# push model to device
+model.to(device)
+
+# dataloader
+dataloader = torch.utils.data.DataLoader(data, batch_size=batch_size, drop_last=True, shuffle=True, num_workers=8)
+
+# optimizer is introduced to trainable parameters
+parameters = [p for p in model.parameters() if p.requires_grad]
+optimizer = torch.optim.Adam(parameters, lr=lr)
+
+# learning rate scheduler
+scheduler = LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))
+
+# loss
+w_l1 = setup['training']['loss']['w_l1']
+w_lm = setup['training']['loss']['w_lm']
+w_slm = setup['training']['loss']['w_slm']
+w_sc = setup['training']['loss']['w_sc']
+w_logmel = setup['training']['loss']['w_logmel']
+w_wsc = setup['training']['loss']['w_wsc']
+w_xcorr = setup['training']['loss']['w_xcorr']
+w_sxcorr = setup['training']['loss']['w_sxcorr']
+w_l2 = setup['training']['loss']['w_l2']
+
+w_sum = w_l1 + w_lm + w_sc + w_logmel + w_wsc + w_slm + w_xcorr + w_sxcorr + w_l2
+
+stftloss = MRSTFTLoss(sc_weight=w_sc, log_mag_weight=w_lm, wsc_weight=w_wsc, smooth_log_mag_weight=w_slm, sxcorr_weight=w_sxcorr).to(device)
+logmelloss = MRLogMelLoss().to(device)
+
+def xcorr_loss(y_true, y_pred):
+ dims = list(range(1, len(y_true.shape)))
+
+ loss = 1 - torch.sum(y_true * y_pred, dim=dims) / torch.sqrt(torch.sum(y_true ** 2, dim=dims) * torch.sum(y_pred ** 2, dim=dims) + 1e-9)
+
+ return torch.mean(loss)
+
+def td_l2_norm(y_true, y_pred):
+ dims = list(range(1, len(y_true.shape)))
+
+ loss = torch.mean((y_true - y_pred) ** 2, dim=dims) / (torch.mean(y_pred ** 2, dim=dims) ** .5 + 1e-6)
+
+ return loss.mean()
+
+def td_l1(y_true, y_pred, pow=0):
+ dims = list(range(1, len(y_true.shape)))
+ tmp = torch.mean(torch.abs(y_true - y_pred), dim=dims) / ((torch.mean(torch.abs(y_pred), dim=dims) + 1e-9) ** pow)
+
+ return torch.mean(tmp)
+
+def criterion(x, y):
+
+ return (w_l1 * td_l1(x, y, pow=1) + stftloss(x, y) + w_logmel * logmelloss(x, y)
+ + w_xcorr * xcorr_loss(x, y) + w_l2 * td_l2_norm(x, y)) / w_sum
+
+
+
+# model checkpoint
+checkpoint = {
+ 'setup' : setup,
+ 'state_dict' : model.state_dict(),
+ 'loss' : -1
+}
+
+
+
+
+if not args.no_redirect:
+ print(f"re-directing output to {os.path.join(args.output, output_file)}")
+ sys.stdout = open(os.path.join(args.output, output_file), "w")
+
+print("summary:")
+
+print(f"{count_parameters(model.cpu()) / 1e6:5.3f} M parameters")
+if hasattr(model, 'flop_count'):
+ print(f"{model.flop_count(16000) / 1e6:5.3f} MFLOPS")
+
+if ref is not None:
+ noisy = np.fromfile(os.path.join(args.testdata, 'noisy.s16'), dtype=np.int16)
+ initial_mos = pesq.pesq(16000, ref, noisy, mode='wb')
+ print(f"initial MOS (PESQ): {initial_mos}")
+
+best_loss = 1e9
+
+for ep in range(1, epochs + 1):
+ print(f"training epoch {ep}...")
+ new_loss = train_one_epoch(model, criterion, optimizer, dataloader, device, scheduler)
+
+
+ # save checkpoint
+ checkpoint['state_dict'] = model.state_dict()
+ checkpoint['loss'] = new_loss
+
+ if run_validation:
+ print("running validation...")
+ validation_loss = evaluate(model, criterion, validation_dataloader, device)
+ checkpoint['validation_loss'] = validation_loss
+
+ if validation_loss < best_loss:
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_best.pth'))
+ best_loss = validation_loss
+
+ if inference_test:
+ print("running inference test...")
+ out = model.process(testsignal, features, periods, numbits).cpu().numpy()
+ wavfile.write(os.path.join(inference_folder, f'{model_name}_epoch_{ep}.wav'), 16000, out)
+ if ref is not None:
+ mos = pesq.pesq(16000, ref, out, mode='wb')
+ print(f"MOS (PESQ): {mos}")
+
+
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_epoch_{ep}.pth'))
+ torch.save(checkpoint, os.path.join(checkpoint_dir, checkpoint_prefix + f'_last.pth'))
+
+
+ print(f"non-zero parameters: {count_nonzero_parameters(model)}\n")
+
+print('Done')
diff --git a/dnn/torch/osce/train_vocoder.py b/dnn/torch/osce/train_vocoder.py
new file mode 100644
index 00000000..590e6d1a
--- /dev/null
+++ b/dnn/torch/osce/train_vocoder.py
@@ -0,0 +1,287 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+
+import yaml
+
+try:
+ import git
+ has_git = True
+except:
+ has_git = False
+
+import torch
+from torch.optim.lr_scheduler import LambdaLR
+
+from scipy.io import wavfile
+
+import pesq
+
+from data import LPCNetVocodingDataset
+from models import model_dict
+from engine.vocoder_engine import train_one_epoch, evaluate
+
+
+from utils.lpcnet_features import load_lpcnet_features
+from utils.misc import count_parameters
+
+from losses.stft_loss import MRSTFTLoss, MRLogMelLoss
+
+
# ---------------------------------------------------------------------------
# Script set-up: parses the command line, loads the YAML setup, prepares the
# output folder and builds datasets, model, optimizer, scheduler and losses.
# All names defined here are module-level and used by the training loop below.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()

parser.add_argument('setup', type=str, help='setup yaml file')
parser.add_argument('output', type=str, help='output path')
parser.add_argument('--device', type=str, help='compute device', default=None)
parser.add_argument('--initial-checkpoint', type=str, help='initial checkpoint', default=None)
parser.add_argument('--test-features', type=str, help='path to features for testing', default=None)
parser.add_argument('--no-redirect', action='store_true', help='disables re-direction of stdout')

args = parser.parse_args()


torch.set_num_threads(4)

with open(args.setup, 'r') as f:
    setup = yaml.load(f.read(), yaml.FullLoader)

checkpoint_prefix = 'checkpoint'
output_prefix = 'output'
setup_name = 'setup.yml'
output_file = 'out.txt'


# check model
if 'name' not in setup['model']:
    print('warning: did not find model entry in setup, using default PitchPostFilter')
    model_name = 'pitchpostfilter'
else:
    model_name = setup['model']['name']

# prepare output folder
if os.path.exists(args.output):
    print("warning: output folder exists")

    reply = input('continue? (y/n): ')
    while reply not in {'y', 'n'}:
        reply = input('continue? (y/n): ')

    if reply == 'n':
        # os._exit() requires a status argument and would raise TypeError here;
        # sys.exit() terminates cleanly when the user declines.
        sys.exit(0)
else:
    os.makedirs(args.output, exist_ok=True)

checkpoint_dir = os.path.join(args.output, 'checkpoints')
os.makedirs(checkpoint_dir, exist_ok=True)

# add repo info to setup
if has_git:
    working_dir = os.path.split(__file__)[0]
    try:
        repo = git.Repo(working_dir, search_parent_directories=True)
        setup['repo'] = dict()
        commit_hash = repo.head.object.hexsha
        urls = list(repo.remote().urls)
        is_dirty = repo.is_dirty()

        if is_dirty:
            print("warning: repo is dirty")

        setup['repo']['hash'] = commit_hash
        setup['repo']['urls'] = urls
        setup['repo']['dirty'] = is_dirty
    except Exception:
        # repository information is best-effort only (e.g. script run outside a git checkout)
        has_git = False

# dump setup
with open(os.path.join(args.output, setup_name), 'w') as f:
    yaml.dump(setup, f)

ref = None
# prepare inference test if wanted
inference_test = False
if args.test_features is not None:
    test_features = load_lpcnet_features(args.test_features)
    features = test_features['features']
    periods = test_features['periods']
    inference_folder = os.path.join(args.output, 'inference_test')
    os.makedirs(inference_folder, exist_ok=True)
    inference_test = True


# training parameters
batch_size = setup['training']['batch_size']
epochs = setup['training']['epochs']
lr = setup['training']['lr']
lr_decay_factor = setup['training']['lr_decay_factor']

# load training dataset
data_config = setup['data']
data = LPCNetVocodingDataset(setup['dataset'], **data_config)

# load validation dataset if given
if 'validation_dataset' in setup:
    validation_data = LPCNetVocodingDataset(setup['validation_dataset'], **data_config)

    validation_dataloader = torch.utils.data.DataLoader(validation_data, batch_size=batch_size, drop_last=True, num_workers=8)

    run_validation = True
else:
    run_validation = False

# create model
model = model_dict[model_name](*setup['model']['args'], **setup['model']['kwargs'])

if args.initial_checkpoint is not None:
    print(f"loading state dict from {args.initial_checkpoint}...")
    chkpt = torch.load(args.initial_checkpoint, map_location='cpu')
    model.load_state_dict(chkpt['state_dict'])

# set compute device
if args.device is None:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device(args.device)

# push model to device
model.to(device)

# dataloader
dataloader = torch.utils.data.DataLoader(data, batch_size=batch_size, drop_last=True, shuffle=True, num_workers=8)

# optimizer is introduced to trainable parameters
parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(parameters, lr=lr)

# learning rate scheduler: lr is scaled by 1 / (1 + lr_decay_factor * step)
scheduler = LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))

# loss weights
w_l1 = setup['training']['loss']['w_l1']
w_lm = setup['training']['loss']['w_lm']
w_slm = setup['training']['loss']['w_slm']
w_sc = setup['training']['loss']['w_sc']
w_logmel = setup['training']['loss']['w_logmel']
w_wsc = setup['training']['loss']['w_wsc']
w_xcorr = setup['training']['loss']['w_xcorr']
w_sxcorr = setup['training']['loss']['w_sxcorr']
w_l2 = setup['training']['loss']['w_l2']

# normalization constant for the combined loss
w_sum = w_l1 + w_lm + w_sc + w_logmel + w_wsc + w_slm + w_xcorr + w_sxcorr + w_l2

stftloss = MRSTFTLoss(sc_weight=w_sc, log_mag_weight=w_lm, wsc_weight=w_wsc, smooth_log_mag_weight=w_slm, sxcorr_weight=w_sxcorr).to(device)
logmelloss = MRLogMelLoss().to(device)
+
def xcorr_loss(y_true, y_pred):
    """ Mean of (1 - normalized cross-correlation) between y_true and y_pred.

    The correlation is computed per item over all dimensions except the first;
    the per-item losses are then averaged.
    """
    reduce_dims = list(range(1, len(y_true.shape)))

    inner_product = torch.sum(y_true * y_pred, dim=reduce_dims)
    energy_true = torch.sum(y_true ** 2, dim=reduce_dims)
    energy_pred = torch.sum(y_pred ** 2, dim=reduce_dims)

    # 1e-9 keeps the square root away from zero for silent signals
    per_item = 1 - inner_product / torch.sqrt(energy_true * energy_pred + 1e-9)

    return torch.mean(per_item)
+
def td_l2_norm(y_true, y_pred):
    """ Time-domain squared error, normalized by the RMS of y_pred.

    Per item (all dimensions except the first are reduced):
    mean((y_true - y_pred)^2) / (sqrt(mean(y_pred^2)) + 1e-6); the result is
    the mean over items.
    """
    reduce_dims = list(range(1, len(y_true.shape)))

    squared_error = torch.mean((y_true - y_pred) ** 2, dim=reduce_dims)
    pred_rms = torch.mean(y_pred ** 2, dim=reduce_dims) ** .5

    per_item = squared_error / (pred_rms + 1e-6)

    return per_item.mean()
+
def td_l1(y_true, y_pred, pow=0):
    """ Time-domain mean absolute error with optional level normalization.

    Per item (all dimensions except the first are reduced) the mean absolute
    error is divided by (mean(|y_pred|) + 1e-9) ** pow; pow=0 therefore yields
    the plain mean absolute error. Returns the mean over items.
    """
    reduce_dims = list(range(1, len(y_true.shape)))

    abs_error = torch.mean(torch.abs(y_true - y_pred), dim=reduce_dims)
    level = (torch.mean(torch.abs(y_pred), dim=reduce_dims) + 1e-9) ** pow

    return torch.mean(abs_error / level)
+
def criterion(x, y):
    """ Combined training loss.

    Weighted sum of the level-normalized L1 loss, the multi-resolution STFT
    loss (whose weights are configured inside stftloss), the log-mel loss,
    the cross-correlation loss and the normalized L2 loss, divided by the sum
    of all configured loss weights (w_sum).
    """
    total = w_l1 * td_l1(x, y, pow=1)
    total = total + stftloss(x, y)
    total = total + w_logmel * logmelloss(x, y)
    total = total + w_xcorr * xcorr_loss(x, y)
    total = total + w_l2 * td_l2_norm(x, y)

    return total / w_sum
+
+
+
# model checkpoint (state_dict and loss are refreshed after every epoch)
checkpoint = {
    'setup' : setup,
    'state_dict' : model.state_dict(),
    'loss' : -1
}


def _checkpoint_path(suffix):
    """ returns the checkpoint file name inside checkpoint_dir for the given suffix """
    return os.path.join(checkpoint_dir, checkpoint_prefix + suffix)


# from here on, everything printed goes to the log file unless --no-redirect is given
if not args.no_redirect:
    log_path = os.path.join(args.output, output_file)
    print(f"re-directing output to {log_path}")
    sys.stdout = open(log_path, "w")

print("summary:")

# NOTE: model.cpu() moves the model to the CPU before the parameters are counted
print(f"{count_parameters(model.cpu()) / 1e6:5.3f} M parameters")
if hasattr(model, 'flop_count'):
    print(f"{model.flop_count(16000) / 1e6:5.3f} MFLOPS")

best_loss = 1e9

for ep in range(1, epochs + 1):
    print(f"training epoch {ep}...")
    new_loss = train_one_epoch(model, criterion, optimizer, dataloader, device, scheduler)

    # update checkpoint content
    checkpoint['state_dict'] = model.state_dict()
    checkpoint['loss'] = new_loss

    if run_validation:
        print("running validation...")
        validation_loss = evaluate(model, criterion, validation_dataloader, device)
        checkpoint['validation_loss'] = validation_loss

        # keep a copy of the checkpoint with the lowest validation loss so far
        if validation_loss < best_loss:
            torch.save(checkpoint, _checkpoint_path('_best.pth'))
            best_loss = validation_loss

    if inference_test:
        print("running inference test...")
        out = model.process(features, periods).cpu().numpy()
        wav_name = f'{model_name}_epoch_{ep}.wav'
        wavfile.write(os.path.join(inference_folder, wav_name), 16000, out)
        # ref is never set in this script, so the PESQ evaluation is currently inactive
        if ref is not None:
            mos = pesq.pesq(16000, ref, out, mode='wb')
            print(f"MOS (PESQ): {mos}")

    # per-epoch checkpoint plus a rolling "last" checkpoint
    torch.save(checkpoint, _checkpoint_path(f'_epoch_{ep}.pth'))
    torch.save(checkpoint, _checkpoint_path('_last.pth'))

    print()

print('Done')
diff --git a/dnn/torch/osce/utils/ada_conv.py b/dnn/torch/osce/utils/ada_conv.py
new file mode 100644
index 00000000..b5b93f87
--- /dev/null
+++ b/dnn/torch/osce/utils/ada_conv.py
@@ -0,0 +1,71 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+# x is (batch, nb_in_channels, nb_frames*frame_size)
+# kernels is (batch, nb_out_channels, nb_in_channels, nb_frames, coeffs)
def adaconv_kernel(x, kernels, half_window, fft_size=256):
    """ frame-wise adaptive convolution with cross-fading, computed via FFT

    Parameters:
    -----------
    x : torch.Tensor
        input of shape (batch, nb_in_channels, nb_frames * frame_size)

    kernels : torch.Tensor
        kernels of shape (batch, nb_out_channels, nb_in_channels, nb_frames, coeffs)

    half_window : torch.Tensor
        rising half of the cross-fade window; its length defines the overlap size

    fft_size : int, optional
        FFT length used for the fast convolution, defaults to 256. Each frame is
        processed in a buffer [previous frame, current frame, overlap of next
        frame, zeros], from which the samples [frame_size, 3 * frame_size) are
        read. If fft_size is too small for this layout it is increased to the
        next sufficient power of two instead of failing.

    Returns:
    --------
    torch.Tensor of shape (batch, nb_out_channels, nb_frames * frame_size)
    """
    device = x.device
    overlap_size = half_window.size(-1)
    nb_frames = kernels.size(3)
    nb_batches = kernels.size(0)
    nb_out_channels = kernels.size(1)
    nb_in_channels = kernels.size(2)
    kernel_size = kernels.size(-1)
    x = x.reshape(nb_batches, 1, nb_in_channels, nb_frames, -1)
    frame_size = x.size(-1)

    # the buffer must hold the padded input, the kernel and the slice
    # [2 * frame_size, 3 * frame_size) used for cross-fading
    min_fft_size = max(3 * frame_size, 2 * frame_size + overlap_size, kernel_size)
    if fft_size < min_fft_size:
        fft_size = 1 << (min_fft_size - 1).bit_length()
    tail_size = fft_size - 2 * frame_size - overlap_size

    # build window: [zeros, rising window, ones, falling window, zeros]
    window = torch.cat(
        [
            torch.zeros(frame_size, device=device),
            half_window,
            torch.ones(frame_size - overlap_size, device=device),
            1 - half_window,
            torch.zeros(tail_size, device=device)
        ])

    # previous frame (zeros for the first frame) and head of the next frame (zeros for the last frame)
    x_prev = torch.cat([torch.zeros_like(x[:, :, :, :1, :]), x[:, :, :, :-1, :]], dim=-2)
    x_next = torch.cat([x[:, :, :, 1:, :overlap_size], torch.zeros_like(x[:, :, :, -1:, :overlap_size])], dim=-2)
    x_padded = torch.cat([x_prev, x, x_next, torch.zeros(nb_batches, 1, nb_in_channels, nb_frames, tail_size, device=device)], -1)
    # kernels are time-reversed before zero-padding to fft_size
    k_padded = torch.cat([torch.flip(kernels, [-1]), torch.zeros(nb_batches, nb_out_channels, nb_in_channels, nb_frames, fft_size - kernel_size, device=device)], dim=-1)

    # compute convolution
    X = torch.fft.rfft(x_padded, dim=-1)
    K = torch.fft.rfft(k_padded, dim=-1)

    # n is given explicitly so that the output length equals fft_size for odd sizes, too
    out = torch.fft.irfft(X * K, n=fft_size, dim=-1)
    # combine in channels
    out = torch.sum(out, dim=2)
    # apply the cross-fading
    out = window.reshape(1, 1, 1, -1) * out
    faded_out_prev = torch.cat([torch.zeros(nb_batches, nb_out_channels, 1, frame_size, device=device), out[:, :, :-1, 2 * frame_size : 3 * frame_size]], dim=-2)
    crossfaded = out[:, :, :, frame_size : 2 * frame_size] + faded_out_prev

    return crossfaded.reshape(nb_batches, nb_out_channels, -1)
diff --git a/dnn/torch/osce/utils/complexity.py b/dnn/torch/osce/utils/complexity.py
new file mode 100644
index 00000000..4ee6e3f3
--- /dev/null
+++ b/dnn/torch/osce/utils/complexity.py
@@ -0,0 +1,8 @@
+
+
+def _conv1d_flop_count(layer, rate):
+ return 2 * ((layer.in_channels + 1) * layer.out_channels * rate / layer.stride[0] ) * layer.kernel_size[0]
+
+
+def _dense_flop_count(layer, rate):
+ return 2 * ((layer.in_features + 1) * layer.out_features * rate ) \ No newline at end of file
diff --git a/dnn/torch/osce/utils/endoscopy.py b/dnn/torch/osce/utils/endoscopy.py
new file mode 100644
index 00000000..05dd4750
--- /dev/null
+++ b/dnn/torch/osce/utils/endoscopy.py
@@ -0,0 +1,205 @@
+""" module for inspecting models during inference """
+
+import os
+
+import yaml
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+
+import torch
+import numpy as np
+
+# stores entries {key : {'fid' : fid, 'fs' : fs, 'dim' : dim, 'dtype' : dtype}}
+_state = dict()
+_folder = 'endoscopy'
+
def get_gru_gates(gru, input, state):
    """ computes the gate activations of the first GRU layer for a single step

    input and state are squeezed before the matrix products, so they are
    expected to hold a single vector each. Returns a dict with the entries
    'reset_gate', 'update_gate' and 'new_gate'.
    """
    n = gru.hidden_size

    from_input = torch.matmul(gru.weight_ih_l0, input.squeeze())
    from_state = torch.matmul(gru.weight_hh_l0, state.squeeze())

    # the stacked weights and biases hold the reset, update and new gate parts, in this order
    r, z, c = slice(0, n), slice(n, 2 * n), slice(2 * n, 3 * n)

    reset_gate = torch.sigmoid(from_input[r] + gru.bias_ih_l0[r] + from_state[r] + gru.bias_hh_l0[r])
    update_gate = torch.sigmoid(from_input[z] + gru.bias_ih_l0[z] + from_state[z] + gru.bias_hh_l0[z])

    # the reset gate scales the recurrent contribution of the new gate
    new_gate = torch.tanh(from_input[c] + gru.bias_ih_l0[c] + reset_gate * (from_state[c] + gru.bias_hh_l0[c]))

    return {'reset_gate' : reset_gate, 'update_gate' : update_gate, 'new_gate' : new_gate}
+
+
def init(folder='endoscopy'):
    """ selects the endoscopy output folder and creates it if it does not exist yet """

    global _folder
    _folder = folder

    if os.path.exists(folder):
        # existing content is kept, so files of an earlier session may be overwritten or mixed in
        print(f"warning: endoscopy folder {folder} exists. Content may be lost or inconsistent results may occur.")
        return

    os.makedirs(folder)
+
def write_data(key, data, fs):
    """ appends data to previous data written under key

    On the first call for a key, a binary file <key>.bin is opened in the
    endoscopy folder and a description file <key>.yml (fs, dim, dtype) is
    written. Later calls must use the same fs, dtype and shape.

    Parameters:
    -----------
    key : str
        name under which the data is stored (also used as file name)

    data : torch.Tensor or array-like object providing shape, dtype and tobytes()
        data to append; tensors are detached and moved to the CPU first

    fs : number
        sampling rate stored alongside the data

    Raises:
    -------
    ValueError
        if fs, dtype or shape differ from the first write under this key
    """

    global _state

    # convert to numpy if torch.Tensor is given (cpu() makes this work for tensors on other devices, too)
    if isinstance(data, torch.Tensor):
        data = data.detach().cpu().numpy()

    if key not in _state:
        # make sure the output folder exists even if init() was not called
        os.makedirs(_folder, exist_ok=True)

        # write the description first so that no file handle is registered if this fails
        with open(os.path.join(_folder, key + '.yml'), 'w') as f:
            f.write(yaml.dump({'fs' : fs, 'dim' : tuple(data.shape), 'dtype' : str(data.dtype).split('.')[-1]}))

        _state[key] = {
            'fid' : open(os.path.join(_folder, key + '.bin'), 'wb'),
            'fs' : fs,
            'dim' : tuple(data.shape),
            'dtype' : str(data.dtype)
        }
    else:
        if _state[key]['fs'] != fs:
            raise ValueError(f"fs changed for key {key}: {_state[key]['fs']} vs. {fs}")
        if _state[key]['dtype'] != str(data.dtype):
            raise ValueError(f"dtype changed for key {key}: {_state[key]['dtype']} vs. {str(data.dtype)}")
        if _state[key]['dim'] != tuple(data.shape):
            raise ValueError(f"dim changed for key {key}: {_state[key]['dim']} vs. {tuple(data.shape)}")

    _state[key]['fid'].write(data.tobytes())
+
def close(folder='endoscopy'):
    """ closes all open data files and forgets the registered keys

    The folder argument is not used; it is kept for interface compatibility.
    The registry is cleared so that a later write_data call opens a fresh file
    instead of writing to a closed one.
    """
    for entry in _state.values():
        entry['fid'].close()

    _state.clear()
+
+
def read_data(folder='endoscopy'):
    """ retrieves written data as numpy arrays

    For every <key>.yml description file in folder, the matching <key>.bin
    file is read. Returns a dict mapping key to the loaded description, with an
    additional entry 'data' holding the array reshaped to (-1,) + dim.
    """

    suffix = '.yml'
    result = dict()

    for name in os.listdir(folder):
        if not name.endswith(suffix):
            continue

        key = name[:-len(suffix)]

        with open(os.path.join(folder, key + '.yml'), 'r') as f:
            entry = yaml.load(f.read(), yaml.FullLoader)

        with open(os.path.join(folder, key + '.bin'), 'rb') as f:
            raw = np.frombuffer(f.read(), dtype=entry['dtype'])

        # first axis enumerates the individual write_data calls
        entry['data'] = raw.reshape((-1,) + entry['dim'])

        result[key] = entry

    return result
+
def get_best_reshape(shape, target_ratio=1):
    """ calculates the best 2d reshape of shape given the target ratio (rows/cols)

    The number of columns starts at sqrt(pixel_count / target_ratio) and is
    decreased until it divides the total number of elements.

    Parameters:
    -----------
    shape : sequence of int
        original shape; an empty shape is treated as a single element

    target_ratio : number, optional
        desired ratio rows / columns, defaults to 1

    Returns:
    --------
    (1,) if there is only one element, otherwise a tuple (num_rows, num_columns)
    """

    pixel_count = 1
    for s in shape:
        pixel_count *= s

    if pixel_count == 1:
        return (1,)

    num_columns = int((pixel_count / target_ratio)**.5)

    # keep the column count in [1, pixel_count]: a value of 0 (large target_ratio)
    # would cause a division by zero below, values above pixel_count never divide it
    num_columns = max(1, min(num_columns, pixel_count))

    while (pixel_count % num_columns):
        num_columns -= 1

    num_rows = pixel_count // num_columns

    return (num_rows, num_columns)
+
def get_type_and_shape(shape):
    """ selects the display type and display shape for data of the given shape

    Returns ('plot', (1,)) for single-element data. Two-dimensional shapes
    with both dimensions larger than one are displayed as image in their own
    shape. Everything else is displayed as image in the shape returned by
    get_best_reshape.
    """

    # can happen if data is one dimensional
    if len(shape) == 0:
        shape = (1,)

    # calculate pixel count
    pixel_count = 1
    for s in shape:
        pixel_count *= s

    if pixel_count == 1:
        return 'plot', (1, )

    # stay with shape if already 2-dimensional; shapes like (1, N) or (N, 1),
    # where one dimension holds all elements, are re-arranged below instead
    if len(shape) == 2:
        if (shape[0] != pixel_count) and (shape[1] != pixel_count):
            return 'image', shape

    return 'image', get_best_reshape(shape)
+
def make_animation(data, filename, start_index=80, stop_index=-80, interval=20, half_signal_window_length=80):
    """ renders the given endoscopy data as an animation and saves it as mp4

    Parameters:
    -----------
    data : dict
        entries as returned by read_data, i.e. key -> {'fs', 'dim', 'data', ...}

    filename : str
        output file name; '.mp4' is appended if missing

    start_index, stop_index : int, optional
        first and last (exclusive) frame index; negative stop_index values count
        from the end. Both are limited so that a signal window of
        half_signal_window_length samples fits on either side.

    interval : int, optional
        passed on to matplotlib's ArtistAnimation as interval, defaults to 20

    half_signal_window_length : int, optional
        half length of the window shown for 'plot' type data, defaults to 80

    Raises:
    -------
    ValueError
        if data is empty
    """

    # determine plot setup
    num_keys = len(data.keys())
    if num_keys == 0:
        raise ValueError("make_animation: data is empty")

    # at least one row, otherwise a single key would lead to a division by zero
    num_rows = max(1, int((num_keys * 3/4) ** .5))

    num_cols = (num_keys + num_rows - 1) // num_rows

    # squeeze=False keeps axs two-dimensional even for a single row or column
    fig, axs = plt.subplots(num_rows, num_cols, squeeze=False)
    fig.set_size_inches(num_cols * 5, num_rows * 5)

    display = dict()

    fs_max = max([val['fs'] for val in data.values()])

    num_samples = max([val['data'].shape[0] for val in data.values()])

    keys = sorted(data.keys())

    # inspect data
    for i, key in enumerate(keys):
        axs[i // num_cols, i % num_cols].title.set_text(key)

        display[key] = dict()

        display[key]['type'], display[key]['shape'] = get_type_and_shape(data[key]['dim'])
        display[key]['down_factor'] = data[key]['fs'] / fs_max

    start_index = max(start_index, half_signal_window_length)
    while stop_index < 0:
        stop_index += num_samples

    stop_index = min(stop_index, num_samples - half_signal_window_length)

    # actual plotting
    frames = []
    for index in range(start_index, stop_index):
        ims = []
        for i, key in enumerate(keys):
            # NOTE(review): feature_index is computed but the data below is indexed
            # with index for every key; presumably lower-rate keys were meant to
            # use feature_index - confirm before changing
            feature_index = int(round(index * display[key]['down_factor']))

            if display[key]['type'] == 'plot':
                ims.append(axs[i // num_cols, i % num_cols].plot(data[key]['data'][index - half_signal_window_length : index + half_signal_window_length], marker='P', markevery=[half_signal_window_length], animated=True, color='blue')[0])

            elif display[key]['type'] == 'image':
                ims.append(axs[i // num_cols, i % num_cols].imshow(data[key]['data'][index].reshape(display[key]['shape']), animated=True))

        frames.append(ims)

    ani = animation.ArtistAnimation(fig, frames, interval=interval, blit=True, repeat_delay=1000)

    if not filename.endswith('.mp4'):
        filename += '.mp4'

    ani.save(filename)
diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py
new file mode 100644
index 00000000..0d87ca19
--- /dev/null
+++ b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py
@@ -0,0 +1,230 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from utils.endoscopy import write_data
+from utils.softquant import soft_quant
+
class LimitedAdaptiveComb1d(nn.Module):
    """ adaptive comb filter with limited gains

    Per frame, a normalized kernel, a comb-filter gain and a global gain are
    computed from the features. The kernel is applied to the input delayed by
    the frame's lag, scaled by the comb-filter gain and added to the input;
    the sum is scaled by the global gain. Consecutive frames are cross-faded
    over the first overlap_size samples.
    """
    COUNTER = 1

    def __init__(self,
                 kernel_size,
                 feature_dim,
                 frame_size=160,
                 overlap_size=40,
                 padding=None,
                 max_lag=256,
                 name=None,
                 gain_limit_db=10,
                 global_gain_limits_db=[-6, 6],
                 norm_p=2,
                 softquant=False,
                 apply_weight_norm=False,
                 **kwargs):
        """

        Parameters:
        -----------

        kernel_size : int
            number of filter taps

        feature_dim : int
            dimension of features from which kernels and gains are computed

        frame_size : int, optional
            frame size, defaults to 160

        overlap_size : int, optional
            overlap size for filter cross-fade. Cross-fade is done on the first overlap_size samples of every frame, defaults to 40

        padding : List[int, int], optional
            left and right padding. Defaults to [kernel_size // 2, kernel_size - 1 - kernel_size // 2]

        max_lag : int, optional
            maximal pitch lag, defaults to 256

        name: str or None, optional
            specifies a name attribute for the module. If None the name is auto generated as limited_adaptive_comb1d_COUNT, where COUNT is an instance counter for LimitedAdaptiveComb1d

        gain_limit_db : float, optional
            upper limit for the comb-filter gain in dB, defaults to 10

        global_gain_limits_db : List[float, float], optional
            lower and upper limit for the global gain in dB, defaults to [-6, 6]

        norm_p : int, optional
            order of the norm used for kernel normalization, defaults to 2

        softquant : bool, optional
            if true, soft_quant is applied to the kernel-generating layer. Defaults to False

        apply_weight_norm : bool, optional
            if true, weight normalization is applied to the linear layers. Defaults to False

        """

        super(LimitedAdaptiveComb1d, self).__init__()

        self.in_channels = 1
        self.out_channels = 1
        self.feature_dim = feature_dim
        self.kernel_size = kernel_size
        self.frame_size = frame_size
        self.overlap_size = overlap_size
        self.max_lag = max_lag
        self.limit_db = gain_limit_db
        self.norm_p = norm_p

        if name is None:
            self.name = "limited_adaptive_comb1d_" + str(LimitedAdaptiveComb1d.COUNTER)
            LimitedAdaptiveComb1d.COUNTER += 1
        else:
            self.name = name

        norm = torch.nn.utils.weight_norm if apply_weight_norm else lambda x, name=None: x

        # network for generating convolution weights
        self.conv_kernel = norm(nn.Linear(feature_dim, kernel_size))

        if softquant:
            self.conv_kernel = soft_quant(self.conv_kernel)

        # comb filter gain; 0.11512925464970229 = ln(10) / 20 converts dB to log-amplitude
        self.filter_gain = norm(nn.Linear(feature_dim, 1))
        self.log_gain_limit = gain_limit_db * 0.11512925464970229
        with torch.no_grad():
            self.filter_gain.bias[:] = max(0.1, 4 + self.log_gain_limit)

        # global gain, limited to [log_min, log_max] in the log domain via tanh
        self.global_filter_gain = norm(nn.Linear(feature_dim, 1))
        log_min, log_max = global_gain_limits_db[0] * 0.11512925464970229, global_gain_limits_db[1] * 0.11512925464970229
        self.filter_gain_a = (log_max - log_min) / 2
        self.filter_gain_b = (log_max + log_min) / 2

        if padding is None:
            self.padding = [kernel_size // 2, kernel_size - 1 - kernel_size // 2]
        else:
            self.padding = padding

        # falling half of a raised-cosine window; stored as parameter without gradient
        self.overlap_win = nn.Parameter(.5 + .5 * torch.cos((torch.arange(self.overlap_size) + 0.5) * torch.pi / overlap_size), requires_grad=False)

    def forward(self, x, features, lags, debug=False):
        """ adaptive 1d comb filtering


        Parameters:
        -----------
        x : torch.tensor
            input signal of shape (batch_size, in_channels, num_samples)

        features : torch.tensor
            frame-wise features of shape (batch_size, num_frames, feature_dim)

        lags: torch.LongTensor
            frame-wise lags for comb-filtering

        debug : bool, optional
            if true and batch_size is 1, gains, kernels and lags are written via write_data

        Raises:
        -------
        ValueError
            if num_samples // frame_size differs from num_frames

        """

        batch_size = x.size(0)
        num_frames = features.size(1)
        num_samples = x.size(2)
        frame_size = self.frame_size
        overlap_size = self.overlap_size
        kernel_size = self.kernel_size
        win1 = torch.flip(self.overlap_win, [0])
        win2 = self.overlap_win

        if num_samples // self.frame_size != num_frames:
            raise ValueError('non matching sizes in LimitedAdaptiveComb1d.forward')

        conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size))
        conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=-1, keepdim=True))

        # comb-filter gain: exp(log_gain_limit - relu(.)) never exceeds the gain limit
        conv_gains = torch.exp(- torch.relu(self.filter_gain(features).permute(0, 2, 1)) + self.log_gain_limit)
        # calculate gains
        global_conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.global_filter_gain(features).permute(0, 2, 1)) + self.filter_gain_b)

        if debug and batch_size == 1:
            key = self.name + "_gains"
            write_data(key, conv_gains.detach().squeeze().cpu().numpy(), 16000 // self.frame_size)
            key = self.name + "_kernels"
            write_data(key, conv_kernels.detach().squeeze().cpu().numpy(), 16000 // self.frame_size)
            key = self.name + "_lags"
            write_data(key, lags.detach().squeeze().cpu().numpy(), 16000 // self.frame_size)
            key = self.name + "_global_conv_gains"
            write_data(key, global_conv_gains.detach().squeeze().cpu().numpy(), 16000 // self.frame_size)

        # frame-wise convolution with overlap-add
        output_frames = []
        overlap_mem = torch.zeros((batch_size, self.out_channels, self.overlap_size), device=x.device)
        x = F.pad(x, self.padding)
        x = F.pad(x, [self.max_lag, self.overlap_size])

        # sample indices of one frame including kernel context and overlap
        idx = torch.arange(frame_size + kernel_size - 1 + overlap_size).to(x.device).view(1, 1, -1)
        idx = torch.repeat_interleave(idx, batch_size, 0)
        idx = torch.repeat_interleave(idx, self.in_channels, 1)

        for i in range(num_frames):

            # gather the input of frame i, delayed by the frame's lag
            cidx = idx + i * frame_size + self.max_lag - lags[..., i].view(batch_size, 1, 1)
            xx = torch.gather(x, -1, cidx).reshape((1, batch_size * self.in_channels, -1))

            # grouped convolution applies an individual kernel to every batch item
            new_chunk = torch.conv1d(xx, conv_kernels[:, i, ...].reshape((batch_size * self.out_channels, self.in_channels, self.kernel_size)), groups=batch_size).reshape(batch_size, self.out_channels, -1)

            # add the undelayed input and apply the global gain
            offset = self.max_lag + self.padding[0]
            new_chunk = global_conv_gains[:, :, i : i + 1] * (new_chunk * conv_gains[:, :, i : i + 1] + x[..., offset + i * frame_size : offset + (i + 1) * frame_size + overlap_size])

            # overlapping part
            output_frames.append(new_chunk[:, :, : overlap_size] * win1 + overlap_mem * win2)

            # non-overlapping part
            output_frames.append(new_chunk[:, :, overlap_size : frame_size])

            # mem for next frame
            overlap_mem = new_chunk[:, :, frame_size :]

        # concatenate chunks
        output = torch.cat(output_frames, dim=-1)

        return output

    def flop_count(self, rate):
        """ estimates the number of operations per second for the given sampling rate """
        frame_rate = rate / self.frame_size
        overlap = self.overlap_size
        overhead = overlap / self.frame_size

        count = 0

        # kernel computation and filtering
        count += 2 * (frame_rate * self.feature_dim * self.kernel_size)
        count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)
        count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels

        # a0 computation
        count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels

        # windowing
        count += overlap * frame_rate * 3 * self.out_channels

        return count
diff --git a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py
new file mode 100644
index 00000000..55df8c14
--- /dev/null
+++ b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py
@@ -0,0 +1,200 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from utils.endoscopy import write_data
+
+from utils.ada_conv import adaconv_kernel
+from utils.softquant import soft_quant
+
class LimitedAdaptiveConv1d(nn.Module):
    """ adaptive convolution with limited gain and limited kernel shape

    Per frame, normalized kernels and gains are computed from the features.
    Each kernel is blended with an identity kernel according to shape_gain
    and scaled by its gain; the filtering itself, including the cross-fade
    between frames, is delegated to adaconv_kernel.
    """
    COUNTER = 1

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 feature_dim,
                 frame_size=160,
                 overlap_size=40,
                 padding=None,
                 name=None,
                 gain_limits_db=[-6, 6],
                 shape_gain_db=0,
                 norm_p=2,
                 softquant=False,
                 apply_weight_norm=False,
                 **kwargs):
        """

        Parameters:
        -----------

        in_channels : int
            number of input channels

        out_channels : int
            number of output channels

        kernel_size : int
            number of filter taps

        feature_dim : int
            dimension of features from which kernels and gains are computed

        frame_size : int
            frame size

        overlap_size : int
            overlap size for filter cross-fade. Cross-fade is done on the first overlap_size samples of every frame

        padding : List[int, int]
            left and right padding. Defaults to [kernel_size // 2, kernel_size - 1 - kernel_size // 2]

        name : str or None
            name attribute for the module. If None the name is auto generated as limited_adaptive_conv1d_COUNT, where COUNT is an instance counter for LimitedAdaptiveConv1d

        gain_limits_db : List[float, float]
            lower and upper limit for the filter gains in dB

        shape_gain_db : float
            weight of the adaptive kernel in dB (limited to 0 dB); the remainder is assigned to an identity kernel

        norm_p : int
            order of the norm used for kernel normalization

        softquant : bool
            if true, soft_quant is applied to the kernel-generating layer

        apply_weight_norm : bool
            if true, weight normalization is applied to the linear layers

        """

        super(LimitedAdaptiveConv1d, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.feature_dim = feature_dim
        self.kernel_size = kernel_size
        self.frame_size = frame_size
        self.overlap_size = overlap_size
        self.gain_limits_db = gain_limits_db
        self.shape_gain_db = shape_gain_db
        self.norm_p = norm_p

        if name is None:
            self.name = "limited_adaptive_conv1d_" + str(LimitedAdaptiveConv1d.COUNTER)
            LimitedAdaptiveConv1d.COUNTER += 1
        else:
            self.name = name

        norm = torch.nn.utils.weight_norm if apply_weight_norm else lambda x, name=None: x

        # network for generating convolution weights
        self.conv_kernel = norm(nn.Linear(feature_dim, in_channels * out_channels * kernel_size))
        if softquant:
            self.conv_kernel = soft_quant(self.conv_kernel)

        self.shape_gain = min(1, 10**(shape_gain_db / 20))

        # filter gain, limited to [log_min, log_max] in the log domain via tanh;
        # 0.11512925464970229 = ln(10) / 20 converts dB to log-amplitude
        self.filter_gain = norm(nn.Linear(feature_dim, out_channels))
        log_min, log_max = gain_limits_db[0] * 0.11512925464970229, gain_limits_db[1] * 0.11512925464970229
        self.filter_gain_a = (log_max - log_min) / 2
        self.filter_gain_b = (log_max + log_min) / 2

        if padding is None:
            self.padding = [kernel_size // 2, kernel_size - 1 - kernel_size // 2]
        else:
            self.padding = padding

        # falling half of a raised-cosine window; stored as parameter without gradient
        self.overlap_win = nn.Parameter(.5 + .5 * torch.cos((torch.arange(self.overlap_size) + 0.5) * torch.pi / overlap_size), requires_grad=False)

    def flop_count(self, rate):
        """ estimates the number of operations per second for the given sampling rate """
        frame_rate = rate / self.frame_size
        overlap = self.overlap_size
        overhead = overlap / self.frame_size

        count = 0

        # kernel computation (the linear layer outputs in_channels * out_channels * kernel_size values) and filtering
        count += 2 * (frame_rate * self.feature_dim * self.in_channels * self.out_channels * self.kernel_size)
        count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)

        # gain computation
        count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels

        # windowing
        count += 3 * overlap * frame_rate * self.out_channels

        return count

    def forward(self, x, features, debug=False):
        """ adaptive 1d convolution


        Parameters:
        -----------
        x : torch.tensor
            input signal of shape (batch_size, in_channels, num_samples)

        features : torch.tensor
            frame-wise features of shape (batch_size, num_frames, feature_dim)

        debug : bool, optional
            if true and batch_size is 1, gains and kernels are written via write_data

        Raises:
        -------
        ValueError
            if num_samples // frame_size differs from num_frames

        """

        batch_size = x.size(0)
        num_frames = features.size(1)
        num_samples = x.size(2)
        # rising half of the cross-fade window
        win1 = torch.flip(self.overlap_win, [0])

        if num_samples // self.frame_size != num_frames:
            raise ValueError('non matching sizes in LimitedAdaptiveConv1d.forward')

        conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size))

        # normalize kernels (TODO: switch to L1 and normalize over kernel and input channel dimension)
        conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=[-2, -1], keepdim=True))

        # limit shape by blending with a kernel that has a single one at tap padding[1]
        id_kernels = torch.zeros_like(conv_kernels)
        id_kernels[..., self.padding[1]] = 1

        conv_kernels = self.shape_gain * conv_kernels + (1 - self.shape_gain) * id_kernels

        # calculate gains
        conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.filter_gain(features)) + self.filter_gain_b)
        if debug and batch_size == 1:
            key = self.name + "_gains"
            write_data(key, conv_gains.permute(0, 2, 1).detach().squeeze().cpu().numpy(), 16000 // self.frame_size)
            key = self.name + "_kernels"
            write_data(key, conv_kernels.detach().squeeze().cpu().numpy(), 16000 // self.frame_size)

        conv_kernels = conv_kernels * conv_gains.view(batch_size, num_frames, self.out_channels, 1, 1)

        # (batch, frames, out, in, taps) -> (batch, out, in, frames, taps)
        conv_kernels = conv_kernels.permute(0, 2, 3, 1, 4)

        output = adaconv_kernel(x, conv_kernels, win1, fft_size=256)

        return output
diff --git a/dnn/torch/osce/utils/layers/noise_shaper.py b/dnn/torch/osce/utils/layers/noise_shaper.py
new file mode 100644
index 00000000..ba8a3af3
--- /dev/null
+++ b/dnn/torch/osce/utils/layers/noise_shaper.py
@@ -0,0 +1,100 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from utils.complexity import _conv1d_flop_count
+
class NoiseShaper(nn.Module):
    """Produces Gaussian noise scaled sample-by-sample with gains predicted from frame-wise features."""

    def __init__(self,
                 feature_dim,
                 frame_size=160
                 ):
        """

        Parameters:
        -----------

        feature_dim : int
            dimension of input features

        frame_size : int
            frame size (number of gains, i.e. output samples, generated per feature frame)

        """

        super().__init__()

        self.feature_dim = feature_dim
        self.frame_size = frame_size

        # two kernel-size-2 convolutions map every feature frame to frame_size log-gains
        self.feature_alpha1 = nn.Conv1d(self.feature_dim, frame_size, 2)
        self.feature_alpha2 = nn.Conv1d(frame_size, frame_size, 2)

    def flop_count(self, rate):
        """Returns the flop estimate at sample rate `rate`: both convolutions plus 11 flops per output sample."""

        frame_rate = rate / self.frame_size
        conv_layers = (self.feature_alpha1, self.feature_alpha2)
        conv_flops = sum(_conv1d_flop_count(layer, frame_rate) for layer in conv_layers)

        return conv_flops + 11 * frame_rate * self.frame_size

    def forward(self, features):
        """ creates temporally shaped noise


        Parameters:
        -----------
        features : torch.tensor
            frame-wise features of shape (batch_size, num_frames, feature_dim)

        Returns:
        --------
        torch.tensor
            shaped noise of shape (batch_size, 1, num_frames * frame_size)

        """

        batch_size, num_frames = features.size(0), features.size(1)
        num_samples = num_frames * self.frame_size

        # feature path: one frame of left padding before each kernel-size-2 convolution
        # keeps the number of frames unchanged
        hidden = self.feature_alpha1(F.pad(features.permute(0, 2, 1), [1, 0]))
        hidden = F.leaky_relu(hidden, 0.2)
        gains = torch.exp(self.feature_alpha2(F.pad(hidden, [1, 0]))).permute(0, 2, 1)

        # signal generation: standard normal noise scaled by the predicted gains
        noise = torch.randn((batch_size, num_frames, self.frame_size), dtype=features.dtype, device=features.device)
        shaped = gains * noise

        return shaped.reshape(batch_size, 1, num_samples)
diff --git a/dnn/torch/osce/utils/layers/pitch_auto_correlator.py b/dnn/torch/osce/utils/layers/pitch_auto_correlator.py
new file mode 100644
index 00000000..ef58ae8e
--- /dev/null
+++ b/dnn/torch/osce/utils/layers/pitch_auto_correlator.py
@@ -0,0 +1,84 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
class PitchAutoCorrelator(nn.Module):
    """Computes normalized correlations between each frame and its pitch-lagged versions.

    For every frame the lags `period - radius, ..., period + radius` are evaluated,
    giving `2 * radius + 1` correlation values per frame.
    """

    def __init__(self,
                 frame_size=80,
                 pitch_min=32,
                 pitch_max=300,
                 radius=2):
        """

        Parameters:
        -----------
        frame_size : int
            number of samples per frame

        pitch_min : int
            smallest expected pitch period (stored only, not used in forward)

        pitch_max : int
            largest supported pitch period; determines the amount of zero history

        radius : int
            number of extra lags evaluated on each side of the given period

        """

        super().__init__()

        self.frame_size = frame_size
        self.pitch_min = pitch_min
        self.pitch_max = pitch_max
        self.radius = radius

    def forward(self, x, periods):
        """
        Parameters:
        -----------
        x : torch.tensor
            signal of shape (batch_size, channels, num_samples)

        periods : torch.tensor
            integer pitch periods of shape (batch_size, num_frames); values are
            expected to lie in [radius, pitch_max]

        Returns:
        --------
        torch.tensor
            correlations of shape (batch_size, channels, num_frames, 2 * radius + 1)

        NOTE(review): the gather index has a singleton channel dimension, so the
        lagged frames appear to be taken from channel 0 only -- confirm for
        multi-channel input.
        """

        num_frames = periods.size(1)
        batch_size = periods.size(0)
        num_samples = self.frame_size * num_frames
        channels = x.size(1)

        assert num_samples == x.size(-1)

        # The history must cover the largest lag, which is pitch_max + radius.
        # Padding with only pitch_max produced negative gather indices (and a
        # runtime error) whenever a period exceeded pitch_max - radius.
        history = self.pitch_max + self.radius

        lag_offsets = torch.arange(-self.radius, self.radius + 1, device=x.device)
        sample_idx = torch.arange(num_samples, device=x.device)
        periods_up = torch.repeat_interleave(periods, self.frame_size, 1)
        lookup = sample_idx + history - periods_up
        lookup = lookup.unsqueeze(-1) + lag_offsets
        lookup = lookup.unsqueeze(1)

        # padding (zeros act as signal history before the first sample)
        x_pad = F.pad(x, [history, 0])
        x_ext = torch.repeat_interleave(x_pad.unsqueeze(-1), 2 * self.radius + 1, -1)

        # framing
        x_select = torch.gather(x_ext, 2, lookup)
        x_frames = x_pad[..., history:].reshape(batch_size, channels, num_frames, self.frame_size, 1)
        lag_frames = x_select.reshape(batch_size, 1, num_frames, self.frame_size, -1)

        # calculate auto-correlation (sums run over the samples of each frame)
        dotp = torch.sum(x_frames * lag_frames, dim=-2)
        frame_nrg = torch.sum(x_frames * x_frames, dim=-2)
        lag_frame_nrg = torch.sum(lag_frames * lag_frames, dim=-2)

        # small constant keeps the division finite for silent frames
        acorr = dotp / torch.sqrt(frame_nrg * lag_frame_nrg + 1e-9)

        return acorr
diff --git a/dnn/torch/osce/utils/layers/silk_upsampler.py b/dnn/torch/osce/utils/layers/silk_upsampler.py
new file mode 100644
index 00000000..0d20b8a6
--- /dev/null
+++ b/dnn/torch/osce/utils/layers/silk_upsampler.py
@@ -0,0 +1,167 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" This module implements the SILK upsampler from 16kHz to 24 or 48 kHz """
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+import numpy as np
+
# Fractional interpolation filters: 12 rows of 8 integer taps, scaled by 2**-15.
# Row k is the tap-reversed version of row 11 - k.
frac_fir = np.array(
    [
        [ 189, -600,   617, 30567,  2996, -1375, 425,  -46],
        [ 117, -159, -1070, 29704,  5784, -2143, 611,  -71],
        [  52,  221, -2392, 28276,  8798, -2865, 773,  -91],
        [  -4,  529, -3350, 26341, 11950, -3487, 896, -103],
        [ -48,  758, -3956, 23973, 15143, -3957, 967, -107],
        [ -80,  905, -4235, 21254, 18278, -4222, 972,  -99],
        [ -99,  972, -4222, 18278, 21254, -4235, 905,  -80],
        [-107,  967, -3957, 15143, 23973, -3956, 758,  -48],
        [-103,  896, -3487, 11950, 26341, -3350, 529,   -4],
        [ -91,  773, -2865,  8798, 28276, -2392, 221,   52],
        [ -71,  611, -2143,  5784, 29704, -1070, -159, 117],
        [ -46,  425, -1375,  2996, 30567,   617, -600, 189],
    ],
    dtype=np.float32
) / 2**15


# Coefficients of the two branches of the 2x upsampler, scaled by 2**-16.
# The third entry of each list is stored with 65536 subtracted, i.e. as a negative value.
hq_2x_up_c_even = [1746 / 2**16, 14986 / 2**16, (39083 - 65536) / 2**16]
hq_2x_up_c_odd = [6854 / 2**16, 25769 / 2**16, (55542 - 65536) / 2**16]
+
+
def get_impz(coeffs, n):
    """Returns the first `n` samples of the impulse response of three cascaded first-order recursive sections.

    Each section with gain g and state s maps an input u to the output
    s + g * (u - s) and then stores u + g * (u - s) as its new state.
    The gains are coeffs[0], coeffs[1] and 1 + coeffs[2].

    Parameters:
    -----------
    coeffs : sequence of three floats
        section coefficients

    n : int
        number of output samples

    Returns:
    --------
    np.ndarray
        impulse response of length n
    """
    gains = (coeffs[0], coeffs[1], 1 + coeffs[2])
    state = [0, 0, 0]
    response = np.zeros(n)

    for i in range(n):
        # unit impulse: 1 at the first sample, 0 afterwards
        sample = 1 if i == 0 else 0

        for k, gain in enumerate(gains):
            scaled = (sample - state[k]) * gain
            section_out = state[k] + scaled
            state[k] = sample + scaled
            sample = section_out

        response[i] = sample

    return response
+
+
+
class SilkUpsampler(nn.Module):
    """Upsamples 16 kHz signals to 24 or 48 kHz with fixed (non-trainable) filters.

    The signal is first upsampled by 2 using 128-tap FIR approximations of the
    two recursive branch filters, then resampled by 3/2 with three of the
    fractional interpolation filters. For 24 kHz output every second sample of
    the 48 kHz result is kept.
    """
    SUPPORTED_TARGET_RATES = {24000, 48000}
    SUPPORTED_SOURCE_RATES = {16000}

    def __init__(self,
                 fs_in=16000,
                 fs_out=48000):
        """
        Parameters:
        -----------
        fs_in : int
            input sampling rate, must be in SUPPORTED_SOURCE_RATES

        fs_out : int
            output sampling rate, must be in SUPPORTED_TARGET_RATES

        Raises:
        -------
        ValueError
            if either rate is unsupported
        """

        super().__init__()
        self.fs_in = fs_in
        self.fs_out = fs_out

        if fs_in not in self.SUPPORTED_SOURCE_RATES:
            raise ValueError(f'SilkUpsampler currently only supports upsampling from {self.SUPPORTED_SOURCE_RATES} Hz')

        if fs_out not in self.SUPPORTED_TARGET_RATES:
            raise ValueError(f'SilkUpsampler currently only supports upsampling to {self.SUPPORTED_TARGET_RATES} Hz')

        def frozen(array):
            # wraps a 1-d filter as a non-trainable (1, 1, taps) convolution kernel
            return nn.Parameter(torch.from_numpy(array).view(1, 1, -1), requires_grad=False)

        # hq 2x upsampler as FIR approximation (impulse responses are time-reversed
        # because F.conv1d computes a cross-correlation)
        even_taps = get_impz(hq_2x_up_c_even, 128)[::-1].copy()
        odd_taps = get_impz(hq_2x_up_c_odd, 128)[::-1].copy()

        self.hq_2x_up_even = nn.Parameter(torch.from_numpy(even_taps).float().view(1, 1, -1), requires_grad=False)
        self.hq_2x_up_odd = nn.Parameter(torch.from_numpy(odd_taps).float().view(1, 1, -1), requires_grad=False)
        self.hq_2x_up_padding = [127, 0]

        # interpolation filters
        self.frac_01_24 = frozen(frac_fir[0])
        self.frac_17_24 = frozen(frac_fir[8])
        self.frac_09_24 = frozen(frac_fir[4])

        self.stride = 1 if fs_out == 48000 else 2

    def hq_2x_up(self, x):
        """Upsamples x of shape (batch, channels, samples) by 2, interleaving even and odd branch outputs."""

        num_channels = x.size(1)
        padded = F.pad(x, self.hq_2x_up_padding)

        branches = [
            F.conv1d(padded, torch.repeat_interleave(kernel, num_channels, 0), groups=num_channels)
            for kernel in (self.hq_2x_up_even, self.hq_2x_up_odd)
        ]

        # stacking on a new last axis and flattening interleaves the two branches sample by sample
        return torch.stack(branches, dim=-1).flatten(2)

    def interpolate_3_2(self, x):
        """Resamples x of shape (batch, channels, samples) by 3/2: three outputs per two inputs."""

        num_channels = x.size(1)
        padded = F.pad(x, [8, 0])
        shifted = torch.roll(padded, -1, -1)

        # (input, kernel) pairs in output order; the third phase reads the input advanced by one sample
        phases = (
            (padded, self.frac_01_24),
            (padded, self.frac_17_24),
            (shifted, self.frac_09_24),
        )
        outputs = [
            F.conv1d(signal, torch.repeat_interleave(kernel, num_channels, 0), stride=2, groups=num_channels)
            for signal, kernel in phases
        ]

        interleaved = torch.stack(outputs, dim=-1).flatten(2)

        return interleaved[..., :-3]

    def forward(self, x):
        """Upsamples x of shape (batch, channels, samples) from fs_in to fs_out."""

        doubled = self.hq_2x_up(x)
        tripled = self.interpolate_3_2(doubled)

        return tripled[:, :, ::self.stride]
diff --git a/dnn/torch/osce/utils/layers/td_shaper.py b/dnn/torch/osce/utils/layers/td_shaper.py
new file mode 100644
index 00000000..fa7bf348
--- /dev/null
+++ b/dnn/torch/osce/utils/layers/td_shaper.py
@@ -0,0 +1,145 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from utils.complexity import _conv1d_flop_count
+from utils.softquant import soft_quant
+
class TDShaper(nn.Module):
    """Temporal shaping module.

    Scales the input signal sample-by-sample with gains predicted from
    frame-wise features and from the signal's own temporal envelope.
    Optionally adds a predicted innovation signal.
    """
    COUNTER = 1

    def __init__(self,
                 feature_dim,
                 frame_size=160,
                 avg_pool_k=4,
                 innovate=False,
                 pool_after=False,
                 softquant=False,
                 apply_weight_norm=False
                 ):
        """

        Parameters:
        -----------


        feature_dim : int
            dimension of input features

        frame_size : int
            frame size

        avg_pool_k : int, optional
            kernel size and stride for avg pooling, must divide frame_size

        innovate : bool, optional
            if True, an additional innovation signal is predicted and added

        pool_after : bool, optional
            if True, the envelope is pooled after taking the logarithm instead of before

        softquant : bool, optional
            if True, soft_quant is applied to the first feature convolution

        apply_weight_norm : bool, optional
            if True, convolutions are wrapped with torch.nn.utils.weight_norm

        """

        super().__init__()

        self.feature_dim = feature_dim
        self.frame_size = frame_size
        self.avg_pool_k = avg_pool_k
        self.innovate = innovate
        self.pool_after = pool_after

        assert frame_size % avg_pool_k == 0
        # pooled envelope values per frame plus one entry for their mean
        self.env_dim = frame_size // avg_pool_k + 1

        norm = torch.nn.utils.weight_norm if apply_weight_norm else lambda x, name=None: x

        # feature transform
        self.feature_alpha1_f = norm(nn.Conv1d(self.feature_dim, frame_size, 2))
        self.feature_alpha1_t = norm(nn.Conv1d(self.env_dim, frame_size, 2))
        self.feature_alpha2 = norm(nn.Conv1d(frame_size, frame_size, 2))

        if softquant:
            self.feature_alpha1_f = soft_quant(self.feature_alpha1_f)

        if self.innovate:
            # innovation layers take features and envelope concatenated along the channel axis
            self.feature_alpha1b = norm(nn.Conv1d(self.feature_dim + self.env_dim, frame_size, 2))
            self.feature_alpha1c = norm(nn.Conv1d(self.feature_dim + self.env_dim, frame_size, 2))

            self.feature_alpha2b = norm(nn.Conv1d(frame_size, frame_size, 2))
            self.feature_alpha2c = norm(nn.Conv1d(frame_size, frame_size, 2))

    def flop_count(self, rate):
        """Returns the flop estimate at sample rate `rate` (convolutions plus a per-sample overhead)."""

        frame_rate = rate / self.frame_size

        shape_flops = sum([_conv1d_flop_count(x, frame_rate) for x in (self.feature_alpha1_f, self.feature_alpha1_t, self.feature_alpha2)]) + 11 * frame_rate * self.frame_size

        if self.innovate:
            inno_flops = sum([_conv1d_flop_count(x, frame_rate) for x in (self.feature_alpha1b, self.feature_alpha2b, self.feature_alpha1c, self.feature_alpha2c)]) + 22 * frame_rate * self.frame_size
        else:
            inno_flops = 0

        return shape_flops + inno_flops

    def envelope_transform(self, x):
        """Computes the frame-wise log temporal envelope of x.

        Returns a tensor of shape (batch_size, num_frames, env_dim): the
        mean-removed pooled log magnitudes of each frame followed by their mean.
        """

        x = torch.abs(x)
        if self.pool_after:
            x = torch.log(x + .5**16)
            x = F.avg_pool1d(x, self.avg_pool_k, self.avg_pool_k)
        else:
            x = F.avg_pool1d(x, self.avg_pool_k, self.avg_pool_k)
            x = torch.log(x + .5**16)

        x = x.reshape(x.size(0), -1, self.env_dim - 1)
        avg_x = torch.mean(x, -1, keepdim=True)

        x = torch.cat((x - avg_x, avg_x), dim=-1)

        return x

    def forward(self, x, features, debug=False):
        """ innovate signal parts with temporal shaping


        Parameters:
        -----------
        x : torch.tensor
            input signal of shape (batch_size, 1, num_samples)

        features : torch.tensor
            frame-wise features of shape (batch_size, num_frames, feature_dim)

        debug : bool, optional
            currently unused

        Returns:
        --------
        torch.tensor
            shaped signal of shape (batch_size, 1, num_samples)

        """

        batch_size = x.size(0)
        num_frames = features.size(1)
        num_samples = x.size(2)

        # generate temporal envelope
        tenv = self.envelope_transform(x)

        # feature path (one frame of left padding per kernel-size-2 convolution)
        f = F.pad(features.permute(0, 2, 1), [1, 0])
        t = F.pad(tenv.permute(0, 2, 1), [1, 0])
        alpha = self.feature_alpha1_f(f) + self.feature_alpha1_t(t)
        alpha = F.leaky_relu(alpha, 0.2)
        alpha = torch.exp(self.feature_alpha2(F.pad(alpha, [1, 0])))
        alpha = alpha.permute(0, 2, 1)

        if self.innovate:
            # feature_alpha1b/1c expect feature_dim + env_dim input channels, so they
            # need features and envelope together; passing f alone fails with a
            # channel mismatch
            ft = torch.cat((f, t), dim=1)

            inno_alpha = F.leaky_relu(self.feature_alpha1b(ft), 0.2)
            inno_alpha = torch.exp(self.feature_alpha2b(F.pad(inno_alpha, [1, 0])))
            inno_alpha = inno_alpha.permute(0, 2, 1)

            inno_x = F.leaky_relu(self.feature_alpha1c(ft), 0.2)
            inno_x = torch.tanh(self.feature_alpha2c(F.pad(inno_x, [1, 0])))
            inno_x = inno_x.permute(0, 2, 1)

        # signal path
        y = x.reshape(batch_size, num_frames, -1)
        y = alpha * y

        if self.innovate:
            y = y + inno_alpha * inno_x

        return y.reshape(batch_size, 1, num_samples)
diff --git a/dnn/torch/osce/utils/lpcnet_features.py b/dnn/torch/osce/utils/lpcnet_features.py
new file mode 100644
index 00000000..3d109fd3
--- /dev/null
+++ b/dnn/torch/osce/utils/lpcnet_features.py
@@ -0,0 +1,112 @@
+import os
+
+import torch
+import numpy as np
+
def load_lpcnet_features(feature_file, version=2):
    """Loads a raw float32 LPCNet feature file and splits it into named parts.

    Parameters:
    -----------
    feature_file : str
        path to a file of float32 values, frame after frame

    version : int
        feature layout version (1: 55 values per frame, 2: 36 values per frame)

    Returns:
    --------
    dict
        'features' : cepstrum columns followed by the pitch correlation column,
        'periods'  : integer periods computed as (0.1 + 50 * value + 100) cast to long,
        'lpcs'     : LPC columns

    Raises:
    -------
    ValueError
        for an unknown version
    """
    # (start, stop) column ranges per layout version
    if version == 2:
        frame_length = 36
        cepstrum_cols, period_cols, corr_cols, lpc_cols = (0, 18), (18, 19), (19, 20), (20, 36)
    elif version == 1:
        frame_length = 55
        cepstrum_cols, period_cols, corr_cols, lpc_cols = (0, 18), (36, 37), (37, 38), (39, 55)
    else:
        raise ValueError(f'unknown feature version: {version}')

    frames = torch.from_numpy(np.fromfile(feature_file, dtype='float32'))
    frames = frames.reshape((-1, frame_length))

    def columns(bounds):
        return frames[:, bounds[0] : bounds[1]]

    features = torch.cat([columns(cepstrum_cols), columns(corr_cols)], dim=1)
    lpcs = columns(lpc_cols)
    periods = (0.1 + 50 * columns(period_cols) + 100).long()

    return {'features' : features, 'periods' : periods, 'lpcs' : lpcs}
+
+
+
def create_new_data(signal_path, reference_data_path, new_data_path, offset=320, preemph_factor=0.85):
    """Writes a pre-emphasized copy of a 16-bit signal and an interleaved data file built from it.

    Two files are created: `<signal_path without extension>_preemph.raw`, holding
    the signal filtered with [1, -preemph_factor], and `new_data_path`, which has
    the size of the reference file, is zero-filled, and carries the
    pre-emphasized signal (from sample `offset` on) at the odd positions and
    again at the following even positions.

    Parameters:
    -----------
    signal_path : str
        path to raw int16 signal; its length must be a multiple of 160

    reference_data_path : str
        path to raw int16 file that determines the size of the new data file

    new_data_path : str
        output path

    offset : int
        number of leading samples of the pre-emphasized signal to skip

    preemph_factor : float
        pre-emphasis coefficient
    """
    frame = 160

    ref_data = np.memmap(reference_data_path, dtype=np.int16)
    signal = np.memmap(signal_path, dtype=np.int16)

    preemph_path = os.path.splitext(signal_path)[0] + '_preemph.raw'
    signal_preemph = np.memmap(preemph_path, dtype=np.int16, mode='write', shape=signal.shape)

    assert len(signal) % 160 == 0

    taps = [1, -preemph_factor]
    # last sample of the previous frame; zero before the first frame
    mem = np.zeros(1)
    for fr in range(len(signal) // frame):
        start, stop = fr * frame, (fr + 1) * frame
        extended = np.concatenate((mem, signal[start:stop]))
        signal_preemph[start:stop] = np.convolve(extended, taps, mode='valid')
        mem = signal[stop - 1 : stop]

    new_data = np.memmap(new_data_path, dtype=np.int16, mode='write', shape=ref_data.shape)
    new_data[:] = 0

    num_kept = len(signal) - offset
    for shift in (1, 2):
        new_data[shift : 2 * num_kept + shift : 2] = signal_preemph[offset:]
+
+
def parse_warpq_scores(output_file):
    """ extracts warpq scores from output file

    Every line starting with "WARP-Q score:" contributes one float, taken from
    the text after the last occurrence of that marker. Returns the list of scores
    in file order.
    """
    marker = "WARP-Q score:"
    scores = []

    with open(output_file, "r") as f:
        for line in f:
            if line.startswith(marker):
                scores.append(float(line.split(marker)[-1]))

    return scores
+
+
def parse_stats_file(file):
    """Reads three statistics from the first three lines of a stats file.

    Each value is the float after the last ':' on its line. Returns the tuple
    (mean, bt_mean, top_mean) taken from lines 1, 2 and 3 respectively.
    """
    with open(file, "r") as f:
        lines = f.readlines()

    mean, bt_mean, top_mean = (float(lines[row].split(":")[-1]) for row in range(3))

    return mean, bt_mean, top_mean
+
def collect_test_stats(test_folder):
    """ collects statistics for all discovered metrics from test folder

    Every directory entry whose name starts with 'stats_' is parsed with
    parse_stats_file; the metric name is the file name with the 'stats_' prefix
    and the last four characters (the '.txt' extension) removed. A warning is
    printed for metric names that are not known, but they are still collected.

    Returns a dict mapping metric name to [mean, bt_mean, top_mean].
    """
    known_metrics = {'pesq', 'warpq', 'pitch_error', 'voicing_error'}
    prefix, suffix = "stats_", ".txt"

    results = dict()

    for entry in os.listdir(test_folder):
        if not entry.startswith('stats_'):
            continue

        metric = entry[len(prefix) : -len(suffix)]

        if metric not in known_metrics:
            print(f"warning: unknown metric {metric}")

        results[metric] = list(parse_stats_file(os.path.join(test_folder, entry)))

    return results
diff --git a/dnn/torch/osce/utils/misc.py b/dnn/torch/osce/utils/misc.py
new file mode 100644
index 00000000..68ee4bfd
--- /dev/null
+++ b/dnn/torch/osce/utils/misc.py
@@ -0,0 +1,95 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+from torch.nn.utils import remove_weight_norm
+
def count_parameters(model, verbose=False):
    """Returns the total number of elements over all parameters of `model`.

    Parameters:
    -----------
    model : torch.nn.Module
        module whose named parameters are counted

    verbose : bool
        if True, the element count of every parameter is printed

    Returns:
    --------
    int
        total number of parameter elements
    """
    total = 0
    for name, p in model.named_parameters():
        # numel() is exact and avoids materialising a tensor of ones per parameter;
        # a float32 sum of ones cannot represent every count above 2**24
        count = p.numel()

        if verbose:
            print(f"{name}: {count} parameters")

        total += count

    return total
+
def count_nonzero_parameters(model, verbose=False):
    """Returns the total number of non-zero elements over all parameters of `model`.

    With verbose=True the non-zero count of every parameter is printed as well.
    """
    total = 0
    for name, param in model.named_parameters():
        nonzero = torch.count_nonzero(param).item()
        if verbose:
            print(f"{name}: {nonzero} non-zero parameters")
        total = total + nonzero

    return total
def retain_grads(module):
    """Calls retain_grad() on every parameter of `module` that requires a gradient."""
    trainable = (param for param in module.parameters() if param.requires_grad)
    for param in trainable:
        param.retain_grad()
+
def get_grad_norm(module, p=2):
    """Returns the p-norm of all parameter gradients of `module`, treated as one vector.

    Parameters that do not require a gradient, or whose gradient has not been
    populated (grad is None, e.g. before backward or when unused in the forward
    pass), are skipped. If no gradient is available the result is 0.
    """
    norm = 0
    for param in module.parameters():
        # a trainable parameter without a gradient contributes nothing; without
        # this check torch.abs(None) raises a TypeError
        if param.requires_grad and param.grad is not None:
            norm = norm + (torch.abs(param.grad) ** p).sum()

    return norm ** (1/p)
+
def create_weights(s_real, s_gen, alpha):
    """Returns one weight exp(alpha * (sr[-1] - sg[-1])) per pair (sr, sg) of `s_real` and `s_gen`.

    Only the last element of each entry is used; the computation runs without
    gradient tracking.
    """
    with torch.no_grad():
        return [
            torch.exp(alpha * (real[-1] - gen[-1]))
            for real, gen in zip(s_real, s_gen)
        ]
+
+
+def _get_candidates(module: torch.nn.Module):
+ candidates = []
+ for key in module.__dict__.keys():
+ if hasattr(module, key + '_v'):
+ candidates.append(key)
+ return candidates
+
def remove_all_weight_norm(model : torch.nn.Module, verbose=False):
    """Removes weight normalization from all submodules of `model` where it can be found.

    For every submodule, each candidate name returned by _get_candidates is
    passed to remove_weight_norm. Candidates that turn out not to carry a
    weight-norm hook are skipped silently.

    Parameters:
    -----------
    model : torch.nn.Module
        model to process in place

    verbose : bool
        if True, a message is printed for every weight whose normalization was removed
    """
    for name, m in model.named_modules():
        candidates = _get_candidates(m)

        for candidate in candidates:
            try:
                remove_weight_norm(m, name=candidate)
                if verbose: print(f'removed weight norm on weight {name}.{candidate}')
            except ValueError:
                # remove_weight_norm raises ValueError when the module has no
                # weight-norm hook for this name; a bare except would also hide
                # KeyboardInterrupt/SystemExit and genuine bugs
                pass
diff --git a/dnn/torch/osce/utils/moc.py b/dnn/torch/osce/utils/moc.py
new file mode 100644
index 00000000..3dffcfd0
--- /dev/null
+++ b/dnn/torch/osce/utils/moc.py
@@ -0,0 +1,153 @@
+import numpy as np
+import scipy.signal
+
def compute_vad_mask(x, fs, stop_db=-70):
    """Computes a smooth per-sample activity mask from frame energies.

    The signal is cut into frames of ceil(fs / 50) samples (trailing samples
    that do not fill a frame are dropped). Frames whose 5-frame moving-average
    energy falls below max_frame_energy * 10 ** (stop_db / 20) are marked
    inactive, and the resulting 0/1 sequence is smoothed with a normalized
    half-sine window of one frame length.

    Returns:
    --------
    (x, mask) : the truncated signal and the mask, both of the same length
    """
    frame_length = (fs + 49) // 50
    usable = frame_length * (len(x) // frame_length)
    x = x[:usable]

    frames = x.reshape(-1, frame_length)
    energy = np.sum(frames ** 2, axis=1)
    energy_smooth = np.convolve(energy, np.ones(5) / 5, mode='same')

    threshold = energy.max() * 10 ** (stop_db/20)
    inactive = energy_smooth < threshold

    active = np.ones_like(frames)
    active[inactive, :] = 0
    active = active.reshape(-1)

    # half-sine smoothing window normalized to unit sum
    taper = np.sin(np.arange(frame_length) * np.pi / (frame_length - 1))
    taper = taper / taper.sum()

    mask = np.convolve(active, taper, mode='same')

    return x, mask
+
def convert_mask(mask, num_frames, frame_size=160, hop_size=40):
    """Converts a per-sample mask into a per-frame mask by averaging over overlapping frames.

    The mask is zero-padded or truncated to frame_size + (num_frames - 1) * hop_size
    samples; frame i then covers samples [i * hop_size, i * hop_size + frame_size).

    Returns an array with num_frames mean values.
    """
    num_samples = frame_size + (num_frames - 1) * hop_size
    missing = num_samples - len(mask)

    if missing > 0:
        mask = np.concatenate((mask, np.zeros(missing)), dtype=mask.dtype)
    else:
        mask = mask[:num_samples]

    frame_means = []
    for frame in range(num_frames):
        start = frame * hop_size
        frame_means.append(np.mean(mask[start : start + frame_size]))

    return np.array(frame_means)
+
def power_spectrum(x, window_size=160, hop_size=40, window='hamming'):
    """Computes windowed short-time power spectra of a 1-d signal.

    Parameters:
    -----------
    x : np.ndarray
        input signal

    window_size : int
        analysis window length in samples

    hop_size : int
        hop between consecutive windows

    window : str
        window name passed to scipy.signal.get_window

    Returns:
    --------
    np.ndarray
        array of shape (num_spectra, window_size // 2 + 1), where
        num_spectra = (len(x) - window_size - hop_size) // hop_size
    """
    num_spectra = (len(x) - window_size - hop_size) // hop_size
    taper = scipy.signal.get_window(window, window_size)
    num_bins = window_size // 2 + 1

    starts = [i * hop_size for i in range(num_spectra)]
    frames = np.concatenate([x[np.newaxis, s : s + window_size] for s in starts]) * taper

    spectra = np.fft.fft(frames, axis=1)[:, :num_bins]

    return np.abs(spectra) ** 2
+
+
def frequency_mask(num_bands, up_factor, down_factor):
    """Builds a (num_bands, num_bands) spreading matrix as the product of two triangular matrices.

    The lower-triangular factor has entries up_factor ** (i - j) for j <= i, the
    upper-triangular factor has entries down_factor ** (j - i) for j >= i; the
    result is upper_factor_matrix @ lower_factor_matrix.
    """
    shape = (num_bands, num_bands)
    lower = np.zeros(shape)
    upper = np.zeros(shape)

    for band in range(num_bands):
        lower[band, : band + 1] = up_factor ** np.arange(band, -1, -1)
        upper[band, band :] = down_factor ** np.arange(num_bands - band)

    return upper @ lower
+
+
def rect_fb(band_limits, num_bins=None):
    """Builds a rectangular filter bank of shape (num_bands, num_bins).

    Band i has weight 1 on bins band_limits[i] .. band_limits[i+1] - 1 and 0
    elsewhere. If num_bins is None, band_limits[-1] is used.
    """
    if num_bins is None:
        num_bins = band_limits[-1]

    num_bands = len(band_limits) - 1
    fb = np.zeros((num_bands, num_bins))

    for band, (lower, upper) in enumerate(zip(band_limits[:-1], band_limits[1:])):
        fb[band, lower:upper] = 1

    return fb
+
+
def compare(x, y, apply_vad=False):
    """ Modified version of opus_compare for 16 kHz mono signals

    Args:
        x (np.ndarray): reference input signal scaled to [-1, 1]
        y (np.ndarray): test signal scaled to [-1, 1]
        apply_vad (bool): if True, only frames with a non-negligible activity
            mask (computed on the reference signal) enter the final average

    Returns:
        float: perceptually weighted error
    """
    # filter bank: bark scale with minimum-2-bin bands and cutoff at 7.5 kHz
    band_limits = [0, 2, 4, 6, 7, 9, 11, 13, 15, 18, 22, 26, 31, 36, 43, 51, 60, 75]
    num_bands = len(band_limits) - 1
    fb = rect_fb(band_limits, num_bins=81)
    band_widths = np.sum(fb, axis=1)

    # trim samples to same size and rescale to 16-bit range
    num_samples = min(len(x), len(y))
    x = x[:num_samples] * 2**15
    y = y[:num_samples] * 2**15

    # power spectra with a constant floor
    psd_x = power_spectrum(x) + 100000
    psd_y = power_spectrum(y) + 100000

    num_frames = psd_x.shape[0]

    # average band energies of the reference
    be_x = (psd_x @ fb.T) / band_widths

    # frequency masking
    f_mask = frequency_mask(num_bands, 0.1, 0.03)
    mask_x = be_x @ f_mask.T

    # temporal masking (recursive: every frame inherits half of the previous, already updated frame)
    for i in range(1, num_frames):
        mask_x[i, :] += 0.5 * mask_x[i-1, :]

    # apply mask: the same reference-derived mask is added to both spectra
    mask_bins = 0.1 * (mask_x @ fb)
    masked_psd_x = psd_x + mask_bins
    masked_psd_y = psd_y + mask_bins

    # 2-frame average
    masked_psd_x = masked_psd_x[1:] + masked_psd_x[:-1]
    masked_psd_y = masked_psd_y[1:] + masked_psd_y[:-1]

    # distortion metric: squared log spectral ratio, averaged per band and then over bands
    ratio = masked_psd_y / masked_psd_x
    log_ratio_sq = np.log(ratio) ** 2
    Eb = (log_ratio_sq @ fb.T) / band_widths
    Ef = np.mean(Eb, axis=1)

    if apply_vad:
        _, mask = compute_vad_mask(x, 16000)
        mask = convert_mask(mask, Ef.shape[0])
    else:
        mask = np.ones_like(Ef)

    selected = Ef[mask > 1e-6]
    err = np.mean(np.abs(selected) ** 3) ** (1/6)

    return float(err)
+
if __name__ == "__main__":
    # command line entry point: prints the MOC score of a degraded wav file
    # relative to a reference wav file
    import argparse
    from scipy.io import wavfile

    parser = argparse.ArgumentParser()
    parser.add_argument('ref', type=str, help='reference wav file')
    parser.add_argument('deg', type=str, help='degraded wav file')
    parser.add_argument('--apply-vad', action='store_true')
    args = parser.parse_args()

    fs1, x = wavfile.read(args.ref)
    fs2, y = wavfile.read(args.deg)

    # both files must be sampled at 16 kHz; testing only max(fs1, fs2) would
    # accept e.g. an 8 kHz file paired with a 16 kHz file
    if fs1 != 16000 or fs2 != 16000:
        raise ValueError('error: encountered sampling frequency diffrent from 16kHz')

    # scale samples to [-1, 1] as expected by compare
    # NOTE(review): this assumes 16-bit PCM input -- confirm for other wav formats
    x = x.astype(np.float32) / 2**15
    y = y.astype(np.float32) / 2**15

    err = compare(x, y, apply_vad=args.apply_vad)

    print(f"MOC: {err}")
diff --git a/dnn/torch/osce/utils/pitch.py b/dnn/torch/osce/utils/pitch.py
new file mode 100644
index 00000000..2a233812
--- /dev/null
+++ b/dnn/torch/osce/utils/pitch.py
@@ -0,0 +1,122 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+
def hangover(lags, num_frames=10):
    """Fills runs of zero lags with the most recent non-zero lag.

    At most `num_frames` consecutive zeros are replaced after each non-zero
    lag; later zeros in the same run are left untouched. Leading zeros are
    "replaced" by 0. The input is not modified; a filled copy is returned.
    """
    filled = lags.copy()
    zeros_in_run = 0
    held_lag = 0

    for i in range(len(filled)):
        current = filled[i]

        if current != 0:
            zeros_in_run = 0
            held_lag = current
        elif zeros_in_run < num_frames:
            filled[i] = held_lag
            zeros_in_run += 1

    return filled
+
+
def smooth_pitch_lags(lags, d=2):
    """Smooths pitch lags with a triangular kernel, processing groups of four lags at a time.

    Each group of four lags is extended by `d` lags of context on both sides
    and convolved with a normalized triangular kernel of length 2 * d + 1.
    Groups that are entirely zero are left unchanged, and all-zero context is
    replaced by a mirrored copy of the group's own border lags. Context is
    always taken from the unsmoothed input.

    Parameters:
    -----------
    lags : np.ndarray
        pitch lags; only the first 4 * (len(lags) // 4) entries are processed

    d : int
        context size on each side, must be smaller than 4

    Returns:
    --------
    np.ndarray
        copy of lags with smoothed (rounded) values
    """

    assert d < 4

    group = 4
    num_silk_frames = len(lags) // group

    smoothed_lags = lags.copy()

    # triangular kernel 1, ..., d, d+1, d, ..., 1 normalized to unit sum
    ramp = np.arange(1, d+1)
    kernel = np.concatenate((ramp, [d+1], ramp[::-1]), dtype=np.float32)
    kernel = kernel / np.sum(kernel)

    # left context of the first group: mirrored first d lags
    previous = lags[0:d][::-1]
    for i in range(num_silk_frames):
        start, stop = i * group, (i + 1) * group
        frame = lags[start:stop]

        if np.max(np.abs(frame)) == 0:
            previous = frame[group-d:]
            continue

        mirrored_tail = frame[group-d:][::-1]

        if i == num_silk_frames - 1:
            following = mirrored_tail
        else:
            following = lags[stop : stop + d]

        if np.max(np.abs(following)) == 0:
            following = mirrored_tail

        if np.max(np.abs(previous)) == 0:
            previous = frame[0:d][::-1]

        extended = np.concatenate((previous, frame, following), dtype=np.float32)
        smoothed_lags[start:stop] = np.round(np.convolve(extended, kernel, mode='valid'))

        previous = frame[group-d:]

    return smoothed_lags
+
+def calculate_acorr_window(x, frame_size, lags, history=None, max_lag=300, radius=2, add_double_lag_acorr=False, no_pitch_threshold=32):
+ eps = 1e-9
+
+ lag_multiplier = 2 if add_double_lag_acorr else 1
+
+ if history is None:
+ history = np.zeros(lag_multiplier * max_lag + radius, dtype=x.dtype)
+
+ offset = len(history)
+
+ assert offset >= max_lag + radius
+ assert len(x) % frame_size == 0
+
+ num_frames = len(x) // frame_size
+ lags = lags.copy()
+
+ x_ext = np.concatenate((history, x), dtype=x.dtype)
+
+ d = radius
+ num_acorrs = 2 * d + 1
+ acorrs = np.zeros((num_frames, lag_multiplier * num_acorrs), dtype=x.dtype)
+
+ for idx in range(num_frames):
+ lag = lags[idx].item()
+ frame = x_ext[offset + idx * frame_size : offset + (idx + 1) * frame_size]
+
+ for k in range(lag_multiplier):
+ lag1 = (k + 1) * lag if lag >= no_pitch_threshold else lag
+ for j in range(num_acorrs):
+ past = x_ext[offset + idx * frame_size - lag1 + j - d : offset + (idx + 1) * frame_size - lag1 + j - d]
+ acorrs[idx, j + k * num_acorrs] = np.dot(frame, past) / np.sqrt(np.dot(frame, frame) * np.dot(past, past) + eps)
+
+ return acorrs, lags \ No newline at end of file
diff --git a/dnn/torch/osce/utils/silk_features.py b/dnn/torch/osce/utils/silk_features.py
new file mode 100644
index 00000000..8c5dbf05
--- /dev/null
+++ b/dnn/torch/osce/utils/silk_features.py
@@ -0,0 +1,144 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+
+import numpy as np
+import torch
+
+import scipy
+import scipy.signal
+
+from utils.pitch import hangover, calculate_acorr_window
+from utils.spec import create_filter_bank, cepstrum, log_spectrum, log_spectrum_from_lpc
+
def spec_from_lpc(a, n_fft=128, eps=1e-9):
    """Inverse squared-magnitude spectrum of the filter polynomial built from `a`.

    The sequence [1, -a_1, ..., -a_order] is zero padded to `n_fft` samples,
    transformed with an FFT along the last axis, and the reciprocal of the
    squared magnitude (plus `eps`) of the first n_fft//2 + 1 bins is returned.

    Args:
        a: array whose last axis holds the coefficients.
        n_fft: FFT length; must exceed order + 1.
        eps: regularizer added before taking the reciprocal.

    Returns:
        Array of shape (*a.shape[:-1], n_fft // 2 + 1).
    """
    order = a.shape[-1]
    assert order + 1 < n_fft

    coeffs = np.zeros(tuple(a.shape[:-1]) + (n_fft,))
    coeffs[..., 0] = 1
    coeffs[..., 1:order + 1] = -a

    num_bins = n_fft // 2 + 1
    response = np.fft.fft(coeffs, axis=-1)
    power_response = np.abs(response[..., :num_bins]) ** 2

    return 1 / (power_response + eps)
+
def silk_feature_factory(no_pitch_value=256,
                         acorr_radius=2,
                         pitch_hangover=8,
                         num_bands_clean_spec=64,
                         num_bands_noisy_spec=18,
                         noisy_spec_scale='opus',
                         noisy_apply_dct=True,
                         add_double_lag_acorr=False
                         ):
    """Build a feature extractor closure with pre-computed window and filter banks.

    The cosine analysis window (320 samples) and the two filter banks are
    created once here; the returned `create_features` function reuses them
    on every call.

    Returns:
        create_features(noisy, noisy_history, lpcs, gains, ltps, periods)
        -> (features, periods) where `features` is a float32 array formed by
        concatenating, along the last axis, the LPC log spectrum (scaled by
        0.3), the noisy-signal cepstrum or log spectrum (each row repeated
        twice), the autocorrelation values, `ltps` and the log gains, and
        `periods` is the post-processed period array cast to int64.
    """
    analysis_window = scipy.signal.windows.cosine(320)
    fb_clean_spec = create_filter_bank(num_bands_clean_spec, 320, scale='erb', round_center_bins=True, normalize=True)
    fb_noisy_spec = create_filter_bank(num_bands_noisy_spec, 320, scale=noisy_spec_scale, round_center_bins=True, normalize=True)

    def create_features(noisy, noisy_history, lpcs, gains, ltps, periods):
        # work on a private copy: the entries equal to 0 are overwritten below
        periods = periods.copy()
        if pitch_hangover > 0:
            periods = hangover(periods, num_frames=pitch_hangover)
        periods[periods == 0] = no_pitch_value

        clean_spectrum = 0.3 * log_spectrum_from_lpc(lpcs, fb=fb_clean_spec, n_fft=320)

        # prepend the last 160 history samples so the first 320-sample analysis
        # frame is complete; with or without DCT depending on configuration
        spectral_transform = cepstrum if noisy_apply_dct else log_spectrum
        padded_noisy = np.concatenate((noisy_history[-160:], noisy), dtype=np.float32)
        noisy_cepstrum = np.repeat(spectral_transform(padded_noisy, 320, fb_noisy_spec, analysis_window), 2, 0)

        log_gains = np.log(gains + 1e-9).reshape(-1, 1)

        acorr, _ = calculate_acorr_window(noisy, 80, periods, noisy_history, radius=acorr_radius, add_double_lag_acorr=add_double_lag_acorr)

        features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)

        return features, periods.astype(np.int64)

    return create_features
+
+
+
def load_inference_data(path,
                        no_pitch_value=256,
                        skip=92,
                        preemph=0.85,
                        acorr_radius=2,
                        pitch_hangover=8,
                        num_bands_clean_spec=64,
                        num_bands_noisy_spec=18,
                        noisy_spec_scale='opus',
                        noisy_apply_dct=True,
                        add_double_lag_acorr=False,
                        **kwargs):
    """Load the feature dumps and noisy signal stored in `path` for inference.

    Reads `features_lpc.f32`, `features_ltp.f32`, `features_gain.f32`,
    `features_period.s16`, `features_num_bits.s32`,
    `features_num_bits_smooth.f32` and `noisy.s16` from `path`, trims all of
    them to a common number of frames (80 samples per frame, frames counted
    in groups of four) and computes the model input features.

    Args:
        path: folder containing the files listed above.
        skip: number of zero samples prepended to the noisy signal.
        preemph: pre-emphasis coefficient applied to the returned signal
            (after feature extraction); values <= 0 disable it.
        **kwargs: accepted and ignored (a notice is printed).
        Remaining arguments are forwarded to `silk_feature_factory`.

    Returns:
        Tuple of torch tensors (signal, features, periods, numbits), where
        `numbits` holds [num_bits, num_bits_smooth] per frame.
    """

    print(f"[load_inference_data]: ignoring keyword arguments {kwargs.keys()}...")

    lpcs = np.fromfile(os.path.join(path, 'features_lpc.f32'), dtype=np.float32).reshape(-1, 16)
    ltps = np.fromfile(os.path.join(path, 'features_ltp.f32'), dtype=np.float32).reshape(-1, 5)
    gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32)
    periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16)
    num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32).astype(np.float32).reshape(-1, 1)
    num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32).reshape(-1, 1)

    # load signal, add back delay and pre-emphasize
    signal = np.fromfile(os.path.join(path, 'noisy.s16'), dtype=np.int16).astype(np.float32) / (2 ** 15)
    signal = np.concatenate((np.zeros(skip, dtype=np.float32), signal), dtype=np.float32)

    create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_double_lag_acorr)

    # four 80-sample frames per 320-sample packet; trim everything consistently
    num_frames = min((len(signal) // 320) * 4, len(lpcs))
    signal = signal[: num_frames * 80]
    lpcs = lpcs[: num_frames]
    ltps = ltps[: num_frames]
    gains = gains[: num_frames]
    periods = periods[: num_frames]
    num_bits = num_bits[: num_frames // 4]
    # fixed: previously sliced `num_bits`, which threw away the smoothed values
    num_bits_smooth = num_bits_smooth[: num_frames // 4]

    # per-packet values are repeated for each of the four frames in a packet
    numbits = np.repeat(np.concatenate((num_bits, num_bits_smooth), axis=-1, dtype=np.float32), 4, axis=0)

    features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods)

    if preemph > 0:
        signal[1:] -= preemph * signal[:-1]

    return torch.from_numpy(signal), torch.from_numpy(features), torch.from_numpy(periods), torch.from_numpy(numbits)
diff --git a/dnn/torch/osce/utils/softquant.py b/dnn/torch/osce/utils/softquant.py
new file mode 100644
index 00000000..5fca5b2a
--- /dev/null
+++ b/dnn/torch/osce/utils/softquant.py
@@ -0,0 +1,110 @@
+import torch
+
@torch.no_grad()
def compute_optimal_scale(weight):
    """Per-output-row quantization scale for a 2-D weight matrix.

    For every row the scale is the larger of
      * max |w| / 127, and
      * max |w[2i] + w[2i+1]| / 129 over adjacent column pairs.

    Args:
        weight: tensor of shape (n_out, n_in) with n_in divisible by 4.

    Returns:
        Tensor of shape (n_out,) holding one scale per row.
    """
    n_out, n_in = weight.shape
    assert n_in % 4 == 0

    # Pad the row count up to the next multiple of 8.  (The previous formula,
    # n_out - n_out % 8, did not produce a multiple of 8.)  The padded rows are
    # all-zero and are cut off again below, so they never affect the result.
    pad = (-n_out) % 8
    if pad:
        weight = torch.cat((weight, torch.zeros((pad, n_in), dtype=weight.dtype, device=weight.device)), dim=0)

    weight_max_abs, _ = torch.max(torch.abs(weight), dim=1)
    weight_max_sum, _ = torch.max(torch.abs(weight[:, : n_in : 2] + weight[:, 1 : n_in : 2]), dim=1)
    scale_max = weight_max_abs / 127
    scale_sum = weight_max_sum / 129

    scale = torch.maximum(scale_max, scale_sum)

    return scale[:n_out]
+
@torch.no_grad()
def q_scaled_noise(module, weight):
    """Uniform noise in [-0.5, 0.5) scaled by the per-row quantization scale.

    The weight is first brought into a 2-D layout that depends on the module
    type, the row scales are obtained from `compute_optimal_scale`, and the
    scaled noise is mapped back to the layout of `weight`.

    Args:
        module: module owning `weight`; its type selects the 2-D layout.
        weight: Conv1d / ConvTranspose1d kernel or any 2-D weight tensor.

    Returns:
        Noise tensor with the same shape as `weight`.

    Raises:
        ValueError: if the module is not a (transposed) 1-D convolution and
            `weight` is not 2-D.
    """

    def _scaled_uniform(flat_weight):
        # draw the noise first, then scale every row by its quantization step
        uniform = torch.rand_like(flat_weight) - 0.5
        row_scale = compute_optimal_scale(flat_weight)
        return uniform * row_scale.unsqueeze(-1)

    if isinstance(module, torch.nn.Conv1d):
        flat = weight.permute(0, 2, 1).flatten(1)
        flat_noise = _scaled_uniform(flat)
        return flat_noise.reshape(weight.size(0), weight.size(2), weight.size(1)).permute(0, 2, 1)

    if isinstance(module, torch.nn.ConvTranspose1d):
        i, o, k = weight.shape
        flat = weight.permute(2, 1, 0).reshape(k * o, i)
        flat_noise = _scaled_uniform(flat)
        return flat_noise.reshape(k, o, i).permute(2, 1, 0)

    if len(weight.shape) == 2:
        return _scaled_uniform(weight)

    raise ValueError('unknown quantization setting')
+
class SoftQuant:
    """Forward hook pair that perturbs weights with quantization-like noise.

    While the module is in training mode, the pre-forward hook adds uniform
    noise to every parameter listed in `names`, and the post-forward hook
    subtracts the same noise again, so the stored weights are unchanged
    after the forward pass.

    Attributes:
        names: attribute names of the weights to perturb.
        scale: noise amplitude relative to max |weight|; when None the
            per-row scale from `q_scaled_noise` is used instead.
        quantization_noise: dict name -> noise tensor between the two hooks,
            otherwise None.
    """
    names: list

    def __init__(self, names, scale) -> None:
        self.names = names
        self.quantization_noise = None
        self.scale = scale

    def __call__(self, module, inputs, *args, before=True):
        """Hook body; `before` selects the add (True) or remove (False) phase."""
        if not module.training: return

        if before:
            self.quantization_noise = dict()
            for name in self.names:
                weight = getattr(module, name)
                with torch.no_grad():
                    if self.scale is None:
                        noise = q_scaled_noise(module, weight)
                    else:
                        noise = self.scale * weight.abs().max() * (torch.rand_like(weight) - 0.5)
                    self.quantization_noise[name] = noise
                    weight.data[:] = weight + noise
        else:
            # nothing to undo if the pre-hook did not run in training mode
            if self.quantization_noise is None:
                return
            for name in self.names:
                weight = getattr(module, name)
                with torch.no_grad():
                    weight.data[:] = weight - self.quantization_noise[name]
            self.quantization_noise = None

    @staticmethod
    def apply(module, names=['weight'], scale=None):
        """Register the hook pair on `module` and return the SoftQuant object.

        Raises:
            ValueError: if `module` lacks one of the attributes in `names`.
        """
        fn = SoftQuant(names, scale)

        for name in names:
            if not hasattr(module, name):
                raise ValueError(f"module {type(module).__name__} has no attribute '{name}'")

        fn_before = lambda *x : fn(*x, before=True)
        fn_after = lambda *x : fn(*x, before=False)
        # tag the hooks so that remove_soft_quant can find them again
        setattr(fn_before, 'sqm', fn)
        setattr(fn_after, 'sqm', fn)

        module.register_forward_pre_hook(fn_before)
        module.register_forward_hook(fn_after)

        return fn
+
+
def soft_quant(module, names=['weight'], scale=None):
    """Attach SoftQuant noise hooks to `module` and return the module itself."""
    SoftQuant.apply(module, names, scale)
    return module
+
def remove_soft_quant(module, names=['weight']):
    """Remove the SoftQuant hooks registered on `module` for exactly `names`.

    Hooks are recognised by their `sqm` attribute, which must be a SoftQuant
    instance whose `names` equals the given `names`.

    Returns:
        The module, to allow chaining.
    """
    for hooks in (module._forward_pre_hooks, module._forward_hooks):
        # Collect the keys first: deleting entries while iterating over the
        # dict itself raises RuntimeError as soon as iteration continues.
        matching = [
            key for key, hook in hooks.items()
            if hasattr(hook, 'sqm')
            and isinstance(hook.sqm, SoftQuant)
            and hook.sqm.names == names
        ]
        for key in matching:
            del hooks[key]

    return module
diff --git a/dnn/torch/osce/utils/spec.py b/dnn/torch/osce/utils/spec.py
new file mode 100644
index 00000000..59f53538
--- /dev/null
+++ b/dnn/torch/osce/utils/spec.py
@@ -0,0 +1,210 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import math as m
+import numpy as np
+import scipy
+import scipy.fftpack
+import torch
+
def erb(f):
    """Forward transform of the 'erb' scale: 24.7 * (4.37 * f + 1)."""
    inner = 4.37 * f + 1
    return 24.7 * inner
+
def inv_erb(e):
    """Inverse of `erb`: (e / 24.7 - 1) / 4.37."""
    normalized = e / 24.7 - 1
    return normalized / 4.37
+
def bark(f):
    """Forward transform of the 'bark' scale: 6 * asinh(f / 600)."""
    ratio = f/600
    return 6 * m.asinh(ratio)
+
def inv_bark(b):
    """Inverse of `bark`: 600 * sinh(b / 6)."""
    ratio = b / 6
    return 600 * m.sinh(ratio)
+
+
# Frequency-warping registry used by create_filter_bank for every scale other
# than 'opus': scale name -> [forward transform, inverse transform].
scale_dict = {
    'bark': [bark, inv_bark],
    'erb': [erb, inv_erb]
}
+
def gen_filterbank(N, Fs=16000, keep_size=False):
    """Row-normalized smoothing matrix with ERB-dependent roll-off.

    Input frequencies are the N + 1 points k / N * Fs / 2; output
    frequencies use the same grid with N points (or N + 1 when `keep_size`).
    The weight between an output and an input frequency depends on their
    distance measured in units of 24.7 + 0.108 * f_in; it is a parabola in dB
    within half a unit and a linear decay in dB outside.

    Returns:
        torch tensor of shape (N or N + 1, N + 1) whose rows sum to one.
    """
    num_out = N + 1 if keep_size else N

    def _grid(count):
        return np.arange(count, dtype='float32')/N*Fs/2

    in_freq = _grid(N+1)[None,:]
    out_freq = _grid(num_out)[:,None]

    #ERB from B.C.J Moore, An Introduction to the Psychology of Hearing, 5th Ed., page 73.
    erb_width = 24.7 + .108*in_freq
    delta = np.abs(in_freq-out_freq)/erb_width
    center = (delta<.5).astype('float32')

    # response in dB, then converted to linear gain
    response_db = -12*center*delta**2 + (1-center)*(3-12*delta)
    gains = 10.**(response_db/10.)

    row_sum = np.sum(gains, axis=1)
    gains = gains/row_sum[:, np.newaxis]
    return torch.from_numpy(gains)
+
def create_filter_bank(num_bands, n_fft=320, fs=16000, scale='bark', round_center_bins=False, return_upper=False, normalize=False):
    """Triangular filter bank over the n_fft // 2 + 1 non-negative FFT bins.

    Band centers are either the fixed Opus band edges (scale='opus', always
    18 bands; a warning is printed if another number was requested) or are
    placed using the transform pair stored in `scale_dict[scale]`.  Every bin
    is split linearly between the two bands whose centers enclose it.

    Args:
        num_bands: number of bands (forced to 18 for scale='opus').
        n_fft: FFT size.
        fs: sampling rate.
        scale: 'opus' or a key of `scale_dict`.
        round_center_bins: round band centers to integer bins.
        return_upper: append the mirrored upper half so that the bank covers
            all n_fft bins.
        normalize: scale every band so that its weights sum to one.

    Returns:
        numpy array of shape (num_bands, num_bins) or (num_bands, n_fft)
        when `return_upper` is set.
    """

    f0 = 0
    num_bins = n_fft // 2 + 1
    f1 = fs / n_fft * (num_bins - 1)
    fstep = fs / n_fft

    if scale == 'opus':
        bins_5ms = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40]
        fac = 1000 * n_fft / fs / 5
        if num_bands != 18:
            print("warning: requested Opus filter bank with num_bands != 18. Adjusting num_bands.")
            num_bands = 18
        center_bins = np.array([fac * edge for edge in bins_5ms])
    else:
        to_scale, from_scale = scale_dict[scale]

        s0 = to_scale(f0)
        s1 = to_scale(f1)

        inner_freqs = [from_scale(s0 + i * (s1 - s0) / (num_bands)) for i in range(1, num_bands - 1)]
        center_freqs = np.array([f0] + inner_freqs + [f1])
        center_bins = (center_freqs - f0) / fstep

    if round_center_bins:
        center_bins = np.round(center_bins)

    filter_bank = np.zeros((num_bands, num_bins))

    lower = 0
    for k in range(num_bins):
        # move on to the next pair of bands once the bin passed the upper center
        if k > center_bins[lower + 1]:
            lower += 1
        upper = lower + 1

        # linear cross-fade between the two enclosing band centers
        weight_lower = (center_bins[upper] - k) / (center_bins[upper] - center_bins[lower])
        filter_bank[lower, k] = weight_lower
        filter_bank[upper, k] = 1 - weight_lower

    if return_upper:
        extend = n_fft - num_bins
        mirrored = np.fliplr(filter_bank[:, 1:extend+1])
        filter_bank = np.concatenate((filter_bank, mirrored), axis=1)

    if normalize:
        filter_bank = filter_bank / np.sum(filter_bank, axis=1).reshape(-1, 1)

    return filter_bank
+
+
def compressed_log_spec(pspec):
    """Log10 band energies with limited dynamic range.

    Each log10 value is bounded from below by
      * 8 below the largest value seen so far (starting from -2), and
      * a follower that decays by 2.5 per band,
    so that bands far below their predecessors are lifted.

    The floor relative to the running maximum was previously the running
    maximum itself, which turned the output into a cumulative maximum and
    left the decaying follower without effect.

    Args:
        pspec: 1-D array of non-negative band energies.

    Returns:
        Array of the same shape holding the compressed log10 energies.
    """

    lpspec = np.zeros_like(pspec)
    num_bands = pspec.shape[-1]

    log_max = -2
    follow = -2

    for i in range(num_bands):
        tmp = np.log10(pspec[i] + 1e-9)
        tmp = max(log_max - 8, max(follow - 2.5, tmp))
        lpspec[i] = tmp
        log_max = max(log_max, tmp)
        follow = max(follow - 2.5, tmp)

    return lpspec
+
def log_spectrum_from_lpc(a, fb=None, n_fft=320, eps=1e-9, gamma=1, compress=False, power=1):
    """Log spectrum of the all-pole filter described by LPC coefficients.

    Coefficient k (1-based) is weighted by gamma ** k, the sequence
    [1, -a_1, ..., -a_order] is zero padded to `n_fft` and transformed, and
    the reciprocal of |FFT| ** power (plus `eps`) is optionally projected
    onto the filter bank `fb` before the logarithm is taken.

    Args:
        a: array whose last axis holds the LPC coefficients.
        fb: optional filter bank of shape (num_bands, n_fft // 2 + 1).
        n_fft: FFT size; must exceed order + 1.
        eps: regularizer for the reciprocal and the logarithm.
        gamma: per-coefficient exponential weighting factor.
        compress: use `compressed_log_spec` instead of the natural log.
        power: exponent applied to the FFT magnitude.

    Returns:
        Array of shape (*a.shape[:-1], num_bands) or, without filter bank,
        (*a.shape[:-1], n_fft // 2 + 1).
    """
    order = a.shape[-1]
    assert order + 1 < n_fft

    weights = (gamma ** (1 + np.arange(order))).astype(np.float32)
    a = a * weights

    coeffs = np.zeros(tuple(a.shape[:-1]) + (n_fft,))
    coeffs[..., 0] = 1
    coeffs[..., 1:order + 1] = -a

    num_bins = n_fft//2 + 1
    response = np.abs(np.fft.fft(coeffs, axis=-1)[..., :num_bins]) ** power
    S = 1 / (response + eps)

    Sf = S if fb is None else np.matmul(S, fb.T)

    if compress:
        return np.apply_along_axis(compressed_log_spec, -1, Sf)

    return np.log(Sf + eps)
+
def cepstrum_from_lpc(a, fb=None, n_fft=320, eps=1e-9, gamma=1, compress=False):
    """Cepstrum from LPC coefficients: orthonormal DCT-II of the log spectrum.

    The log spectrum is computed by `log_spectrum_from_lpc` with the given
    arguments; the DCT is taken along the last axis.
    """
    log_spec = log_spectrum_from_lpc(a, fb, n_fft, eps, gamma, compress)
    return scipy.fftpack.dct(log_spec, 2, norm='ortho')
+
+
+
def log_spectrum(x, frame_size, fb=None, window=None, power=1):
    """Log magnitude spectrum on 50% overlapping frames.

    Frames starting at multiples of `frame_size` and frames starting half a
    frame later are interleaved, optionally windowed, transformed with an
    FFT, optionally projected onto the filter bank `fb`, and passed through
    log(. + 1e-9).

    Args:
        x: 1-D numpy array; 2 * len(x) must be divisible by `frame_size`.
        frame_size: even frame length.
        fb: optional filter bank of shape (num_bands, frame_size // 2 + 1).
        window: optional analysis window of length `frame_size`.
        power: exponent applied to the FFT magnitude.

    Returns:
        Array with one row per frame and one column per band (or FFT bin).
    """

    assert(2*len(x)) % frame_size == 0
    assert frame_size % 2 == 0

    hop = frame_size // 2
    total = len(x)
    num_even = total // frame_size
    num_odd = (total - hop) // frame_size
    num_bins = hop + 1

    aligned = x[:num_even * frame_size].reshape(-1, frame_size)
    shifted = x[hop : hop + frame_size * num_odd].reshape(-1, frame_size)

    # interleave: even rows are the aligned frames, odd rows the shifted ones
    frames = np.empty((aligned.size + shifted.size), dtype=x.dtype).reshape((-1, frame_size))
    frames[::2, :] = aligned
    frames[1::2, :] = shifted

    if window is not None:
        frames *= window.reshape(1, -1)

    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1))[:, :num_bins] ** power

    if fb is not None:
        spectrum = np.matmul(spectrum, fb.T)

    return np.log(spectrum + 1e-9)
+
+
def cepstrum(x, frame_size, fb=None, window=None):
    """Cepstrum on 50% overlapping frames.

    Applies an orthonormal DCT-II along the last axis of the output of
    `log_spectrum(x, frame_size, fb, window)`.
    """
    log_spec = log_spectrum(x, frame_size, fb, window)
    return scipy.fftpack.dct(log_spec, 2, norm='ortho')
diff --git a/dnn/torch/osce/utils/templates.py b/dnn/torch/osce/utils/templates.py
new file mode 100644
index 00000000..5fc84ef1
--- /dev/null
+++ b/dnn/torch/osce/utils/templates.py
@@ -0,0 +1,347 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
# Placeholder binding; `setup_dict` is re-assigned with the actual
# name -> setup mapping after the individual setups have been defined.
setup_dict = dict()
+
# Setup template registered under the name 'lace' in setup_dict.
lace_setup = {
    'dataset': '/local/datasets/silk_enhancement_v2_full_6to64kbps/training',
    'validation_dataset': '/local/datasets/silk_enhancement_v2_full_6to64kbps/validation',
    'model': {
        'name': 'lace',
        'args': [],
        'kwargs': {
            'comb_gain_limit_db': 10,
            'cond_dim': 128,
            'conv_gain_limits_db': [-12, 12],
            'global_gain_limits_db': [-6, 6],
            'hidden_feature_dim': 96,
            'kernel_size': 15,
            'num_features': 93,
            'numbits_embedding_dim': 8,
            'numbits_range': [50, 650],
            'partial_lookahead': True,
            'pitch_embedding_dim': 64,
            'pitch_max': 300,
            'preemph': 0.85,
            'skip': 91,
            'softquant': True,
            'sparsify': False,
            'sparsification_density': 0.4,
            'sparsification_schedule': [10000, 40000, 200]
        }
    },
    'data': {
        'frames_per_sample': 100,
        'no_pitch_value': 7,
        'preemph': 0.85,
        'skip': 91,
        # This key used to be listed twice (8 here, 0 at the end of the dict);
        # Python keeps the last value, so 0 is the value that was in effect.
        'pitch_hangover': 0,
        'acorr_radius': 2,
        'num_bands_clean_spec': 64,
        'num_bands_noisy_spec': 18,
        'noisy_spec_scale': 'opus',
    },
    'training': {
        'batch_size': 256,
        'lr': 5.e-4,
        'lr_decay_factor': 2.5e-5,
        'epochs': 50,
        'loss': {
            'w_l1': 0,
            'w_lm': 0,
            'w_logmel': 0,
            'w_sc': 0,
            'w_wsc': 0,
            'w_xcorr': 0,
            'w_sxcorr': 1,
            'w_l2': 10,
            'w_slm': 2
        }
    }
}
+
+
# Setup template registered under the name 'nolace' in setup_dict.
nolace_setup = {
    'dataset': '/local/datasets/silk_enhancement_v2_full_6to64kbps/training',
    'validation_dataset': '/local/datasets/silk_enhancement_v2_full_6to64kbps/validation',
    'model': {
        'name': 'nolace',
        'args': [],
        'kwargs': {
            'avg_pool_k': 4,
            'comb_gain_limit_db': 10,
            'cond_dim': 256,
            'conv_gain_limits_db': [-12, 12],
            'global_gain_limits_db': [-6, 6],
            'hidden_feature_dim': 96,
            'kernel_size': 15,
            'num_features': 93,
            'numbits_embedding_dim': 8,
            'numbits_range': [50, 650],
            'partial_lookahead': True,
            'pitch_embedding_dim': 64,
            'pitch_max': 300,
            'preemph': 0.85,
            'skip': 91,
            'softquant': True,
            'sparsify': False,
            'sparsification_density': 0.4,
            'sparsification_schedule': [10000, 40000, 200]
        }
    },
    'data': {
        'frames_per_sample': 100,
        'no_pitch_value': 7,
        'preemph': 0.85,
        'skip': 91,
        # This key used to be listed twice (8 here, 0 at the end of the dict);
        # Python keeps the last value, so 0 is the value that was in effect.
        'pitch_hangover': 0,
        'acorr_radius': 2,
        'num_bands_clean_spec': 64,
        'num_bands_noisy_spec': 18,
        'noisy_spec_scale': 'opus',
    },
    'training': {
        'batch_size': 256,
        'lr': 5.e-4,
        'lr_decay_factor': 2.5e-5,
        'epochs': 50,
        'loss': {
            'w_l1': 0,
            'w_lm': 0,
            'w_logmel': 0,
            'w_sc': 0,
            'w_wsc': 0,
            'w_xcorr': 0,
            'w_sxcorr': 1,
            'w_l2': 10,
            'w_slm': 2
        }
    }
}
+
# Setup template registered under the name 'nolace_adv' in setup_dict; unlike
# nolace_setup it carries a 'discriminator' section and no validation dataset.
nolace_setup_adv = {
    'dataset': '/local/datasets/silk_enhancement_v2_full_6to64kbps/training',
    'model': {
        'name': 'nolace',
        'args': [],
        'kwargs': {
            'avg_pool_k': 4,
            'comb_gain_limit_db': 10,
            'cond_dim': 256,
            'conv_gain_limits_db': [-12, 12],
            'global_gain_limits_db': [-6, 6],
            'hidden_feature_dim': 96,
            'kernel_size': 15,
            'num_features': 93,
            'numbits_embedding_dim': 8,
            'numbits_range': [50, 650],
            'partial_lookahead': True,
            'pitch_embedding_dim': 64,
            'pitch_max': 300,
            'preemph': 0.85,
            'skip': 91,
            'softquant': True,
            'sparsify': False,
            'sparsification_density': 0.4,
            'sparsification_schedule': [0, 0, 200]
        }
    },
    'data': {
        'frames_per_sample': 100,
        'no_pitch_value': 7,
        'preemph': 0.85,
        'skip': 91,
        # This key used to be listed twice (8 here, 0 at the end of the dict);
        # Python keeps the last value, so 0 is the value that was in effect.
        'pitch_hangover': 0,
        'acorr_radius': 2,
        'num_bands_clean_spec': 64,
        'num_bands_noisy_spec': 18,
        'noisy_spec_scale': 'opus',
    },
    'discriminator': {
        'args': [],
        'kwargs': {
            'architecture': 'free',
            'design': 'f_down',
            'fft_sizes_16k': [
                64,
                128,
                256,
                512,
                1024,
                2048,
            ],
            'freq_roi': [0, 7400],
            'fs': 16000,
            'max_channels': 256,
            'noise_gain': 0.0,
        },
        'name': 'fdmresdisc',
    },
    'training': {
        'adv_target': 'target_orig',
        'batch_size': 64,
        'epochs': 50,
        'gen_lr_reduction': 1,
        'lambda_feat': 1.0,
        'lambda_reg': 0.6,
        'loss': {
            'w_l1': 0,
            'w_l2': 10,
            'w_lm': 0,
            'w_logmel': 0,
            'w_sc': 0,
            'w_slm': 20,
            'w_sxcorr': 1,
            'w_wsc': 0,
            'w_xcorr': 0,
        },
        'lr': 0.0001,
        'lr_decay_factor': 2.5e-09,
    }
}
+
+
# Setup template registered under the name 'lavoce' in setup_dict.
lavoce_setup = {
    'data': {
        'frames_per_sample': 100,
        'target': 'signal'
    },
    'dataset': '/local/datasets/lpcnet_large/training',
    'model': {
        'args': [],
        'kwargs': {
            'comb_gain_limit_db': 10,
            'cond_dim': 256,
            'conv_gain_limits_db': [-12, 12],
            'global_gain_limits_db': [-6, 6],
            'kernel_size': 15,
            'num_features': 19,
            'pitch_embedding_dim': 64,
            'pitch_max': 300,
            'preemph': 0.85,
            'pulses': True
        },
        'name': 'lavoce'
    },
    'training': {
        'batch_size': 256,
        'epochs': 50,
        # per-term loss weights; only w_slm and w_sxcorr are non-zero here
        'loss': {
            'w_l1': 0,
            'w_l2': 0,
            'w_lm': 0,
            'w_logmel': 0,
            'w_sc': 0,
            'w_slm': 2,
            'w_sxcorr': 1,
            'w_wsc': 0,
            'w_xcorr': 0
        },
        'lr': 0.0005,
        'lr_decay_factor': 2.5e-05
    },
    'validation_dataset': '/local/datasets/lpcnet_large/validation'
}
+
# Setup template registered under the name 'lavoce_adv' in setup_dict; unlike
# lavoce_setup it carries a 'discriminator' section and no validation dataset.
lavoce_setup_adv = {
    'data': {
        'frames_per_sample': 100,
        'target': 'signal'
    },
    'dataset': '/local/datasets/lpcnet_large/training',
    'discriminator': {
        'args': [],
        'kwargs': {
            'architecture': 'free',
            'design': 'f_down',
            'fft_sizes_16k': [
                64,
                128,
                256,
                512,
                1024,
                2048,
            ],
            'freq_roi': [0, 7400],
            'fs': 16000,
            'max_channels': 256,
            'noise_gain': 0.0,
        },
        'name': 'fdmresdisc',
    },
    'model': {
        'args': [],
        'kwargs': {
            'comb_gain_limit_db': 10,
            'cond_dim': 256,
            'conv_gain_limits_db': [-12, 12],
            'global_gain_limits_db': [-6, 6],
            'kernel_size': 15,
            'num_features': 19,
            'pitch_embedding_dim': 64,
            'pitch_max': 300,
            'preemph': 0.85,
            'pulses': True
        },
        'name': 'lavoce'
    },
    'training': {
        'batch_size': 64,
        'epochs': 50,
        'gen_lr_reduction': 1,
        'lambda_feat': 1.0,
        'lambda_reg': 0.6,
        # per-term loss weights; only w_slm and w_sxcorr are non-zero here
        'loss': {
            'w_l1': 0,
            'w_l2': 0,
            'w_lm': 0,
            'w_logmel': 0,
            'w_sc': 0,
            'w_slm': 2,
            'w_sxcorr': 1,
            'w_wsc': 0,
            'w_xcorr': 0
        },
        'lr': 0.0001,
        'lr_decay_factor': 2.5e-09
    },
}
+
+
# Registry of the setup templates defined above, keyed by setup name.
setup_dict = {
    'lace': lace_setup,
    'nolace': nolace_setup,
    'nolace_adv': nolace_setup_adv,
    'lavoce': lavoce_setup,
    'lavoce_adv': lavoce_setup_adv
}
diff --git a/dnn/torch/plc/export_plc.py b/dnn/torch/plc/export_plc.py
new file mode 100644
index 00000000..7f153c4c
--- /dev/null
+++ b/dnn/torch/plc/export_plc.py
@@ -0,0 +1,100 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+
# Extend the module search path with the sibling '../weight-exchange' folder
# (presumably so that the `wexchange` imports below resolve -- verify).
sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))


# Command line: <checkpoint> <output_dir>.  Parsing happens at module level,
# before the torch import further down; `args` is used as a global by the
# __main__ block.
parser = argparse.ArgumentParser()

parser.add_argument('checkpoint', type=str, help='model checkpoint')
parser.add_argument('output_dir', type=str, help='output folder')

args = parser.parse_args()
+
+import torch
+import numpy as np
+
+import plc
+from wexchange.torch import dump_torch_weights
+from wexchange.c_export import CWriter, print_vector
+
def c_export(args, model):
    """Write the PLC model weights as C source via CWriter.

    The files are created under `args.output_dir` with base name "plc_data".
    Dense layers are dumped unquantized, GRU layers quantized; the largest
    value returned by `dump_torch_weights` for the GRU layers is emitted as
    the PLC_MAX_RNN_UNITS define in the header.

    Args:
        args: namespace providing `checkpoint` and `output_dir`.
        model: module exposing the submodules dense_in, dense_out, gru1, gru2.
    """

    checkpoint_name = os.path.basename(args.checkpoint)
    message = f"Auto generated from checkpoint {checkpoint_name}"

    target = os.path.join(args.output_dir, "plc_data")
    writer = CWriter(target, message=message, model_struct_name='PLCModel')
    writer.header.write(
f"""
#include "opus_types.h"
"""
    )

    # dense layers: model attribute name -> exported C name
    dense_layers = [
        ('dense_in', "plc_dense_in"),
        ('dense_out', "plc_dense_out")
    ]
    for name, export_name in dense_layers:
        dump_torch_weights(writer, model.get_submodule(name), name=export_name, verbose=True, quantize=False, scale=None)

    # recurrent layers: collect the value reported for each exported GRU
    gru_layers = [
        ("gru1", "plc_gru1"),
        ("gru2", "plc_gru2"),
    ]
    rnn_units = []
    for name, export_name in gru_layers:
        gru = model.get_submodule(name)
        rnn_units.append(
            dump_torch_weights(writer, gru, export_name, verbose=True, input_sparse=False, quantize=True, scale=None, recurrent_scale=None)
        )
    max_rnn_units = max(rnn_units)

    writer.header.write(
f"""

#define PLC_MAX_RNN_UNITS {max_rnn_units}

"""
    )

    writer.close()
+
+
# Script entry point: load the checkpoint on CPU, rebuild the PLC model from
# the constructor arguments stored in it, and export the weights to C.
if __name__ == "__main__":

    os.makedirs(args.output_dir, exist_ok=True)
    # NOTE(review): torch.load may unpickle arbitrary objects; only use with
    # trusted checkpoint files.
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    model = plc.PLC(*checkpoint['model_args'], **checkpoint['model_kwargs'])
    # strict=False: mismatching state_dict keys are tolerated
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    #checkpoint = torch.load(args.checkpoint, map_location='cpu')
    #model.load_state_dict(checkpoint['state_dict'])
    c_export(args, model)
diff --git a/dnn/torch/plc/plc.py b/dnn/torch/plc/plc.py
new file mode 100644
index 00000000..f08e564d
--- /dev/null
+++ b/dnn/torch/plc/plc.py
@@ -0,0 +1,144 @@
+import numpy as np
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn.utils import weight_norm
+import math
+
# Open debug dump files, keyed by file name (kept open across calls).
fid_dict = {}
def dump_signal(x, filename, enabled=False):
    """Debug helper: append tensor `x` as raw float32 samples to `filename`.

    Dumping is disabled by default, so existing two-argument calls stay
    no-ops; pass `enabled=True` to actually write.  The file is opened once
    in binary mode and cached in `fid_dict`; it is never closed here.

    Args:
        x: torch tensor to dump.
        filename: path of the dump file.
        enabled: write only when True.
    """
    if not enabled:
        return
    fid = fid_dict.get(filename)
    if fid is None:
        # binary mode: the payload is raw float32 data, not text
        fid = open(filename, "wb")
        fid_dict[filename] = fid
    x = x.detach().cpu().numpy().astype('float32')
    x.tofile(fid)
+
+
class IDCT(nn.Module):
    """Fixed N x N cosine transform applied to the last axis of the input.

    The table entry (n, k) is cos(pi / N * (n + 0.5) * k), with column 0
    scaled by sqrt(0.5) and everything divided by sqrt(N / 2).  The table is
    a plain tensor attribute created on `device`.
    """

    def __init__(self, N, device=None):
        super().__init__()

        self.N = N
        idx = torch.arange(N, device=device)
        basis = torch.cos(torch.pi/N * (idx[:,None]+.5) * idx[None,:])
        basis[:,0] = basis[:,0] * math.sqrt(.5)
        self.table = basis / math.sqrt(N/2)

    def forward(self, x):
        # equivalent to x @ table.T, no bias
        return F.linear(x, self.table, None)
+
def plc_loss(N, device=None, alpha=1.0, bias=1.):
    """Create the PLC training loss function.

    The returned `loss(y_true, y_pred)` expects the last channel of `y_true`
    to be a mask that is multiplied onto the prediction error; the remaining
    channels are the targets.  All but the last two error channels are
    additionally transformed with an 18-point IDCT.

    Args:
        N: unused.
        device: device on which the IDCT table is created.
        alpha: weight of the band-domain terms.
        bias: weight of the one-sided band-domain term.

    Returns:
        Function returning (total, l1_loss, ceps_loss, band_loss, pitch_loss).
    """
    idct = IDCT(18, device=device)

    def loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        err = (y_pred - target)*mask

        ceps_err = err[:,:,:-2]
        band_err = idct(ceps_err)
        pitch_err = torch.abs(err[:,:,18:19])

        # weight for the one-sided band term, derived from the last target channel
        bias_mask = torch.clamp(4*target[:,:,-1:], min=0., max=1.)

        l1_loss = torch.mean(torch.abs(err))
        ceps_loss = torch.mean(torch.abs(ceps_err))
        band_loss = torch.mean(torch.abs(band_err))
        biased_loss = torch.mean(bias_mask*torch.clamp(band_err, min=0.))
        pitch_loss1 = torch.mean(torch.clamp(pitch_err,max=1.))
        pitch_loss = torch.mean(torch.clamp(pitch_err,max=.4))
        voice_bias = torch.mean(torch.clamp(-err[:,:,-1:], min=0.))

        tot = l1_loss + 0.1*voice_bias + alpha*(band_loss + bias*biased_loss) + pitch_loss1 + 8*pitch_loss
        return tot, l1_loss, ceps_loss, band_loss, pitch_loss

    return loss
+
+
+# weight initialization and clipping
def init_weights(module):
    """Orthogonally initialize the hidden-to-hidden weights of nn.GRU modules.

    Modules of any other type are left untouched.
    """
    if not isinstance(module, nn.GRU):
        return
    for name, param in module.named_parameters():
        if name.startswith('weight_hh_'):
            nn.init.orthogonal_(param)
+
+
class GLU(nn.Module):
    """Gating unit: x * sigmoid(gate(x)) with a weight-normalized linear gate.

    The constructor resets the global torch RNG seed to 5 before creating
    the gate.
    """

    def __init__(self, feat_size):
        super().__init__()

        torch.manual_seed(5)

        self.gate = weight_norm(nn.Linear(feat_size, feat_size, bias=False))

        self.init_weights()

    def init_weights(self):
        """Apply orthogonal init to the weight data of conv/linear/embedding submodules."""
        orthogonal_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for m in self.modules():
            if isinstance(m, orthogonal_types):
                nn.init.orthogonal_(m.weight.data)

    def forward(self, x):
        gate = torch.sigmoid(self.gate(x))
        return x * gate
+
class FWConv(nn.Module):
    """Linear layer over the concatenation of a carried state and the new input.

    `forward` concatenates `state` and `x` along the last axis, applies the
    weight-normalized linear layer, tanh and a GLU, and returns the output
    together with the concatenation stripped of its first `in_size` entries
    (the state for the next call).  The constructor resets the global torch
    RNG seed to 5.
    """

    def __init__(self, in_size, out_size, kernel_size=2):
        super().__init__()

        torch.manual_seed(5)

        self.in_size = in_size
        self.kernel_size = kernel_size
        self.conv = weight_norm(nn.Linear(in_size*self.kernel_size, out_size, bias=False))
        self.glu = GLU(out_size)

        self.init_weights()

    def init_weights(self):
        """Apply orthogonal init to the weight data of conv/linear/embedding submodules."""
        orthogonal_types = (nn.Conv1d, nn.ConvTranspose1d, nn.Linear, nn.Embedding)
        for m in self.modules():
            if isinstance(m, orthogonal_types):
                nn.init.orthogonal_(m.weight.data)

    def forward(self, x, state):
        xcat = torch.cat((state, x), -1)
        activated = torch.tanh(self.conv(xcat))
        new_state = xcat[:,self.in_size:]
        return self.glu(activated), new_state
+
def n(x):
    """Add uniform noise of width 1/127 to `x` and clamp the result to [-1, 1]."""
    jitter = (1./127.)*(torch.rand_like(x)-.5)
    return torch.clamp(x + jitter, min=-1., max=1.)
+
class PLC(nn.Module):
    """Dense -> GRU -> GRU -> dense network for packet loss concealment features.

    Args:
        features_in: size of the dense input layer, i.e. the combined width
            of `features` and `lost` after concatenation in `forward`.
        features_out: size of the output layer.
        cond_size: output size of the input dense layer.
        gru_size: hidden size of both GRUs.
    """

    def __init__(self, features_in=57, features_out=20, cond_size=128, gru_size=128):
        super().__init__()

        self.features_in = features_in
        self.features_out = features_out
        self.cond_size = cond_size
        self.gru_size = gru_size

        self.dense_in = nn.Linear(self.features_in, self.cond_size)
        self.gru1 = nn.GRU(self.cond_size, self.gru_size, batch_first=True)
        self.gru2 = nn.GRU(self.gru_size, self.gru_size, batch_first=True)
        self.dense_out = nn.Linear(self.gru_size, features_out)

        self.apply(init_weights)
        nb_params = sum(p.numel() for p in self.parameters())
        print(f"plc model: {nb_params} weights")

    def forward(self, features, lost, states=None):
        """Run the network on a batch of sequences.

        Args:
            features: tensor with the batch on axis 0.
            lost: tensor concatenated to `features` along the last axis.
            states: optional [gru1_state, gru2_state]; zero states of shape
                (1, batch, gru_size) are used when None.

        Returns:
            (output, [gru1_state, gru2_state]).
        """
        device = features.device
        batch_size = features.size(0)

        if states is None:
            zero_shape = (1, batch_size, self.gru_size)
            gru1_state = torch.zeros(zero_shape, device=device)
            gru2_state = torch.zeros(zero_shape, device=device)
        else:
            gru1_state, gru2_state = states[0], states[1]

        conditioning = torch.tanh(self.dense_in(torch.cat([features, lost], dim=-1)))
        gru1_out, gru1_state = self.gru1(conditioning, gru1_state)
        gru2_out, gru2_state = self.gru2(gru1_out, gru2_state)

        return self.dense_out(gru2_out), [gru1_state, gru2_state]
diff --git a/dnn/torch/plc/plc_dataset.py b/dnn/torch/plc/plc_dataset.py
new file mode 100644
index 00000000..2dfaaaf2
--- /dev/null
+++ b/dnn/torch/plc/plc_dataset.py
@@ -0,0 +1,60 @@
+import torch
+import numpy as np
+
+class PLCDataset(torch.utils.data.Dataset):
+    """Fixed-length feature sequences paired with randomly sampled loss masks.
+
+    The feature file is read as flat float32 and reshaped to
+    (nb_sequences, sequence_length, nb_features + nb_burg_features + lpc_order);
+    only the first nb_features + nb_burg_features columns are kept. The first
+    `nb_burg_features` of those columns are the ones masked as "Burg" features.
+    The loss file is read as int8 and converted to float32.
+    """
+
+    def __init__(self,
+                 feature_file,
+                 loss_file,
+                 sequence_length=1000,
+                 nb_features=20,
+                 nb_burg_features=36,
+                 lpc_order=16):
+
+        self.features_in = nb_features + nb_burg_features
+        self.nb_burg_features = nb_burg_features
+        total_features = self.features_in + lpc_order
+        self.sequence_length = sequence_length
+        self.nb_features = nb_features
+
+        self.features = np.memmap(feature_file, dtype='float32', mode='r')
+        self.lost = np.memmap(loss_file, dtype='int8', mode='r')
+        self.lost = self.lost.astype('float32')
+
+        self.nb_sequences = self.features.shape[0]//self.sequence_length//total_features
+
+        # Drop the incomplete trailing sequence, then keep only the input columns.
+        self.features = self.features[:self.nb_sequences*self.sequence_length*total_features]
+        self.features = self.features.reshape((self.nb_sequences, self.sequence_length, total_features))
+        self.features = self.features[:,:,:self.features_in]
+
+    def __len__(self):
+        return self.nb_sequences
+
+    def __getitem__(self, index):
+        """Return (masked input features, signed loss flag, target features) for one sequence."""
+        features = self.features[index, :, :]
+        burg_lost = (np.random.rand(features.shape[0]) > .1).astype('float32')
+        burg_lost = np.reshape(burg_lost, (features.shape[0], 1))
+        burg_mask = np.tile(burg_lost, (1,self.nb_burg_features))
+
+        # `high` is exclusive: the +1 keeps the last valid offset reachable and avoids a
+        # ValueError when the loss trace is exactly sequence_length entries long.
+        lost_offset = np.random.randint(0, high=self.lost.shape[0]-self.sequence_length+1)
+        lost = self.lost[lost_offset:lost_offset+self.sequence_length]
+        #randomly add a few 10-ms losses so that the model learns to handle them
+        lost = lost * (np.random.rand(lost.shape[-1]) > .02).astype('float32')
+        #randomly break long consecutive losses so we don't try too hard to predict them
+        lost = 1 - ((1-lost) * (np.random.rand(lost.shape[-1]) > .1).astype('float32'))
+        lost = np.reshape(lost, (features.shape[0], 1))
+        lost_mask = np.tile(lost, (1,features.shape[-1]))
+        in_features = features*lost_mask
+        in_features[:,:self.nb_burg_features] = in_features[:,:self.nb_burg_features]*burg_mask
+
+        out_lost = np.copy(lost)
+
+        out_features = np.concatenate([features[:,self.nb_burg_features:], 1.-out_lost], axis=-1)
+        burg_sign = 2*burg_lost - 1
+        # last dim is 1 for received packet, 0 for lost packet, and -1 when just the Burg info is missing
+        return in_features*lost_mask, lost*burg_sign, out_features
diff --git a/dnn/torch/plc/train_plc.py b/dnn/torch/plc/train_plc.py
new file mode 100644
index 00000000..12b31c4e
--- /dev/null
+++ b/dnn/torch/plc/train_plc.py
@@ -0,0 +1,145 @@
+import os
+import argparse
+import random
+import numpy as np
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+import tqdm
+
+import plc
+from plc_dataset import PLCDataset
+
+# Command-line interface: three positional paths (feature file, loss file, output folder)
+# plus optional model and training hyperparameters.
+parser = argparse.ArgumentParser()
+
+parser.add_argument('features', type=str, help='path to feature file in .f32 format')
+parser.add_argument('loss', type=str, help='path to signal file in .s8 format')
+parser.add_argument('output', type=str, help='path to output folder')
+
+parser.add_argument('--suffix', type=str, help="model name suffix", default="")
+parser.add_argument('--cuda-visible-devices', type=str, help="comma separates list of cuda visible device indices, default: CUDA_VISIBLE_DEVICES", default=None)
+
+
+model_group = parser.add_argument_group(title="model parameters")
+model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 128", default=128)
+model_group.add_argument('--gru-size', type=int, help="GRU size, default: 128", default=128)
+
+training_group = parser.add_argument_group(title="training parameters")
+training_group.add_argument('--batch-size', type=int, help="batch size, default: 512", default=512)
+training_group.add_argument('--lr', type=float, help='learning rate, default: 1e-3', default=1e-3)
+training_group.add_argument('--epochs', type=int, help='number of training epochs, default: 20', default=20)
+training_group.add_argument('--sequence-length', type=int, help='sequence length, default: 15', default=15)
+training_group.add_argument('--lr-decay', type=float, help='learning rate decay factor, default: 1e-4', default=1e-4)
+training_group.add_argument('--initial-checkpoint', type=str, help='initial checkpoint to start training from, default: None', default=None)
+
+args = parser.parse_args()
+
+# Only override CUDA_VISIBLE_DEVICES when the option was given; otherwise the
+# value already present in the environment is left untouched.
+if args.cuda_visible_devices != None:
+ os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_visible_devices
+
+# checkpoints
+# `checkpoint` is the dict saved after each epoch; it records the hyperparameters
+# below together with the model state.
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+checkpoint = dict()
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+
+# training parameters
+batch_size = args.batch_size
+lr = args.lr
+epochs = args.epochs
+sequence_length = args.sequence_length
+lr_decay = args.lr_decay
+
+adam_betas = [0.8, 0.95]
+adam_eps = 1e-8
+features_file = args.features
+loss_file = args.loss
+
+# model parameters
+cond_size = args.cond_size
+
+
+checkpoint['batch_size'] = batch_size
+checkpoint['lr'] = lr
+checkpoint['lr_decay'] = lr_decay
+checkpoint['epochs'] = epochs
+checkpoint['sequence_length'] = sequence_length
+checkpoint['adam_betas'] = adam_betas
+
+
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+# The constructor arguments are stored so the model can be rebuilt from a checkpoint alone.
+checkpoint['model_args'] = ()
+checkpoint['model_kwargs'] = {'cond_size': cond_size, 'gru_size': args.gru_size}
+print(checkpoint['model_kwargs'])
+model = plc.PLC(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+# NOTE(review): loading an initial checkpoint rebinds `checkpoint`, so the hyperparameter
+# entries recorded above are replaced by whatever the loaded file contains — confirm intended.
+if type(args.initial_checkpoint) != type(None):
+ checkpoint = torch.load(args.initial_checkpoint, map_location='cpu')
+ model.load_state_dict(checkpoint['state_dict'], strict=False)
+
+checkpoint['state_dict'] = model.state_dict()
+
+
+dataset = PLCDataset(features_file, loss_file, sequence_length=sequence_length)
+dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)
+
+
+optimizer = torch.optim.AdamW(model.parameters(), lr=lr, betas=adam_betas, eps=adam_eps)
+
+
+# learning rate scheduler
+# The base learning rate is multiplied by 1 / (1 + lr_decay * x), where x is the
+# number of scheduler.step() calls made so far.
+scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay * x))
+
+states = None
+
+plc_loss = plc.plc_loss(18, device=device)
+# Training entry point: for each epoch, iterate over the dataloader, optimize the
+# model on the PLC loss and write a checkpoint file.
+# NOTE(review): indentation is collapsed in this view, so the exact nesting of
+# scheduler.step() and of the checkpoint-saving statements cannot be read off here — confirm.
+if __name__ == '__main__':
+ model.to(device)
+
+ for epoch in range(1, epochs + 1):
+
+ # Running sums of the total loss and of each loss component, reset per epoch.
+ running_loss = 0
+ running_l1_loss = 0
+ running_ceps_loss = 0
+ running_band_loss = 0
+ running_pitch_loss = 0
+
+ print(f"training epoch {epoch}...")
+ with tqdm.tqdm(dataloader, unit='batch') as tepoch:
+ for i, (features, lost, target) in enumerate(tepoch):
+ optimizer.zero_grad()
+ features = features.to(device)
+ lost = lost.to(device)
+ target = target.to(device)
+
+ # No state is passed in, so the model starts every batch from zero GRU states.
+ out, states = model(features, lost)
+
+ loss, l1_loss, ceps_loss, band_loss, pitch_loss = plc_loss(target, out)
+
+ loss.backward()
+ optimizer.step()
+
+ #model.clip_weights()
+
+ scheduler.step()
+
+ running_loss += loss.detach().cpu().item()
+ running_l1_loss += l1_loss.detach().cpu().item()
+ running_ceps_loss += ceps_loss.detach().cpu().item()
+ running_band_loss += band_loss.detach().cpu().item()
+ running_pitch_loss += pitch_loss.detach().cpu().item()
+ # Progress bar shows the mean of each loss over the batches seen so far.
+ tepoch.set_postfix(loss=f"{running_loss/(i+1):8.5f}",
+ l1_loss=f"{running_l1_loss/(i+1):8.5f}",
+ ceps_loss=f"{running_ceps_loss/(i+1):8.5f}",
+ band_loss=f"{running_band_loss/(i+1):8.5f}",
+ pitch_loss=f"{running_pitch_loss/(i+1):8.5f}",
+ )
+
+ # save checkpoint
+ # The file name combines the --suffix option and the epoch number.
+ checkpoint_path = os.path.join(checkpoint_dir, f'plc{args.suffix}_{epoch}.pth')
+ checkpoint['state_dict'] = model.state_dict()
+ checkpoint['loss'] = running_loss / len(dataloader)
+ checkpoint['epoch'] = epoch
+ torch.save(checkpoint, checkpoint_path)
diff --git a/dnn/torch/rdovae/README.md b/dnn/torch/rdovae/README.md
new file mode 100644
index 00000000..14359d82
--- /dev/null
+++ b/dnn/torch/rdovae/README.md
@@ -0,0 +1,24 @@
+# Rate-Distortion-Optimized Variational Auto-Encoder
+
+## Setup
+The python code requires python >= 3.6 and has been tested with python 3.6 and python 3.10. To install requirements run
+```
+python -m pip install -r requirements.txt
+```
+
+## Training
+To generate training data, use the `dump_data` tool from the main LPCNet repo
+```
+./dump_data -train 16khz_speech_input.s16 features.f32 data.s16
+```
+
+To train the model, simply run
+```
+python train_rdovae.py features.f32 output_folder
+```
+
+To train on CUDA device add `--cuda-visible-devices idx`.
+
+
+## ToDo
+- Upload checkpoints and add URLs
diff --git a/dnn/torch/rdovae/export_rdovae_weights.py b/dnn/torch/rdovae/export_rdovae_weights.py
new file mode 100644
index 00000000..3ef9fabd
--- /dev/null
+++ b/dnn/torch/rdovae/export_rdovae_weights.py
@@ -0,0 +1,365 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+import sys
+
+# Make the sibling weight-exchange directory importable before the wexchange imports below.
+sys.path.append(os.path.join(os.path.dirname(__file__), '../weight-exchange'))
+
+# Arguments are parsed before the torch/numpy imports that follow.
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint', type=str, help='rdovae model checkpoint')
+parser.add_argument('output_dir', type=str, help='output folder')
+parser.add_argument('--format', choices=['C', 'numpy'], help='output format, default: C', default='C')
+
+args = parser.parse_args()
+
+import torch
+import numpy as np
+
+from rdovae import RDOVAE
+from wexchange.torch import dump_torch_weights
+from wexchange.c_export import CWriter, print_vector
+
+def print_xml(xmlout, val, param, anchor, name):
+    """Write `val` to `xmlout` as an XML table with one column per row of `val`.
+
+    Args:
+        xmlout: writable text stream.
+        val: 2-D array indexed as val[j][k]; j selects the table column (Q<j>)
+            and k the table row.
+        param: parameter label used in the table's <name> element.
+        anchor: prefix of the table anchor; the anchor is "<anchor>_<name>".
+        name: name of the quantity the table describes.
+    """
+    # Build one header cell per data column so the header always matches the body.
+    # For 16 rows in `val` this yields exactly Q0..Q15.
+    level_headers = "".join(f"<th>Q{j}</th>" for j in range(val.shape[0]))
+    xmlout.write(
+f"""
+ <table anchor="{anchor}_{name}">
+ <name>{param} values for {name}</name>
+ <thead>
+ <tr><th>k</th>{level_headers}</tr>
+ </thead>
+ <tbody>
+""")
+    for k in range(val.shape[1]):
+        xmlout.write(f" <tr><th>{k}</th>")
+        for j in range(val.shape[0]):
+            xmlout.write(f"<th>{val[j][k]}</th>")
+        xmlout.write("</tr>\n")
+    xmlout.write(
+f"""
+ </tbody>
+ </table>
+""")
+# Quantize the statistical-model parameters in `w` to Q8, write them as C arrays
+# (source and extern declarations) through `writer` and as XML tables to `xmlout`.
+# `w` is indexed as w[level, parameter, dimension]; rows 0, 1, 4 and 5 of the
+# parameter axis are used. Returns (N, mask, scale): the number of dimensions
+# kept, the boolean mask selecting them, and the per-dimension scale
+# normalization factors of the kept dimensions as a tensor.
+def dump_statistical_model(writer, w, name, xmlout):
+ levels = w.shape[0]
+
+ print("printing statistical model")
+ quant_scales = torch.nn.functional.softplus(w[:, 0, :]).numpy()
+ dead_zone = 0.05 * torch.nn.functional.softplus(w[:, 1, :]).numpy()
+ r = torch.sigmoid(w[:, 5 , :]).numpy()
+ p0 = torch.sigmoid(w[:, 4 , :]).numpy()
+ p0 = 1 - r ** (0.5 + 0.5 * p0)
+
+ # Normalize each dimension so its largest scale over all levels becomes 255/256,
+ # which keeps the Q8 value within 8 bits.
+ scales_norm = 255./256./(1e-15+np.max(quant_scales,axis=0))
+ quant_scales = quant_scales*scales_norm
+ quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)
+ dead_zone_q8 = np.clip(np.round(dead_zone * 2**8), 0, 255).astype(np.uint16)
+ r_q8 = np.clip(np.round(r * 2**8), 0, 255).astype(np.uint8)
+ p0_q8 = np.clip(np.round(p0 * 2**8), 0, 255).astype(np.uint16)
+
+ # Keep a dimension only if some level has a non-zero r_q8 and some level has p0_q8 below 255.
+ mask = (np.max(r_q8,axis=0) > 0) * (np.min(p0_q8,axis=0) < 255)
+ quant_scales_q8 = quant_scales_q8[:, mask]
+ dead_zone_q8 = dead_zone_q8[:, mask]
+ r_q8 = r_q8[:, mask]
+ p0_q8 = p0_q8[:, mask]
+ N = r_q8.shape[-1]
+
+ # Values are at most 255 after the normalization/clipping above, so they are emitted as opus_uint8.
+ print_vector(writer.source, quant_scales_q8, f'dred_{name}_quant_scales_q8', dtype='opus_uint8', static=False)
+ print_vector(writer.source, dead_zone_q8, f'dred_{name}_dead_zone_q8', dtype='opus_uint8', static=False)
+ print_vector(writer.source, r_q8, f'dred_{name}_r_q8', dtype='opus_uint8', static=False)
+ print_vector(writer.source, p0_q8, f'dred_{name}_p0_q8', dtype='opus_uint8', static=False)
+
+ print_xml(xmlout, quant_scales_q8, "Scale", "scale", name)
+ print_xml(xmlout, dead_zone_q8, "Dead zone", "deadzone", name)
+ print_xml(xmlout, r_q8, "Decay (r)", "decay", name)
+ print_xml(xmlout, p0_q8, "P(0)", "p0", name)
+
+ # Each array is declared flat with levels * N entries.
+ writer.header.write(
+f"""
+extern const opus_uint8 dred_{name}_quant_scales_q8[{levels * N}];
+extern const opus_uint8 dred_{name}_dead_zone_q8[{levels * N}];
+extern const opus_uint8 dred_{name}_r_q8[{levels * N}];
+extern const opus_uint8 dred_{name}_p0_q8[{levels * N}];
+
+"""
+ )
+ return N, mask, torch.tensor(scales_norm[mask])
+
+
+def c_export(args, model):
+    """Write the RDOVAE encoder, decoder, statistical model and constants as C files.
+
+    Args:
+        args: parsed command-line arguments; `checkpoint` and `output_dir` are read.
+        model: RDOVAE model whose layers are looked up by dotted name via get_submodule.
+
+    Side effects: writes the dred_rdovae_* files through CWriter into args.output_dir,
+    writes "stats.xml" into the current working directory, and replaces the weights of
+    the encoder latent/state output layers and of the matching decoder input layers of
+    `model` in place (pruned and rescaled; the encoder outputs are also zero-padded).
+    """
+
+    message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
+
+    enc_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_enc_data"), message=message, model_struct_name='RDOVAEEnc')
+    dec_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_dec_data"), message=message, model_struct_name='RDOVAEDec')
+    stats_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_stats_data"), message=message, enable_binary_blob=False)
+    constants_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_constants"), message=message, header_only=True, enable_binary_blob=False)
+
+    # some custom includes
+    for writer in [enc_writer, dec_writer]:
+        writer.header.write(
+f"""
+#include "opus_types.h"
+
+#include "dred_rdovae.h"
+
+#include "dred_rdovae_constants.h"
+
+"""
+        )
+
+    stats_writer.header.write(
+f"""
+#include "opus_types.h"
+
+#include "dred_rdovae_constants.h"
+
+"""
+    )
+
+    latent_out = model.get_submodule('core_encoder.module.z_dense')
+    state_out = model.get_submodule('core_encoder.module.state_dense_2')
+    orig_latent_dim = latent_out.weight.shape[0]
+    orig_state_dim = state_out.weight.shape[0]
+    # statistical model
+    qembedding = model.statistical_model.quant_embedding.weight.detach()
+    levels = qembedding.shape[0]
+    qembedding = torch.reshape(qembedding, (levels, 6, -1))
+
+    # The XML statistics file is closed as soon as both tables are written.
+    with open("stats.xml", "w") as xmlout:
+        latent_dim, latent_mask, latent_scale = dump_statistical_model(stats_writer, qembedding[:, :, :orig_latent_dim], 'latent', xmlout)
+        state_dim, state_mask, state_scale = dump_statistical_model(stats_writer, qembedding[:, :, orig_latent_dim:], 'state', xmlout)
+
+    # Encoder latent output: keep only the unmasked dimensions, divide by the scale
+    # normalization returned by the statistical model, and zero-pad to a multiple of 8.
+    padded_latent_dim = (latent_dim+7)//8*8
+    latent_pad = padded_latent_dim - latent_dim
+    w = latent_out.weight[latent_mask,:]
+    w = w/latent_scale[:, None]
+    w = torch.cat([w, torch.zeros(latent_pad, w.shape[1])], dim=0)
+    b = latent_out.bias[latent_mask]
+    b = b/latent_scale
+    b = torch.cat([b, torch.zeros(latent_pad)], dim=0)
+    latent_out.weight = torch.nn.Parameter(w)
+    latent_out.bias = torch.nn.Parameter(b)
+
+    # Same treatment for the encoder state output.
+    padded_state_dim = (state_dim+7)//8*8
+    state_pad = padded_state_dim - state_dim
+    w = state_out.weight[state_mask,:]
+    w = w/state_scale[:, None]
+    w = torch.cat([w, torch.zeros(state_pad, w.shape[1])], dim=0)
+    b = state_out.bias[state_mask]
+    b = b/state_scale
+    b = torch.cat([b, torch.zeros(state_pad)], dim=0)
+    state_out.weight = torch.nn.Parameter(w)
+    state_out.bias = torch.nn.Parameter(b)
+
+    # Decoder inputs: keep the same dimensions and multiply by the scale, mirroring
+    # the division applied on the encoder side.
+    latent_in = model.get_submodule('core_decoder.module.dense_1')
+    state_in = model.get_submodule('core_decoder.module.hidden_init')
+    latent_in.weight = torch.nn.Parameter(latent_in.weight[:,latent_mask]*latent_scale)
+    state_in.weight = torch.nn.Parameter(state_in.weight[:,state_mask]*state_scale)
+
+    # encoder
+    # Each entry: (module path, exported name, activation label (unused here), quantize flag).
+    encoder_dense_layers = [
+        ('core_encoder.module.dense_1' , 'enc_dense1', 'TANH', False,),
+        ('core_encoder.module.z_dense' , 'enc_zdense', 'LINEAR', True,),
+        ('core_encoder.module.state_dense_1' , 'gdense1' , 'TANH', True,),
+        ('core_encoder.module.state_dense_2' , 'gdense2' , 'TANH', True)
+    ]
+
+    for name, export_name, _, quantize in encoder_dense_layers:
+        layer = model.get_submodule(name)
+        dump_torch_weights(enc_writer, layer, name=export_name, verbose=True, quantize=quantize, scale=None)
+
+
+    encoder_gru_layers = [
+        ('core_encoder.module.gru1' , 'enc_gru1', 'TANH', True),
+        ('core_encoder.module.gru2' , 'enc_gru2', 'TANH', True),
+        ('core_encoder.module.gru3' , 'enc_gru3', 'TANH', True),
+        ('core_encoder.module.gru4' , 'enc_gru4', 'TANH', True),
+        ('core_encoder.module.gru5' , 'enc_gru5', 'TANH', True),
+    ]
+
+    enc_max_rnn_units = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, verbose=True, input_sparse=True, quantize=quantize, scale=None, recurrent_scale=None)
+                             for name, export_name, _, quantize in encoder_gru_layers])
+
+
+    encoder_conv_layers = [
+        ('core_encoder.module.conv1.conv' , 'enc_conv1', 'TANH', True),
+        ('core_encoder.module.conv2.conv' , 'enc_conv2', 'TANH', True),
+        ('core_encoder.module.conv3.conv' , 'enc_conv3', 'TANH', True),
+        ('core_encoder.module.conv4.conv' , 'enc_conv4', 'TANH', True),
+        ('core_encoder.module.conv5.conv' , 'enc_conv5', 'TANH', True),
+    ]
+
+    enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, verbose=True, quantize=quantize, scale=None) for name, export_name, _, quantize in encoder_conv_layers])
+
+
+    del enc_writer
+
+    # decoder
+    decoder_dense_layers = [
+        ('core_decoder.module.dense_1' , 'dec_dense1', 'TANH', False),
+        ('core_decoder.module.glu1.gate' , 'dec_glu1', 'TANH', True),
+        ('core_decoder.module.glu2.gate' , 'dec_glu2', 'TANH', True),
+        ('core_decoder.module.glu3.gate' , 'dec_glu3', 'TANH', True),
+        ('core_decoder.module.glu4.gate' , 'dec_glu4', 'TANH', True),
+        ('core_decoder.module.glu5.gate' , 'dec_glu5', 'TANH', True),
+        ('core_decoder.module.output' , 'dec_output', 'LINEAR', True),
+        ('core_decoder.module.hidden_init' , 'dec_hidden_init', 'TANH', False),
+        ('core_decoder.module.gru_init' , 'dec_gru_init','TANH', True),
+    ]
+
+    for name, export_name, _, quantize in decoder_dense_layers:
+        layer = model.get_submodule(name)
+        dump_torch_weights(dec_writer, layer, name=export_name, verbose=True, quantize=quantize, scale=None)
+
+
+    decoder_gru_layers = [
+        ('core_decoder.module.gru1' , 'dec_gru1', 'TANH', True),
+        ('core_decoder.module.gru2' , 'dec_gru2', 'TANH', True),
+        ('core_decoder.module.gru3' , 'dec_gru3', 'TANH', True),
+        ('core_decoder.module.gru4' , 'dec_gru4', 'TANH', True),
+        ('core_decoder.module.gru5' , 'dec_gru5', 'TANH', True),
+    ]
+
+    dec_max_rnn_units = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, verbose=True, input_sparse=True, quantize=quantize, scale=None, recurrent_scale=None)
+                             for name, export_name, _, quantize in decoder_gru_layers])
+
+    decoder_conv_layers = [
+        ('core_decoder.module.conv1.conv' , 'dec_conv1', 'TANH', True),
+        ('core_decoder.module.conv2.conv' , 'dec_conv2', 'TANH', True),
+        ('core_decoder.module.conv3.conv' , 'dec_conv3', 'TANH', True),
+        ('core_decoder.module.conv4.conv' , 'dec_conv4', 'TANH', True),
+        ('core_decoder.module.conv5.conv' , 'dec_conv5', 'TANH', True),
+    ]
+
+    dec_max_conv_inputs = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, verbose=True, quantize=quantize, scale=None) for name, export_name, _, quantize in decoder_conv_layers])
+
+    del dec_writer
+
+    del stats_writer
+
+    # constants
+    # DRED_ENC_MAX_RNN_NEURONS is derived from the encoder GRU sizes (it previously
+    # repeated the conv-input maximum by mistake).
+    constants_writer.header.write(
+f"""
+#define DRED_NUM_FEATURES {model.feature_dim}
+
+#define DRED_LATENT_DIM {latent_dim}
+
+#define DRED_STATE_DIM {state_dim}
+
+#define DRED_PADDED_LATENT_DIM {padded_latent_dim}
+
+#define DRED_PADDED_STATE_DIM {padded_state_dim}
+
+#define DRED_NUM_QUANTIZATION_LEVELS {model.quant_levels}
+
+#define DRED_MAX_RNN_NEURONS {max(enc_max_rnn_units, dec_max_rnn_units)}
+
+#define DRED_MAX_CONV_INPUTS {max(enc_max_conv_inputs, dec_max_conv_inputs)}
+
+#define DRED_ENC_MAX_RNN_NEURONS {enc_max_rnn_units}
+
+#define DRED_ENC_MAX_CONV_INPUTS {enc_max_conv_inputs}
+
+#define DRED_DEC_MAX_RNN_NEURONS {dec_max_rnn_units}
+
+"""
+    )
+
+    del constants_writer
+
+
+def numpy_export(args, model):
+    """Dump each mapped model layer via dump_torch_weights to a path under args.output_dir."""
+
+    # exchange name (used as the output path component) -> dotted submodule path in the model
+    exchange_name_to_name = {
+        'encoder_stack_layer1_dense' : 'core_encoder.module.dense_1',
+        'encoder_stack_layer3_dense' : 'core_encoder.module.dense_2',
+        'encoder_stack_layer5_dense' : 'core_encoder.module.dense_3',
+        'encoder_stack_layer7_dense' : 'core_encoder.module.dense_4',
+        'encoder_stack_layer8_dense' : 'core_encoder.module.dense_5',
+        'encoder_state_layer1_dense' : 'core_encoder.module.state_dense_1',
+        'encoder_state_layer2_dense' : 'core_encoder.module.state_dense_2',
+        'encoder_stack_layer2_gru' : 'core_encoder.module.gru_1',
+        'encoder_stack_layer4_gru' : 'core_encoder.module.gru_2',
+        'encoder_stack_layer6_gru' : 'core_encoder.module.gru_3',
+        'encoder_stack_layer9_conv' : 'core_encoder.module.conv1',
+        'statistical_model_embedding' : 'statistical_model.quant_embedding',
+        'decoder_state1_dense' : 'core_decoder.module.gru_1_init',
+        'decoder_state2_dense' : 'core_decoder.module.gru_2_init',
+        'decoder_state3_dense' : 'core_decoder.module.gru_3_init',
+        'decoder_stack_layer1_dense' : 'core_decoder.module.dense_1',
+        'decoder_stack_layer3_dense' : 'core_decoder.module.dense_2',
+        'decoder_stack_layer5_dense' : 'core_decoder.module.dense_3',
+        'decoder_stack_layer7_dense' : 'core_decoder.module.dense_4',
+        'decoder_stack_layer8_dense' : 'core_decoder.module.dense_5',
+        'decoder_stack_layer9_dense' : 'core_decoder.module.output',
+        'decoder_stack_layer2_gru' : 'core_decoder.module.gru_1',
+        'decoder_stack_layer4_gru' : 'core_decoder.module.gru_2',
+        'decoder_stack_layer6_gru' : 'core_decoder.module.gru_3'
+    }
+
+    # Every module path in the table is unique, so walking the table directly visits
+    # the same (module, exchange name) pairs in the same order as inverting it first.
+    for exchange_name, name in exchange_name_to_name.items():
+        print(f"printing layer {name}...")
+        target = os.path.join(args.output_dir, exchange_name)
+        dump_torch_weights(target, model.get_submodule(name))
+
+
+# Script entry point: rebuild the model from the checkpoint, strip weight
+# normalization, then export in the format selected on the command line.
+if __name__ == "__main__":
+
+
+ os.makedirs(args.output_dir, exist_ok=True)
+
+
+ # load model from checkpoint
+ checkpoint = torch.load(args.checkpoint, map_location='cpu')
+ model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+ # strict=False: key mismatches are returned here and handled explicitly below.
+ missing_keys, unmatched_keys = model.load_state_dict(checkpoint['state_dict'], strict=False)
+ # Applied to every submodule; modules without weight norm raise ValueError and are skipped.
+ def _remove_weight_norm(m):
+ try:
+ torch.nn.utils.remove_weight_norm(m)
+ except ValueError: # this module didn't have weight norm
+ return
+ model.apply(_remove_weight_norm)
+
+
+ # Missing keys abort the export; unmatched (unexpected) keys only produce a warning.
+ if len(missing_keys) > 0:
+ raise ValueError(f"error: missing keys in state dict")
+
+ if len(unmatched_keys) > 0:
+ print(f"warning: the following keys were unmatched {unmatched_keys}")
+
+ if args.format == 'C':
+ c_export(args, model)
+ elif args.format == 'numpy':
+ numpy_export(args, model)
+ else:
+ raise ValueError(f'error: unknown export format {args.format}')
diff --git a/dnn/torch/rdovae/fec_encoder.py b/dnn/torch/rdovae/fec_encoder.py
new file mode 100644
index 00000000..20ab4ac3
--- /dev/null
+++ b/dnn/torch/rdovae/fec_encoder.py
@@ -0,0 +1,212 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe and Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import subprocess
+import argparse
+
+# Hide all CUDA devices; this is set before torch is imported further down.
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+# Command-line interface: input audio, model checkpoint, the two end-point
+# quantization levels and the output file, plus optional tuning/debug switches.
+parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with voip application and 20ms frames')
+
+parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
+parser.add_argument('checkpoint', metavar='<weights>', help='model checkpoint')
+parser.add_argument('q0', metavar='<quant level 0>', type=int, help='quantization level for most recent frame')
+parser.add_argument('q1', metavar='<quant level 1>', type=int, help='quantization level for oldest frame')
+parser.add_argument('output', type=str, help='output file (will be extended with .fec)')
+
+parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
+parser.add_argument('--num-redundancy-frames', default=52, type=int, help='number of redundancy frames per packet (default 52)')
+parser.add_argument('--extra-delay', default=0, type=int, help="last features in packet are calculated with the decoder aligned samples, use this option to add extra delay (in samples at 16kHz)")
+parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')
+parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')
+
+args = parser.parse_args()
+
+import numpy as np
+from scipy.io import wavfile
+import torch
+
+from rdovae import RDOVAE
+from packets import write_fec_packets
+
+# Load the model on the CPU and build the zero-padded input signal that is
+# written to '<input>_padded.raw' for feature extraction.
+torch.set_num_threads(4)
+
+checkpoint = torch.load(args.checkpoint, map_location="cpu")
+model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+model.load_state_dict(checkpoint['state_dict'], strict=False)
+model.to("cpu")
+
+lpc_order = 16
+
+## prepare input signal
+# SILK frame size is 20ms and LPCNet subframes are 10ms
+subframe_size = 160
+frame_size = 2 * subframe_size
+
+# 91 samples delay to align with SILK decoded frames
+silk_delay = 91
+
+# prepend zeros to have enough history to produce the first packet
+zero_history = (args.num_redundancy_frames - 1) * frame_size
+
+# dump data has a (feature) delay of 10ms
+dump_data_delay = 160
+
+total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay
+
+# load signal
+# NOTE(review): the sample rate returned for .wav input is not checked, and raw
+# input is read as int16 — presumably 16 kHz mono is expected; confirm.
+if args.input.endswith('.raw') or args.input.endswith('.pcm'):
+ signal = np.fromfile(args.input, dtype='int16')
+
+elif args.input.endswith('.wav'):
+ fs, signal = wavfile.read(args.input)
+else:
+ raise ValueError(f'unknown input signal format: {args.input}')
+
+# fill up last frame with zeros
+# right_padding makes the padded length a whole number of frames (0 if it already is).
+padded_signal_length = len(signal) + total_delay
+tail = padded_signal_length % frame_size
+right_padding = (frame_size - tail) % frame_size
+
+signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
+
+padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'
+signal.tofile(padded_signal_file)
+
+# call dump_data on the padded signal to create features
+
+feature_file = os.path.splitext(args.input)[0] + '_features.f32'
+# Pass the arguments as a list and skip the shell, so paths containing spaces or
+# shell metacharacters reach dump_data unchanged.
+command = [args.dump_data, '-test', padded_signal_file, feature_file]
+r = subprocess.run(command)
+if r.returncode != 0:
+    command_str = ' '.join(command)
+    raise RuntimeError(f"command '{command_str}' failed with exit code {r.returncode}")
+
+# load features
+# Each row in the feature file holds the model features followed by lpc_order
+# extra values; only the first model.feature_dim columns are fed to the model.
+nb_features = model.feature_dim + lpc_order
+nb_used_features = model.feature_dim
+
+# load features
+# The subframe count is rounded down to an even number, i.e. whole frames of two subframes.
+features = np.fromfile(feature_file, dtype='float32')
+num_subframes = len(features) // nb_features
+num_subframes = 2 * (num_subframes // 2)
+num_frames = num_subframes // 2
+
+features = np.reshape(features, (1, -1, nb_features))
+features = features[:, :, :nb_used_features]
+features = features[:, :num_subframes, :]
+
+# quant_ids in reverse decoding order
+# Linear ramp from q1 (first entry) to q0 (last entry) over num_redundancy_frames // 2 entries.
+# NOTE(review): the divisor num_redundancy_frames // 2 - 1 is zero for 2 or 3 redundancy frames.
+quant_ids = torch.round((args.q1 + (args.q0 - args.q1) * torch.arange(args.num_redundancy_frames // 2) / (args.num_redundancy_frames // 2 - 1))).long()
+
+print(f"using quantization levels {quant_ids}...")
+
+# convert input to torch tensors
+features = torch.from_numpy(features)
+
+
+# run encoder
+# Encode the whole signal once, then build one redundancy packet per frame by
+# quantizing a window of latents and decoding it back to features.
+print("running fec encoder...")
+with torch.no_grad():
+
+ # encoding
+ z, states, state_size = model.encode(features)
+
+
+ # decoder on packet chunks
+ input_length = args.num_redundancy_frames // 2
+ offset = args.num_redundancy_frames - 1
+
+ packets = []
+ packet_sizes = []
+
+ for i in range(offset, num_frames):
+ print(f"processing frame {i - offset}...")
+ # quantize / unquantize latent vectors
+ # The slice takes every second latent ending at index i, input_length entries in total.
+ zi = torch.clone(z[:, i - 2 * input_length + 2: i + 1 : 2, :])
+ zi, rates = model.quantize(zi, quant_ids)
+ zi = model.unquantize(zi, quant_ids)
+
+ features = model.decode(zi, states[:, i : i + 1, :])
+ packets.append(features.squeeze(0).numpy())
+ # Sum of the rates plus state_size, rounded up to a multiple of 8 (presumably bits — confirm).
+ packet_size = 8 * int((torch.sum(rates) + 7 + state_size) / 8)
+ packet_sizes.append(packet_size)
+
+
+# write packets
+# '.fec' is appended to the output name unless it already ends with it.
+packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
+write_fec_packets(packet_file, packets, packet_sizes)
+
+
+# Mean packet size times 50, divided by 1000 (presumably 50 packets per second for 20 ms frames — confirm).
+print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")
+
+# assemble features according to loss file
+# Walk the packets in order: while packets are marked lost, widen the window by
+# two feature frames per lost packet; at the next received packet (or the last
+# packet) copy that many trailing feature frames from it into the output.
+# NOTE(review): loss[i] is indexed for every packet, so the loss trace must have
+# at least as many entries as there are packets.
+if args.lossfile != None:
+ num_packets = len(packets)
+ loss = np.loadtxt(args.lossfile, dtype='int16')
+ fec_out = np.zeros((num_packets * 2, packets[0].shape[-1]), dtype='float32')
+ foffset = -2
+ ptr = 0
+ count = 2
+ for i in range(num_packets):
+ if (loss[i] == 0) or (i == num_packets - 1):
+
+ fec_out[ptr:ptr+count,:] = packets[i][foffset:, :]
+
+ ptr += count
+ foffset = -2
+ count = 2
+ else:
+ count += 2
+ foffset -= 2
+
+ # Pad each row with zeros to 36 columns before writing.
+ # NOTE(review): 36 equals nb_features only when model.feature_dim is 20 — confirm.
+ fec_out_full = np.zeros((fec_out.shape[0], 36), dtype=np.float32)
+ fec_out_full[:, : fec_out.shape[-1]] = fec_out
+
+ fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')
+
+
+# Optional debug dump: for each (batch, offset) pair, take `batch` feature frames
+# ending `offset` frames before the end of every (batch // 2)-th packet,
+# concatenate them, zero-pad to nb_features columns and write them to disk.
+if args.debug_output:
+ import itertools
+
+ batches = [4]
+ offsets = [0, 2 * args.num_redundancy_frames - 4]
+
+ # sanity checks
+ # 1. concatenate features at offset 0
+ for batch, offset in itertools.product(batches, offsets):
+
+ stop = packets[0].shape[1] - offset
+ test_features = np.concatenate([packet[stop - batch: stop, :] for packet in packets[::batch//2]], axis=0)
+
+ test_features_full = np.zeros((test_features.shape[0], nb_features), dtype=np.float32)
+ test_features_full[:, :nb_used_features] = test_features[:, :]
+
+ print(f"writing debug output {packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32'}")
+ test_features_full.tofile(packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32')
diff --git a/dnn/torch/rdovae/import_rdovae_weights.py b/dnn/torch/rdovae/import_rdovae_weights.py
new file mode 100644
index 00000000..c824986d
--- /dev/null
+++ b/dnn/torch/rdovae/import_rdovae_weights.py
@@ -0,0 +1,143 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+import argparse
+
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('exchange_folder', type=str, help='exchange folder path')
+parser.add_argument('output', type=str, help='path to output model checkpoint')
+
+model_group = parser.add_argument_group(title="model parameters")
+model_group.add_argument('--num-features', type=int, help="number of features, default: 20", default=20)
+model_group.add_argument('--latent-dim', type=int, help="number of symbols produces by encoder, default: 80", default=80)
+model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
+model_group.add_argument('--cond-size2', type=int, help="second conditioning size, default: 256", default=256)
+model_group.add_argument('--state-dim', type=int, help="dimensionality of transfered state, default: 24", default=24)
+model_group.add_argument('--quant-levels', type=int, help="number of quantization levels, default: 40", default=40)
+
+args = parser.parse_args()
+
+import torch
+from rdovae import RDOVAE
+from wexchange.torch import load_torch_weights
+
+# Maps the name of each layer in the exchange folder to the dotted path of the
+# RDOVAE submodule that receives its weights (resolved with model.get_submodule).
+exchange_name_to_name = {
+    'encoder_stack_layer1_dense' : 'core_encoder.module.dense_1',
+    'encoder_stack_layer3_dense' : 'core_encoder.module.dense_2',
+    'encoder_stack_layer5_dense' : 'core_encoder.module.dense_3',
+    'encoder_stack_layer7_dense' : 'core_encoder.module.dense_4',
+    'encoder_stack_layer8_dense' : 'core_encoder.module.dense_5',
+    'encoder_state_layer1_dense' : 'core_encoder.module.state_dense_1',
+    'encoder_state_layer2_dense' : 'core_encoder.module.state_dense_2',
+    'encoder_stack_layer2_gru' : 'core_encoder.module.gru_1',
+    'encoder_stack_layer4_gru' : 'core_encoder.module.gru_2',
+    'encoder_stack_layer6_gru' : 'core_encoder.module.gru_3',
+    'encoder_stack_layer9_conv' : 'core_encoder.module.conv1',
+    'statistical_model_embedding' : 'statistical_model.quant_embedding',
+    'decoder_state1_dense' : 'core_decoder.module.gru_1_init',
+    'decoder_state2_dense' : 'core_decoder.module.gru_2_init',
+    'decoder_state3_dense' : 'core_decoder.module.gru_3_init',
+    'decoder_stack_layer1_dense' : 'core_decoder.module.dense_1',
+    'decoder_stack_layer3_dense' : 'core_decoder.module.dense_2',
+    'decoder_stack_layer5_dense' : 'core_decoder.module.dense_3',
+    'decoder_stack_layer7_dense' : 'core_decoder.module.dense_4',
+    'decoder_stack_layer8_dense' : 'core_decoder.module.dense_5',
+    'decoder_stack_layer9_dense' : 'core_decoder.module.output',
+    'decoder_stack_layer2_gru' : 'core_decoder.module.gru_1',
+    'decoder_stack_layer4_gru' : 'core_decoder.module.gru_2',
+    'decoder_stack_layer6_gru' : 'core_decoder.module.gru_3'
+}
+
+if __name__ == "__main__":
+ checkpoint = dict()
+
+ # parameters
+ num_features = args.num_features
+ latent_dim = args.latent_dim
+ quant_levels = args.quant_levels
+ cond_size = args.cond_size
+ cond_size2 = args.cond_size2
+ state_dim = args.state_dim
+
+
+ # model
+ checkpoint['model_args'] = (num_features, latent_dim, quant_levels, cond_size, cond_size2)
+ checkpoint['model_kwargs'] = {'state_dim': state_dim}
+ model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+ dense_layer_names = [
+ 'encoder_stack_layer1_dense',
+ 'encoder_stack_layer3_dense',
+ 'encoder_stack_layer5_dense',
+ 'encoder_stack_layer7_dense',
+ 'encoder_stack_layer8_dense',
+ 'encoder_state_layer1_dense',
+ 'encoder_state_layer2_dense',
+ 'decoder_state1_dense',
+ 'decoder_state2_dense',
+ 'decoder_state3_dense',
+ 'decoder_stack_layer1_dense',
+ 'decoder_stack_layer3_dense',
+ 'decoder_stack_layer5_dense',
+ 'decoder_stack_layer7_dense',
+ 'decoder_stack_layer8_dense',
+ 'decoder_stack_layer9_dense'
+ ]
+
+ gru_layer_names = [
+ 'encoder_stack_layer2_gru',
+ 'encoder_stack_layer4_gru',
+ 'encoder_stack_layer6_gru',
+ 'decoder_stack_layer2_gru',
+ 'decoder_stack_layer4_gru',
+ 'decoder_stack_layer6_gru'
+ ]
+
+ conv1d_layer_names = [
+ 'encoder_stack_layer9_conv'
+ ]
+
+ embedding_layer_names = [
+ 'statistical_model_embedding'
+ ]
+
+ for name in dense_layer_names + gru_layer_names + conv1d_layer_names + embedding_layer_names:
+ print(f"loading weights for layer {exchange_name_to_name[name]}")
+ layer = model.get_submodule(exchange_name_to_name[name])
+ load_torch_weights(os.path.join(args.exchange_folder, name), layer)
+
+ checkpoint['state_dict'] = model.state_dict()
+
+ torch.save(checkpoint, args.output) \ No newline at end of file
diff --git a/dnn/torch/rdovae/packets/__init__.py b/dnn/torch/rdovae/packets/__init__.py
new file mode 100644
index 00000000..fb71ab3d
--- /dev/null
+++ b/dnn/torch/rdovae/packets/__init__.py
@@ -0,0 +1 @@
+from .fec_packets import write_fec_packets, read_fec_packets \ No newline at end of file
diff --git a/dnn/torch/rdovae/packets/fec_packets.c b/dnn/torch/rdovae/packets/fec_packets.c
new file mode 100644
index 00000000..ee08ba95
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.c
@@ -0,0 +1,142 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "fec_packets.h"
+
+/* header of a .fec file: seven int16 fields stored back to back */
+typedef struct {
+    int16_t version;
+    int16_t header_size;
+    int16_t num_packets;
+    int16_t packet_size;
+    int16_t subframe_size;
+    int16_t subframes_per_packet;
+    int16_t num_features;
+} fec_header;
+
+/* reads the header fields from the current position of fid;
+   returns 0 on success and 1 on a short read */
+static int read_fec_header(FILE *fid, fec_header *header)
+{
+    if (fread(&header->version, sizeof(header->version), 1, fid) != 1) return 1;
+    if (fread(&header->header_size, sizeof(header->header_size), 1, fid) != 1) return 1;
+    if (fread(&header->num_packets, sizeof(header->num_packets), 1, fid) != 1) return 1;
+    if (fread(&header->packet_size, sizeof(header->packet_size), 1, fid) != 1) return 1;
+    if (fread(&header->subframe_size, sizeof(header->subframe_size), 1, fid) != 1) return 1;
+    if (fread(&header->subframes_per_packet, sizeof(header->subframes_per_packet), 1, fid) != 1) return 1;
+    if (fread(&header->num_features, sizeof(header->num_features), 1, fid) != 1) return 1;
+
+    return 0;
+}
+
+/* Reads the features of one subframe of one packet from a .fec file.
+
+   filename:       path of the .fec file
+   features:       output buffer, must hold num_features floats (as given in the file header)
+   packet_index:   index of the packet, 0 <= packet_index < num_packets
+   subframe_index: index of the subframe inside the packet, 0 <= subframe_index < subframes_per_packet
+
+   Returns 0 on success and 1 on any error (file cannot be opened, short read,
+   index out of bounds, failed seek). */
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
+{
+    fec_header header;
+    long offset;
+    size_t num_features;
+    int ret = 1;
+
+    FILE *fid = fopen(filename, "rb");
+
+    /* fclose(NULL) is undefined, so bail out before reaching the cleanup path */
+    if (fid == NULL)
+    {
+        fprintf(stderr, "get_fec_frame: could not open file\n");
+        return 1;
+    }
+
+    /* read header */
+    if (read_fec_header(fid, &header)) goto done;
+
+    /* check if indices are valid */
+    if (packet_index < 0 || packet_index >= header.num_packets
+        || subframe_index < 0 || subframe_index >= header.subframes_per_packet)
+    {
+        fprintf(stderr, "get_fec_frame: index out of bounds\n");
+        goto done;
+    }
+
+    /* a negative count would turn into a huge size_t in the fread below */
+    if (header.num_features < 0) goto done;
+    num_features = (size_t) header.num_features;
+
+    /* calculate offset in file (+ 2 skips the rate field at the start of the packet) */
+    offset = (long) header.header_size + (long) packet_index * header.packet_size
+           + 2 + (long) subframe_index * header.subframe_size;
+    if (fseek(fid, offset, SEEK_SET) != 0) goto done;
+
+    /* read features */
+    if (fread(features, sizeof(*features), num_features, fid) != num_features) goto done;
+
+    ret = 0;
+
+done:
+    fclose(fid);
+    return ret;
+}
+
+/* Reads the rate field of one packet from a .fec file.
+
+   filename:     path of the .fec file
+   packet_index: index of the packet, 0 <= packet_index < num_packets
+
+   Returns the stored rate, or -1 on any error (file cannot be opened, short
+   read, index out of bounds, failed seek). */
+int get_fec_rate(const char * const filename, int packet_index)
+{
+    fec_header header;
+    long offset;
+    int16_t rate;
+    int ret = -1;
+
+    FILE *fid = fopen(filename, "rb");
+
+    /* fclose(NULL) is undefined, so bail out before reaching the cleanup path */
+    if (fid == NULL)
+    {
+        fprintf(stderr, "get_fec_rate: could not open file\n");
+        return -1;
+    }
+
+    /* read header */
+    if (read_fec_header(fid, &header)) goto done;
+
+    /* check if indices are valid */
+    if (packet_index < 0 || packet_index >= header.num_packets)
+    {
+        fprintf(stderr, "get_fec_rate: index out of bounds\n");
+        goto done;
+    }
+
+    /* calculate offset in file (the rate is the first field of the packet) */
+    offset = (long) header.header_size + (long) packet_index * header.packet_size;
+    if (fseek(fid, offset, SEEK_SET) != 0) goto done;
+
+    /* read rate */
+    if (fread(&rate, sizeof(rate), 1, fid) != 1) goto done;
+
+    ret = (int) rate;
+
+done:
+    fclose(fid);
+    return ret;
+}
+
+#if 0
+/* Manual smoke test, compiled out by the surrounding #if 0: prints the 20
+   features of subframe 127 of packet 0 of ../test.fec and the rate of packet 0. */
+int main()
+{
+    float features[20];
+    int i;
+
+    /* get_fec_frame returns non-zero on failure */
+    if (get_fec_frame("../test.fec", &features[0], 0, 127))
+    {
+        return 1;
+    }
+
+    for (i = 0; i < 20; i ++)
+    {
+        printf("%d %f\n", i, features[i]);
+    }
+
+    printf("rate: %d\n", get_fec_rate("../test.fec", 0));
+
+}
+#endif \ No newline at end of file
diff --git a/dnn/torch/rdovae/packets/fec_packets.h b/dnn/torch/rdovae/packets/fec_packets.h
new file mode 100644
index 00000000..35d35542
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* guard name without leading underscore: identifiers starting with an
+   underscore followed by an uppercase letter are reserved */
+#ifndef FEC_PACKETS_H
+#define FEC_PACKETS_H
+
+/* Reads the features of subframe subframe_index of packet packet_index from
+   the .fec file filename into features, which must hold as many floats as the
+   file header declares in num_features. Returns 0 on success, 1 on error. */
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);
+
+/* Returns the rate field stored for packet packet_index in the .fec file
+   filename, or -1 on error. */
+int get_fec_rate(const char * const filename, int packet_index);
+
+#endif
\ No newline at end of file
diff --git a/dnn/torch/rdovae/packets/fec_packets.py b/dnn/torch/rdovae/packets/fec_packets.py
new file mode 100644
index 00000000..f44c1a95
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.py
@@ -0,0 +1,108 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+
+
+
+def write_fec_packets(filename, packets, rates=None):
+ """ writes packets in binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ # derive some sizes
+ num_packets = len(packets)
+ subframes_per_packet = packets[0].shape[-2]
+ num_features = packets[0].shape[-1]
+
+ # size of float is 4
+ subframe_size = num_features * 4
+ packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate
+
+ version = 1
+ # header size (version, header_size, num_packets, packet_size, subframe_size, subrames_per_packet, num_features)
+ header_size = 14
+
+ with open(filename, 'wb') as f:
+
+ # header
+ f.write(np.int16(version).tobytes())
+ f.write(np.int16(header_size).tobytes())
+ f.write(np.int16(num_packets).tobytes())
+ f.write(np.int16(packet_size).tobytes())
+ f.write(np.int16(subframe_size).tobytes())
+ f.write(np.int16(subframes_per_packet).tobytes())
+ f.write(np.int16(num_features).tobytes())
+
+ # packets
+ for i, packet in enumerate(packets):
+ if type(rates) == type(None):
+ rate = 0
+ else:
+ rate = rates[i]
+
+ f.write(np.int16(rate).tobytes())
+
+ features = np.flip(packet, axis=-2)
+ f.write(features.astype(np.float32).tobytes())
+
+
+def read_fec_packets(filename):
+ """ reads packets from binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ with open(filename, 'rb') as f:
+
+ # header
+ version = np.frombuffer(f.read(2), dtype=np.int16).item()
+ header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_packets = np.frombuffer(f.read(2), dtype=np.int16).item()
+ packet_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_features = np.frombuffer(f.read(2), dtype=np.int16).item()
+
+ dummy_features = np.zeros((subframes_per_packet, num_features), dtype=np.float32)
+
+ # packets
+ rates = []
+ packets = []
+ for i in range(num_packets):
+
+ rate = np.frombuffer(f.read(2), dtype=np.int16).item
+ rates.append(rate)
+
+ features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
+ packet = np.flip(features, axis=-2)
+ packets.append(packet)
+
+ return packets \ No newline at end of file
diff --git a/dnn/torch/rdovae/rdovae/__init__.py b/dnn/torch/rdovae/rdovae/__init__.py
new file mode 100644
index 00000000..b945adde
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/__init__.py
@@ -0,0 +1,2 @@
+from .rdovae import RDOVAE, distortion_loss, hard_rate_estimate, soft_rate_estimate
+from .dataset import RDOVAEDataset
diff --git a/dnn/torch/rdovae/rdovae/dataset.py b/dnn/torch/rdovae/rdovae/dataset.py
new file mode 100644
index 00000000..cfb32b05
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/dataset.py
@@ -0,0 +1,67 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+import numpy as np
+
+class RDOVAEDataset(torch.utils.data.Dataset):
+ def __init__(self,
+ feature_file,
+ sequence_length,
+ num_used_features=20,
+ num_features=36,
+ lambda_min=0.0002,
+ lambda_max=0.0135,
+ quant_levels=16,
+ enc_stride=2):
+
+ self.sequence_length = sequence_length
+ self.lambda_min = lambda_min
+ self.lambda_max = lambda_max
+ self.enc_stride = enc_stride
+ self.quant_levels = quant_levels
+ self.denominator = (quant_levels - 1) / np.log(lambda_max / lambda_min)
+
+ if sequence_length % enc_stride:
+ raise ValueError(f"RDOVAEDataset.__init__: enc_stride {enc_stride} does not divide sequence length {sequence_length}")
+
+ self.features = np.reshape(np.fromfile(feature_file, dtype=np.float32), (-1, num_features))
+ self.features = self.features[:, :num_used_features]
+ self.num_sequences = self.features.shape[0] // sequence_length
+
+ def __len__(self):
+ return self.num_sequences
+
+ def __getitem__(self, index):
+ features = self.features[index * self.sequence_length: (index + 1) * self.sequence_length, :]
+ q_ids = np.random.randint(0, self.quant_levels, (1)).astype(np.int64)
+ q_ids = np.repeat(q_ids, self.sequence_length // self.enc_stride, axis=0)
+ rate_lambda = self.lambda_min * np.exp(q_ids.astype(np.float32) / self.denominator).astype(np.float32)
+
+ return features, rate_lambda, q_ids
diff --git a/dnn/torch/rdovae/rdovae/rdovae.py b/dnn/torch/rdovae/rdovae/rdovae.py
new file mode 100644
index 00000000..cdb07b46
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/rdovae.py
@@ -0,0 +1,719 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" Pytorch implementations of rate distortion optimized variational autoencoder """
+
+import math as m
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+import sys
+import os
+source_dir = os.path.split(os.path.abspath(__file__))[0]
+sys.path.append(os.path.join(source_dir, "../../lpcnet/"))
+from utils.sparsification import GRUSparsifier
+from torch.nn.utils import weight_norm
+
+# Quantization and rate related utility functions
+
+def soft_pvq(x, k):
+ """ soft pyramid vector quantizer """
+
+ # L2 normalization
+ x_norm2 = x / (1e-15 + torch.norm(x, dim=-1, keepdim=True))
+
+
+ with torch.no_grad():
+ # quantization loop, no need to track gradients here
+ x_norm1 = x / torch.sum(torch.abs(x), dim=-1, keepdim=True)
+
+ # set initial scaling factor to k
+ scale_factor = k
+ x_scaled = scale_factor * x_norm1
+ x_quant = torch.round(x_scaled)
+
+ # we aim for ||x_quant||_L1 = k
+ for _ in range(10):
+ # remove signs and calculate L1 norm
+ abs_x_quant = torch.abs(x_quant)
+ abs_x_scaled = torch.abs(x_scaled)
+ l1_x_quant = torch.sum(abs_x_quant, axis=-1)
+
+ # increase, where target is too small and decrease, where target is too large
+ plus = 1.0001 * torch.min((abs_x_quant + 0.5) / (abs_x_scaled + 1e-15), dim=-1).values
+ minus = 0.9999 * torch.max((abs_x_quant - 0.5) / (abs_x_scaled + 1e-15), dim=-1).values
+ factor = torch.where(l1_x_quant > k, minus, plus)
+ factor = torch.where(l1_x_quant == k, torch.ones_like(factor), factor)
+ scale_factor = scale_factor * factor.unsqueeze(-1)
+
+ # update x
+ x_scaled = scale_factor * x_norm1
+ x_quant = torch.round(x_quant)
+
+ # L2 normalization of quantized x
+ x_quant_norm2 = x_quant / (1e-15 + torch.norm(x_quant, dim=-1, keepdim=True))
+ quantization_error = x_quant_norm2 - x_norm2
+
+ return x_norm2 + quantization_error.detach()
+
+def cache_parameters(func):
+ cache = dict()
+ def cached_func(*args):
+ if args in cache:
+ return cache[args]
+ else:
+ cache[args] = func(*args)
+
+ return cache[args]
+ return cached_func
+
+@cache_parameters
+def pvq_codebook_size(n, k):
+
+ if k == 0:
+ return 1
+
+ if n == 0:
+ return 0
+
+ return pvq_codebook_size(n - 1, k) + pvq_codebook_size(n, k - 1) + pvq_codebook_size(n - 1, k - 1)
+
+
+def soft_rate_estimate(z, r, reduce=True):
+ """ rate approximation with dependent theta Eq. (7)"""
+
+ rate = torch.sum(
+ - torch.log2((1 - r)/(1 + r) * r ** torch.abs(z) + 1e-6),
+ dim=-1
+ )
+
+ if reduce:
+ rate = torch.mean(rate)
+
+ return rate
+
+
+def hard_rate_estimate(z, r, theta, reduce=True):
+ """ hard rate approximation """
+
+ z_q = torch.round(z)
+ p0 = 1 - r ** (0.5 + 0.5 * theta)
+ alpha = torch.relu(1 - torch.abs(z_q)) ** 2
+ rate = - torch.sum(
+ (alpha * torch.log2(p0 * r ** torch.abs(z_q) + 1e-6)
+ + (1 - alpha) * torch.log2(0.5 * (1 - p0) * (1 - r) * r ** (torch.abs(z_q) - 1) + 1e-6)),
+ dim=-1
+ )
+
+ if reduce:
+ rate = torch.mean(rate)
+
+ return rate
+
+
+
+def soft_dead_zone(x, dead_zone):
+ """ approximates application of a dead zone to x """
+ d = dead_zone * 0.05
+ return x - d * torch.tanh(x / (0.1 + d))
+
+
+def hard_quantize(x):
+ """ round with copy gradient trick """
+ return x + (torch.round(x) - x).detach()
+
+
+def noise_quantize(x):
+ """ simulates quantization with addition of random uniform noise """
+ return x + (torch.rand_like(x) - 0.5)
+
+
+# loss functions
+
+
+def distortion_loss(y_true, y_pred, rate_lambda=None):
+ """ custom distortion loss for LPCNet features """
+
+ if y_true.size(-1) != 20:
+ raise ValueError('distortion loss is designed to work with 20 features')
+
+ ceps_error = y_pred[..., :18] - y_true[..., :18]
+ pitch_error = 2*(y_pred[..., 18:19] - y_true[..., 18:19])
+ corr_error = y_pred[..., 19:] - y_true[..., 19:]
+ pitch_weight = torch.relu(y_true[..., 19:] + 0.5) ** 2
+
+ loss = torch.mean(ceps_error ** 2 + (10/18) * torch.abs(pitch_error) * pitch_weight + (1/18) * corr_error ** 2, dim=-1)
+
+ if type(rate_lambda) != type(None):
+ loss = loss / torch.sqrt(rate_lambda)
+
+ loss = torch.mean(loss)
+
+ return loss
+
+
+# sampling functions
+
+import random
+
+
+def random_split(start, stop, num_splits=3, min_len=3):
+ get_min_len = lambda x : min([x[i+1] - x[i] for i in range(len(x) - 1)])
+ candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
+
+ while get_min_len(candidate) < min_len:
+ candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
+
+ return candidate
+
+
+
+# weight initialization and clipping
+def init_weights(module):
+
+ if isinstance(module, nn.GRU):
+ for p in module.named_parameters():
+ if p[0].startswith('weight_hh_'):
+ nn.init.orthogonal_(p[1])
+
+
+def weight_clip_factory(max_value):
+ """ weight clipping function concerning sum of abs values of adjecent weights """
+ def clip_weight_(w):
+ stop = w.size(1)
+ # omit last column if stop is odd
+ if stop % 2:
+ stop -= 1
+ max_values = max_value * torch.ones_like(w[:, :stop])
+ factor = max_value / torch.maximum(max_values,
+ torch.repeat_interleave(
+ torch.abs(w[:, :stop:2]) + torch.abs(w[:, 1:stop:2]),
+ 2,
+ 1))
+ with torch.no_grad():
+ w[:, :stop] *= factor
+
+ def clip_weights(module):
+ if isinstance(module, nn.GRU) or isinstance(module, nn.Linear):
+ for name, w in module.named_parameters():
+ if name.startswith('weight'):
+ clip_weight_(w)
+
+ return clip_weights
+
+def n(x):
+ return torch.clamp(x + (1./127.)*(torch.rand_like(x)-.5), min=-1., max=1.)
+
+# RDOVAE module and submodules
+
+# Sparsification schedule, passed positionally to GRUSparsifier when the
+# decoder is constructed.
+# NOTE(review): start / stop / interval are presumably counted in training
+# steps -- confirm against GRUSparsifier.
+sparsify_start = 12000
+sparsify_stop = 24000
+sparsify_interval = 100
+sparsify_exponent = 3
+#sparsify_start = 0
+#sparsify_stop = 0
+
+# Per-matrix sparsification settings for the GRU input weights; the entries
+# for the recurrent weights (W_h*) are disabled.
+# NOTE(review): the tuples look like (target density, block shape, flag) --
+# confirm the exact meaning against GRUSparsifier.
+sparse_params1 = {
+#    'W_hr' : (1.0, [8, 4], True),
+#    'W_hz' : (1.0, [8, 4], True),
+#    'W_hn' : (1.0, [8, 4], True),
+    'W_ir' : (0.6, [8, 4], False),
+    'W_iz' : (0.4, [8, 4], False),
+    'W_in' : (0.8, [8, 4], False)
+    }
+
+# Second parameter set with smaller values than sparse_params1.
+sparse_params2 = {
+#    'W_hr' : (1.0, [8, 4], True),
+#    'W_hz' : (1.0, [8, 4], True),
+#    'W_hn' : (1.0, [8, 4], True),
+    'W_ir' : (0.3, [8, 4], False),
+    'W_iz' : (0.2, [8, 4], False),
+    'W_in' : (0.4, [8, 4], False)
+    }
+
+
+class MyConv(nn.Module):
+ def __init__(self, input_dim, output_dim, dilation=1):
+ super(MyConv, self).__init__()
+ self.input_dim = input_dim
+ self.output_dim = output_dim
+ self.dilation=dilation
+ self.conv = nn.Conv1d(input_dim, output_dim, kernel_size=2, padding='valid', dilation=dilation)
+ def forward(self, x, state=None):
+ device = x.device
+ conv_in = torch.cat([torch.zeros_like(x[:,0:self.dilation,:], device=device), x], -2).permute(0, 2, 1)
+ return torch.tanh(self.conv(conv_in)).permute(0, 2, 1)
+
+class GLU(nn.Module):
+ def __init__(self, feat_size):
+ super(GLU, self).__init__()
+
+ torch.manual_seed(5)
+
+ self.gate = weight_norm(nn.Linear(feat_size, feat_size, bias=False))
+
+ self.init_weights()
+
+ def init_weights(self):
+
+ for m in self.modules():
+ if isinstance(m, nn.Conv1d) or isinstance(m, nn.ConvTranspose1d)\
+ or isinstance(m, nn.Linear) or isinstance(m, nn.Embedding):
+ nn.init.orthogonal_(m.weight.data)
+
+ def forward(self, x):
+
+ out = x * torch.sigmoid(self.gate(x))
+
+ return out
+
+class CoreEncoder(nn.Module):
+ STATE_HIDDEN = 128
+ FRAMES_PER_STEP = 2
+ CONV_KERNEL_SIZE = 4
+
+ def __init__(self, feature_dim, output_dim, cond_size, cond_size2, state_size=24):
+ """ core encoder for RDOVAE
+
+ Computes latents, initial states, and rate estimates from features and lambda parameter
+
+ """
+
+ super(CoreEncoder, self).__init__()
+
+ # hyper parameters
+ self.feature_dim = feature_dim
+ self.output_dim = output_dim
+ self.cond_size = cond_size
+ self.cond_size2 = cond_size2
+ self.state_size = state_size
+
+ # derived parameters
+ self.input_dim = self.FRAMES_PER_STEP * self.feature_dim
+
+ # layers
+ self.dense_1 = nn.Linear(self.input_dim, 64)
+ self.gru1 = nn.GRU(64, 64, batch_first=True)
+ self.conv1 = MyConv(128, 96)
+ self.gru2 = nn.GRU(224, 64, batch_first=True)
+ self.conv2 = MyConv(288, 96, dilation=2)
+ self.gru3 = nn.GRU(384, 64, batch_first=True)
+ self.conv3 = MyConv(448, 96, dilation=2)
+ self.gru4 = nn.GRU(544, 64, batch_first=True)
+ self.conv4 = MyConv(608, 96, dilation=2)
+ self.gru5 = nn.GRU(704, 64, batch_first=True)
+ self.conv5 = MyConv(768, 96, dilation=2)
+
+ self.z_dense = nn.Linear(864, self.output_dim)
+
+
+ self.state_dense_1 = nn.Linear(864, self.STATE_HIDDEN)
+
+ self.state_dense_2 = nn.Linear(self.STATE_HIDDEN, self.state_size)
+ nb_params = sum(p.numel() for p in self.parameters())
+ print(f"encoder: {nb_params} weights")
+
+ # initialize weights
+ self.apply(init_weights)
+
+
+ def forward(self, features):
+
+ # reshape features
+ x = torch.reshape(features, (features.size(0), features.size(1) // self.FRAMES_PER_STEP, self.FRAMES_PER_STEP * features.size(2)))
+
+ batch = x.size(0)
+ device = x.device
+
+ # run encoding layer stack
+ x = n(torch.tanh(self.dense_1(x)))
+ x = torch.cat([x, n(self.gru1(x)[0])], -1)
+ x = torch.cat([x, n(self.conv1(x))], -1)
+ x = torch.cat([x, n(self.gru2(x)[0])], -1)
+ x = torch.cat([x, n(self.conv2(x))], -1)
+ x = torch.cat([x, n(self.gru3(x)[0])], -1)
+ x = torch.cat([x, n(self.conv3(x))], -1)
+ x = torch.cat([x, n(self.gru4(x)[0])], -1)
+ x = torch.cat([x, n(self.conv4(x))], -1)
+ x = torch.cat([x, n(self.gru5(x)[0])], -1)
+ x = torch.cat([x, n(self.conv5(x))], -1)
+ z = self.z_dense(x)
+
+ # init state for decoder
+ states = torch.tanh(self.state_dense_1(x))
+ states = self.state_dense_2(states)
+
+ return z, states
+
+
+
+
+class CoreDecoder(nn.Module):
+ # Decoder network of the RDOVAE: turns a sequence of latent vectors and one
+ # initial-state vector into feature frames (see forward).
+
+ # feature frames produced per decoder step; the output layer emits
+ # FRAMES_PER_STEP * output_dim values per step, which forward() reshapes
+ FRAMES_PER_STEP = 4
+
+ def __init__(self, input_dim, output_dim, cond_size, cond_size2, state_size=24):
+ """ core decoder for RDOVAE
+
+ Computes features from latents and an initial state
+
+ input_dim: size of one latent vector (input width of dense_1)
+ output_dim: size of one output feature frame
+ cond_size, cond_size2: stored as attributes only; the layer widths below are hard-coded
+ state_size: size of the initial-state vector consumed by hidden_init
+
+ """
+
+ super(CoreDecoder, self).__init__()
+
+ # hyper parameters
+ self.input_dim = input_dim
+ self.output_dim = output_dim
+ self.cond_size = cond_size
+ self.cond_size2 = cond_size2
+ self.state_size = state_size
+
+ self.input_size = self.input_dim
+
+ # layers
+ # forward() concatenates the output of every stage onto its input; the input
+ # widths below are consistent with each GRU/GLU stage adding 96 and each conv
+ # stage adding 32 channels (96 -> 192 -> 224 -> ... -> 736)
+ self.dense_1 = nn.Linear(self.input_size, 96)
+ self.gru1 = nn.GRU(96, 96, batch_first=True)
+ self.conv1 = MyConv(192, 32)
+ self.gru2 = nn.GRU(224, 96, batch_first=True)
+ self.conv2 = MyConv(320, 32)
+ self.gru3 = nn.GRU(352, 96, batch_first=True)
+ self.conv3 = MyConv(448, 32)
+ self.gru4 = nn.GRU(480, 96, batch_first=True)
+ self.conv4 = MyConv(576, 32)
+ self.gru5 = nn.GRU(608, 96, batch_first=True)
+ self.conv5 = MyConv(704, 32)
+ self.output = nn.Linear(736, self.FRAMES_PER_STEP * self.output_dim)
+ self.glu1 = GLU(96)
+ self.glu2 = GLU(96)
+ self.glu3 = GLU(96)
+ self.glu4 = GLU(96)
+ self.glu5 = GLU(96)
+ # initial-state path: state_size -> 128 -> 480, where 480 = 5 * 96 is split
+ # into one initial hidden state per GRU in forward()
+ self.hidden_init = nn.Linear(self.state_size, 128)
+ self.gru_init = nn.Linear(128, 480)
+
+ nb_params = sum(p.numel() for p in self.parameters())
+ print(f"decoder: {nb_params} weights")
+ # initialize weights
+ self.apply(init_weights)
+ # one sparsifier per GRU; GRUSparsifier, sparse_params1/2 and the sparsify_*
+ # schedule values are defined outside this class
+ self.sparsifier = []
+ self.sparsifier.append(GRUSparsifier([(self.gru1, sparse_params1)], sparsify_start, sparsify_stop, sparsify_interval, sparsify_exponent))
+ self.sparsifier.append(GRUSparsifier([(self.gru2, sparse_params1)], sparsify_start, sparsify_stop, sparsify_interval, sparsify_exponent))
+ self.sparsifier.append(GRUSparsifier([(self.gru3, sparse_params1)], sparsify_start, sparsify_stop, sparsify_interval, sparsify_exponent))
+ self.sparsifier.append(GRUSparsifier([(self.gru4, sparse_params2)], sparsify_start, sparsify_stop, sparsify_interval, sparsify_exponent))
+ self.sparsifier.append(GRUSparsifier([(self.gru5, sparse_params2)], sparsify_start, sparsify_stop, sparsify_interval, sparsify_exponent))
+
+ def sparsify(self):
+ # advance every GRU sparsifier by one step
+ for sparsifier in self.sparsifier:
+ sparsifier.step()
+
+ def forward(self, z, initial_state):
+ """ decodes latents z into feature frames
+
+ z: latent sequence; fed to dense_1, so the last axis has input_dim entries
+ initial_state: state vector(s) fed to hidden_init (last axis: state_size);
+ assumes shape (batch, 1, state_size) so that the permute below yields the
+ (1, batch, 96) initial hidden states nn.GRU expects -- TODO confirm
+
+ returns features with FRAMES_PER_STEP times as many steps along axis 1 as z
+ """
+
+ hidden = torch.tanh(self.hidden_init(initial_state))
+ # swap the first two axes, then cut the 480 values into five 96-wide GRU states
+ gru_state = torch.tanh(self.gru_init(hidden).permute(1, 0, 2))
+ h1_state = gru_state[:,:,:96].contiguous()
+ h2_state = gru_state[:,:,96:192].contiguous()
+ h3_state = gru_state[:,:,192:288].contiguous()
+ h4_state = gru_state[:,:,288:384].contiguous()
+ h5_state = gru_state[:,:,384:].contiguous()
+
+ # run decoding layer stack
+ # n is a helper defined outside this class (presumably a normalization -- confirm)
+ x = n(torch.tanh(self.dense_1(z)))
+
+ # each stage appends its output to x along the last axis
+ x = torch.cat([x, n(self.glu1(n(self.gru1(x, h1_state)[0])))], -1)
+ x = torch.cat([x, n(self.conv1(x))], -1)
+ x = torch.cat([x, n(self.glu2(n(self.gru2(x, h2_state)[0])))], -1)
+ x = torch.cat([x, n(self.conv2(x))], -1)
+ x = torch.cat([x, n(self.glu3(n(self.gru3(x, h3_state)[0])))], -1)
+ x = torch.cat([x, n(self.conv3(x))], -1)
+ x = torch.cat([x, n(self.glu4(n(self.gru4(x, h4_state)[0])))], -1)
+ x = torch.cat([x, n(self.conv4(x))], -1)
+ x = torch.cat([x, n(self.glu5(n(self.gru5(x, h5_state)[0])))], -1)
+ x = torch.cat([x, n(self.conv5(x))], -1)
+
+ # output layer and reshaping
+ # (batch, steps, FRAMES_PER_STEP * output_dim) -> (batch, steps * FRAMES_PER_STEP, output_dim)
+ x10 = self.output(x)
+ features = torch.reshape(x10, (x10.size(0), x10.size(1) * self.FRAMES_PER_STEP, x10.size(2) // self.FRAMES_PER_STEP))
+
+ return features
+
+
+class StatisticalModel(nn.Module):
+    """ Statistical model for latent space
+
+    A single embedding table indexed by quantization id. Each row holds six
+    groups of total_dim = latent_dim + state_dim values, from which scaling,
+    dead zone, r and theta are derived.
+    """
+
+    def __init__(self, quant_levels, latent_dim, state_dim):
+        super().__init__()
+
+        # copy parameters
+        self.quant_levels = quant_levels
+        self.latent_dim = latent_dim
+        self.state_dim = state_dim
+        self.total_dim = latent_dim + state_dim
+        self.embedding_dim = 6 * self.total_dim
+
+        # quantization embedding, starting from an all-zero table
+        self.quant_embedding = nn.Embedding(quant_levels, self.embedding_dim)
+        with torch.no_grad():
+            self.quant_embedding.weight[:] = 0
+
+    def forward(self, quant_ids):
+        """ takes quant_ids and returns statistical model parameters
+
+        The result is a dict with the raw embedding ('quant_embedding') and the
+        derived parameters; every derived entry has total_dim values on its last axis.
+        """
+
+        embedding = self.quant_embedding(quant_ids)
+        width = self.total_dim
+
+        def group(index):
+            # index-th run of `width` consecutive entries along the last axis
+            return embedding[..., index * width : (index + 1) * width]
+
+        # CAVE: theta_soft is not used anymore. Kick it out?
+        return {
+            'quant_embedding' : embedding,
+            'quant_scale' : F.softplus(group(0)),
+            'dead_zone' : F.softplus(group(1)),
+            'r_hard' : torch.sigmoid(group(5)),
+            'theta_hard' : torch.sigmoid(group(4)),
+            'r_soft' : torch.sigmoid(group(3)),
+            'theta_soft' : torch.sigmoid(group(2))
+        }
+
+
+class RDOVAE(nn.Module):
+ def __init__(self,
+ feature_dim,
+ latent_dim,
+ quant_levels,
+ cond_size,
+ cond_size2,
+ state_dim=24,
+ split_mode='split',
+ clip_weights=False,
+ pvq_num_pulses=82,
+ state_dropout_rate=0):
+ """ RDOVAE model combining core encoder, core decoder and statistical model
+
+ feature_dim: size of one feature frame (encoder input / decoder output)
+ latent_dim: size of one latent vector
+ quant_levels: number of quantization ids supported by the statistical model
+ cond_size, cond_size2: forwarded to CoreEncoder and CoreDecoder
+ state_dim: size of the decoder initial state
+ split_mode: mode passed to get_decoder_chunks in forward ('split' or 'random_split')
+ clip_weights: if true, clip_weights() applies weight_clip_factory(0.496) to the model
+ pvq_num_pulses: number of pulses used by encode() for the state quantization
+ state_dropout_rate: probability with which forward() zeroes the hard-quantized
+ initial states of a batch item
+
+ raises ValueError if the decoder stride is not a multiple of the encoder stride
+ """
+
+ super(RDOVAE, self).__init__()
+
+ self.feature_dim = feature_dim
+ self.latent_dim = latent_dim
+ self.quant_levels = quant_levels
+ self.cond_size = cond_size
+ self.cond_size2 = cond_size2
+ self.split_mode = split_mode
+ self.state_dim = state_dim
+ self.pvq_num_pulses = pvq_num_pulses
+ self.state_dropout_rate = state_dropout_rate
+
+ # submodules encoder and decoder share the statistical model
+ # encoder and decoder are wrapped in nn.DataParallel, so the bare modules are reached via .module
+ self.statistical_model = StatisticalModel(quant_levels, latent_dim, state_dim)
+ self.core_encoder = nn.DataParallel(CoreEncoder(feature_dim, latent_dim, cond_size, cond_size2, state_size=state_dim))
+ self.core_decoder = nn.DataParallel(CoreDecoder(latent_dim, feature_dim, cond_size, cond_size2, state_size=state_dim))
+
+ # feature frames consumed per encoder step / produced per decoder step
+ self.enc_stride = CoreEncoder.FRAMES_PER_STEP
+ self.dec_stride = CoreDecoder.FRAMES_PER_STEP
+
+ # here clip_weights is the constructor argument, not the method of the same name
+ if clip_weights:
+ self.weight_clip_fn = weight_clip_factory(0.496)
+ else:
+ self.weight_clip_fn = None
+
+ # get_decoder_chunks relies on dec_stride // enc_stride being exact
+ if self.dec_stride % self.enc_stride != 0:
+ raise ValueError(f"get_decoder_chunks_generic: encoder stride does not divide decoder stride")
+
+    def clip_weights(self):
+        """ applies the configured weight clipping function via self.apply; no-op if clipping is disabled """
+        if self.weight_clip_fn is None:
+            return
+        self.apply(self.weight_clip_fn)
+
+    def sparsify(self):
+        """ advances the sparsification of the core decoder; the encoder call is disabled """
+        decoder = self.core_decoder.module
+        decoder.sparsify()
+
+ def get_decoder_chunks(self, z_frames, mode='split', chunks_per_offset = 4):
+ """ computes the latent and feature index ranges the decoder is run on
+
+ z_frames: number of latent frames available
+ mode: 'split' (deterministic split points) or 'random_split' (split points from random_split)
+ chunks_per_offset: number of chunks generated for each of the dec_stride // enc_stride offsets
+
+ returns a list of dicts with keys z_start, z_stop, z_stride, features_start, features_stop
+
+ raises ValueError if z_frames is too small for an offset or if mode is unknown
+ """
+
+ enc_stride = self.enc_stride
+ dec_stride = self.dec_stride
+
+ # number of latent frames covered by one decoder step
+ stride = dec_stride // enc_stride
+
+ chunks = []
+
+ for offset in range(stride):
+ # start is the smallest number = offset mod stride that decodes to a valid range
+ start = offset
+ while enc_stride * (start + 1) - dec_stride < 0:
+ start += stride
+
+ # check if start is a valid index
+ if start >= z_frames:
+ raise ValueError("get_decoder_chunks_generic: range too small")
+
+ # stop is the smallest number outside [0, num_enc_frames] that's congruent to offset mod stride
+ stop = z_frames - (z_frames % stride) + offset
+ while stop < z_frames:
+ stop += stride
+
+ # calculate split points
+ length = (stop - start)
+ if mode == 'split':
+ # chunks_per_offset split points at multiples of stride from start, plus stop as the final boundary
+ split_points = [start + stride * int(i * length / chunks_per_offset / stride) for i in range(chunks_per_offset)] + [stop]
+ elif mode == 'random_split':
+ # random_split is defined outside this class; presumably it returns chunks_per_offset + 1 boundaries -- confirm
+ split_points = [stride * x + start for x in random_split(0, (stop - start)//stride - 1, chunks_per_offset - 1, 1)]
+ else:
+ raise ValueError(f"get_decoder_chunks_generic: unknown mode {mode}")
+
+
+ for i in range(chunks_per_offset):
+ # (enc_frame_start, enc_frame_stop, enc_frame_stride, stride, feature_frame_start, feature_frame_stop)
+ # encoder range(i, j, stride) maps to feature range(enc_stride * (i + 1) - dec_stride, enc_stride * j)
+ # provided that i - j = 1 mod stride
+ chunks.append({
+ 'z_start' : split_points[i],
+ 'z_stop' : split_points[i + 1] - stride + 1,
+ 'z_stride' : stride,
+ 'features_start' : enc_stride * (split_points[i] + 1) - dec_stride,
+ 'features_stop' : enc_stride * (split_points[i + 1] - stride + 1)
+ })
+
+ return chunks
+
+
+ def forward(self, features, q_id):
+ """ training forward pass: encode, quantize (hard and noise), decode chunk-wise
+
+ features: input features passed to the core encoder
+ q_id: quantization ids passed to the statistical model
+
+ returns a dict with
+ 'outputs_hard_quant' / 'outputs_soft_quant': lists of (decoded features, features_start, features_stop),
+ one entry per chunk, decoded from hard-quantized / noise-quantized latents
+ 'z', 'states': latents and initial states after scaling and dead zone, before quantization
+ 'statistical_model': output dict of the statistical model
+ """
+
+ # calculate statistical model from quantization ID
+ statistical_model = self.statistical_model(q_id)
+
+ # run encoder
+ z, states = self.core_encoder(features)
+
+ # scaling, dead-zone and quantization
+ # the statistical model parameters hold latent_dim entries for z followed by the entries for the states
+ z = z * statistical_model['quant_scale'][:,:,:self.latent_dim]
+ z = soft_dead_zone(z, statistical_model['dead_zone'][:,:,:self.latent_dim])
+
+ # quantization
+ z_q = hard_quantize(z) / statistical_model['quant_scale'][:,:,:self.latent_dim]
+ z_n = noise_quantize(z) / statistical_model['quant_scale'][:,:,:self.latent_dim]
+ #states_q = soft_pvq(states, self.pvq_num_pulses)
+ states = states * statistical_model['quant_scale'][:,:,self.latent_dim:]
+ states = soft_dead_zone(states, statistical_model['dead_zone'][:,:,self.latent_dim:])
+
+ states_q = hard_quantize(states) / statistical_model['quant_scale'][:,:,self.latent_dim:]
+ states_n = noise_quantize(states) / statistical_model['quant_scale'][:,:,self.latent_dim:]
+
+ # state dropout: zero the hard-quantized states of randomly selected batch items
+ if self.state_dropout_rate > 0:
+ drop = torch.rand(states_q.size(0)) < self.state_dropout_rate
+ mask = torch.ones_like(states_q)
+ mask[drop] = 0
+ states_q = states_q * mask
+
+ # decoder
+ chunks = self.get_decoder_chunks(z.size(1), mode=self.split_mode)
+
+ outputs_hq = []
+ outputs_sq = []
+ for chunk in chunks:
+ # the decoder runs on the time-reversed latent slice and is initialized with the
+ # state at the last latent index of the chunk; its output is flipped back afterwards
+ # decoder with hard quantized input
+ z_dec_reverse = torch.flip(z_q[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :], [1])
+ dec_initial_state = states_q[..., chunk['z_stop'] - 1 : chunk['z_stop'], :]
+ features_reverse = self.core_decoder(z_dec_reverse, dec_initial_state)
+ outputs_hq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))
+
+
+ # decoder with soft quantized input
+ z_dec_reverse = torch.flip(z_n[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :], [1])
+ dec_initial_state = states_n[..., chunk['z_stop'] - 1 : chunk['z_stop'], :]
+ features_reverse = self.core_decoder(z_dec_reverse, dec_initial_state)
+ outputs_sq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))
+
+ return {
+ 'outputs_hard_quant' : outputs_hq,
+ 'outputs_soft_quant' : outputs_sq,
+ 'z' : z,
+ 'states' : states,
+ 'statistical_model' : statistical_model
+ }
+
+    def encode(self, features):
+        """ encoder with quantization and rate estimation
+
+        returns the latents, the PVQ-quantized initial states and the base-2
+        logarithm of the PVQ codebook size for the states
+        """
+
+        latents, initial_states = self.core_encoder(features)
+
+        # quantization of initial states
+        quantized_states = soft_pvq(initial_states, self.pvq_num_pulses)
+        codebook_size = pvq_codebook_size(self.state_dim, self.pvq_num_pulses)
+
+        return latents, quantized_states, m.log2(codebook_size)
+
+    def decode(self, z, initial_state):
+        """ decoder (flips sequences by itself) """
+
+        # the core decoder consumes the latents in reversed order along axis 1;
+        # its output is flipped back before it is returned
+        reversed_features = self.core_decoder(torch.flip(z, [1]), initial_state)
+        return torch.flip(reversed_features, [1])
+
+    def quantize(self, z, q_ids):
+        """ quantization of latent vectors
+
+        z: latents as returned by the core encoder (last axis: latent_dim)
+        q_ids: quantization ids, passed to the statistical model
+
+        returns (zq, sizes): the rounded, scaled latents and the value of
+        hard_rate_estimate for them (called with reduce=False)
+        """
+
+        stats = self.statistical_model(q_ids)
+
+        # the statistical model holds latent and state parameters side by side on the
+        # last axis; select the latent part there (as unquantize and forward do),
+        # not on the first axis
+        quant_scale = stats['quant_scale'][:,:,:self.latent_dim]
+        dead_zone = stats['dead_zone'][:,:,:self.latent_dim]
+
+        zq = z * quant_scale
+        zq = soft_dead_zone(zq, dead_zone)
+        zq = torch.round(zq)
+
+        sizes = hard_rate_estimate(zq, stats['r_hard'][:,:,:self.latent_dim], stats['theta_hard'][:,:,:self.latent_dim], reduce=False)
+
+        return zq, sizes
+
+    def unquantize(self, zq, q_ids):
+        """ re-scaling of latent vector """
+
+        # undo the scaling with the latent part of the quantization scale
+        latent_scale = self.statistical_model(q_ids)['quant_scale'][:,:,:self.latent_dim]
+        return zq / latent_scale
+
+    def freeze_model(self):
+        """ freezes all parameters except those of the statistical model """
+
+        # first pass disables gradients everywhere, second pass re-enables
+        # them for the statistical model only
+        for module, trainable in ((self, False), (self.statistical_model, True)):
+            for p in module.parameters():
+                p.requires_grad = trainable
diff --git a/dnn/torch/rdovae/requirements.txt b/dnn/torch/rdovae/requirements.txt
new file mode 100644
index 00000000..9225ea84
--- /dev/null
+++ b/dnn/torch/rdovae/requirements.txt
@@ -0,0 +1,4 @@
+numpy
+scipy
+torch
+tqdm \ No newline at end of file
diff --git a/dnn/torch/rdovae/train_rdovae.py b/dnn/torch/rdovae/train_rdovae.py
new file mode 100644
index 00000000..d9a43b33
--- /dev/null
+++ b/dnn/torch/rdovae/train_rdovae.py
@@ -0,0 +1,278 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import torch
+import tqdm
+
+from rdovae import RDOVAE, RDOVAEDataset, distortion_loss, hard_rate_estimate, soft_rate_estimate
+
+
+# command line interface of the training script
+parser = argparse.ArgumentParser()
+
+# positional arguments: training data and output location
+parser.add_argument('features', type=str, help='path to feature file in .f32 format')
+parser.add_argument('output', type=str, help='path to output folder')
+
+# written to the CUDA_VISIBLE_DEVICES environment variable after parsing
+parser.add_argument('--cuda-visible-devices', type=str, help="comma separates list of cuda visible device indices, default: ''", default="")
+
+
+# options describing the model and the rate lambda range used by the dataset
+model_group = parser.add_argument_group(title="model parameters")
+model_group.add_argument('--latent-dim', type=int, help="number of symbols produces by encoder, default: 80", default=80)
+model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
+model_group.add_argument('--cond-size2', type=int, help="second conditioning size, default: 256", default=256)
+model_group.add_argument('--state-dim', type=int, help="dimensionality of transfered state, default: 24", default=24)
+model_group.add_argument('--quant-levels', type=int, help="number of quantization levels, default: 16", default=16)
+model_group.add_argument('--lambda-min', type=float, help="minimal value for rate lambda, default: 0.0002", default=2e-4)
+model_group.add_argument('--lambda-max', type=float, help="maximal value for rate lambda, default: 0.0104", default=0.0104)
+model_group.add_argument('--pvq-num-pulses', type=int, help="number of pulses for PVQ, default: 82", default=82)
+model_group.add_argument('--state-dropout-rate', type=float, help="state dropout rate, default: 0", default=0.0)
+
+# options controlling optimization, data layout and checkpoint handling
+training_group = parser.add_argument_group(title="training parameters")
+training_group.add_argument('--batch-size', type=int, help="batch size, default: 32", default=32)
+training_group.add_argument('--lr', type=float, help='learning rate, default: 3e-4', default=3e-4)
+training_group.add_argument('--epochs', type=int, help='number of training epochs, default: 100', default=100)
+training_group.add_argument('--sequence-length', type=int, help='sequence length, needs to be divisible by 4, default: 256', default=256)
+training_group.add_argument('--lr-decay-factor', type=float, help='learning rate decay factor, default: 2.5e-5', default=2.5e-5)
+training_group.add_argument('--split-mode', type=str, choices=['split', 'random_split'], help='splitting mode for decoder input, default: split', default='split')
+training_group.add_argument('--enable-first-frame-loss', action='store_true', default=False, help='enables dedicated distortion loss on first 4 decoder frames')
+training_group.add_argument('--initial-checkpoint', type=str, help='initial checkpoint to start training from, default: None', default=None)
+training_group.add_argument('--train-decoder-only', action='store_true', help='freeze encoder and statistical model and train decoder only')
+
+args = parser.parse_args()
+
+# set visible devices
+# NOTE(review): with the default '' no CUDA device is visible, so the device
+# selection below presumably falls back to the CPU -- confirm
+os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_visible_devices
+
+# checkpoints
+# `checkpoint` collects the hyper parameters of this run; it is saved together
+# with the model state after every epoch
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+checkpoint = dict()
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+# training parameters
+batch_size = args.batch_size
+lr = args.lr
+epochs = args.epochs
+sequence_length = args.sequence_length
+lr_decay_factor = args.lr_decay_factor
+split_mode = args.split_mode
+# not exposed
+adam_betas = [0.8, 0.95]
+adam_eps = 1e-8
+
+checkpoint['batch_size'] = batch_size
+checkpoint['lr'] = lr
+checkpoint['lr_decay_factor'] = lr_decay_factor
+checkpoint['split_mode'] = split_mode
+checkpoint['epochs'] = epochs
+checkpoint['sequence_length'] = sequence_length
+checkpoint['adam_betas'] = adam_betas
+
+# logging
+# progress bar statistics are refreshed every log_interval batches
+log_interval = 10
+
+# device
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+# model parameters
+cond_size = args.cond_size
+cond_size2 = args.cond_size2
+latent_dim = args.latent_dim
+quant_levels = args.quant_levels
+lambda_min = args.lambda_min
+lambda_max = args.lambda_max
+state_dim = args.state_dim
+# not exposed
+num_features = 20
+
+
+# training data
+feature_file = args.features
+
+# model
+# constructor arguments are stored in the checkpoint so the model can be rebuilt from it
+checkpoint['model_args'] = (num_features, latent_dim, quant_levels, cond_size, cond_size2)
+checkpoint['model_kwargs'] = {'state_dim': state_dim, 'split_mode' : split_mode, 'pvq_num_pulses': args.pvq_num_pulses, 'state_dropout_rate': args.state_dropout_rate}
+model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+# optionally start from the weights of an earlier run
+if args.initial_checkpoint is not None:
+    # load into a separate dict: `checkpoint` already holds the hyper parameters,
+    # model_args and model_kwargs of THIS run and must not be replaced by the
+    # metadata of the run that produced the initial weights
+    initial_checkpoint = torch.load(args.initial_checkpoint, map_location='cpu')
+    # strict=False tolerates missing and unexpected keys in the loaded state dict
+    model.load_state_dict(initial_checkpoint['state_dict'], strict=False)
+
+checkpoint['state_dict'] = model.state_dict()
+
+# decoder-only training: freeze the encoder and the statistical model so that
+# only the remaining parameters are handed to the optimizer below
+if args.train_decoder_only:
+ if args.initial_checkpoint is None:
+ print("warning: training decoder only without providing initial checkpoint")
+
+ for p in model.core_encoder.module.parameters():
+ p.requires_grad = False
+
+ for p in model.statistical_model.parameters():
+ p.requires_grad = False
+
+# dataloader
+# dataset arguments are stored in the checkpoint as well
+# NOTE(review): 36 is presumably the number of values per frame in the feature file -- confirm against RDOVAEDataset
+checkpoint['dataset_args'] = (feature_file, sequence_length, num_features, 36)
+checkpoint['dataset_kwargs'] = {'lambda_min': lambda_min, 'lambda_max': lambda_max, 'enc_stride': model.enc_stride, 'quant_levels': quant_levels}
+dataset = RDOVAEDataset(*checkpoint['dataset_args'], **checkpoint['dataset_kwargs'])
+dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)
+
+
+
+# optimizer
+# only parameters that still require gradients are optimized
+params = [p for p in model.parameters() if p.requires_grad]
+optimizer = torch.optim.Adam(params, lr=lr, betas=adam_betas, eps=adam_eps)
+
+
+# learning rate scheduler
+# scales the base learning rate by 1 / (1 + lr_decay_factor * x), where x counts scheduler steps
+scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))
+
+# training entry point: runs `epochs` epochs and writes one checkpoint per epoch
+if __name__ == '__main__':
+
+ # push model to device
+ model.to(device)
+
+ # training loop
+
+ for epoch in range(1, epochs + 1):
+
+ print(f"training epoch {epoch}...")
+
+ # running stats
+ # sums over the batches of this epoch; divided by the batch count when displayed
+ running_rate_loss = 0
+ running_soft_dist_loss = 0
+ running_hard_dist_loss = 0
+ running_hard_rate_loss = 0
+ running_soft_rate_loss = 0
+ running_total_loss = 0
+ running_rate_metric = 0
+ running_states_rate_metric = 0
+ previous_total_loss = 0
+ running_first_frame_loss = 0
+
+ with tqdm.tqdm(dataloader, unit='batch') as tepoch:
+ for i, (features, rate_lambda, q_ids) in enumerate(tepoch):
+
+ # zero out gradients
+ optimizer.zero_grad()
+
+ # push inputs to device
+ features = features.to(device)
+ q_ids = q_ids.to(device)
+ rate_lambda = rate_lambda.to(device)
+
+
+ # repeat every rate lambda twice along axis 1 so it can be indexed per feature frame
+ # NOTE(review): the factor 2 is hard-coded; presumably it has to equal model.enc_stride -- confirm
+ rate_lambda_upsamp = torch.repeat_interleave(rate_lambda, 2, 1)
+
+ # run model
+ model_output = model(features, q_ids)
+
+ # collect outputs
+ z = model_output['z']
+ states = model_output['states']
+ outputs_hard_quant = model_output['outputs_hard_quant']
+ outputs_soft_quant = model_output['outputs_soft_quant']
+ statistical_model = model_output['statistical_model']
+
+ # rate loss
+ # the statistical model parameters hold latent_dim entries for z followed by the entries for the states;
+ # the states rate is weighted with .02 and the hard rate loss with 0.1
+ hard_rate = hard_rate_estimate(z, statistical_model['r_hard'][:,:,:latent_dim], statistical_model['theta_hard'][:,:,:latent_dim], reduce=False)
+ soft_rate = soft_rate_estimate(z, statistical_model['r_soft'][:,:,:latent_dim], reduce=False)
+ states_hard_rate = hard_rate_estimate(states, statistical_model['r_hard'][:,:,latent_dim:], statistical_model['theta_hard'][:,:,latent_dim:], reduce=False)
+ states_soft_rate = soft_rate_estimate(states, statistical_model['r_soft'][:,:,latent_dim:], reduce=False)
+ soft_rate_loss = torch.mean(torch.sqrt(rate_lambda) * (soft_rate + .02*states_soft_rate))
+ hard_rate_loss = torch.mean(torch.sqrt(rate_lambda) * (hard_rate + .02*states_hard_rate))
+ rate_loss = (soft_rate_loss + 0.1 * hard_rate_loss)
+ hard_rate_metric = torch.mean(hard_rate)
+ states_rate_metric = torch.mean(states_hard_rate)
+
+ ## distortion losses
+ # every loss below is averaged over the decoded chunks
+
+ # hard quantized decoder input
+ distortion_loss_hard_quant = torch.zeros_like(rate_loss)
+ for dec_features, start, stop in outputs_hard_quant:
+ distortion_loss_hard_quant += distortion_loss(features[..., start : stop, :], dec_features, rate_lambda_upsamp[..., start : stop]) / len(outputs_hard_quant)
+
+ # distortion on the last 4 frames of every chunk; always computed for logging,
+ # only added to the total loss if --enable-first-frame-loss is set
+ first_frame_loss = torch.zeros_like(rate_loss)
+ for dec_features, start, stop in outputs_hard_quant:
+ first_frame_loss += distortion_loss(features[..., stop-4 : stop, :], dec_features[..., -4:, :], rate_lambda_upsamp[..., stop - 4 : stop]) / len(outputs_hard_quant)
+
+ # soft quantized decoder input
+ distortion_loss_soft_quant = torch.zeros_like(rate_loss)
+ for dec_features, start, stop in outputs_soft_quant:
+ distortion_loss_soft_quant += distortion_loss(features[..., start : stop, :], dec_features, rate_lambda_upsamp[..., start : stop]) / len(outputs_soft_quant)
+
+ # total loss
+ total_loss = rate_loss + (distortion_loss_hard_quant + distortion_loss_soft_quant) / 2
+
+ if args.enable_first_frame_loss:
+ total_loss = .97*total_loss + 0.03 * first_frame_loss
+
+
+ total_loss.backward()
+
+ optimizer.step()
+
+ # weight clipping and sparsification are applied after every optimizer step
+ model.clip_weights()
+ model.sparsify()
+
+ # the learning rate schedule advances once per batch
+ scheduler.step()
+
+ # collect running stats
+ running_hard_dist_loss += float(distortion_loss_hard_quant.detach().cpu())
+ running_soft_dist_loss += float(distortion_loss_soft_quant.detach().cpu())
+ running_rate_loss += float(rate_loss.detach().cpu())
+ running_rate_metric += float(hard_rate_metric.detach().cpu())
+ running_states_rate_metric += float(states_rate_metric.detach().cpu())
+ running_total_loss += float(total_loss.detach().cpu())
+ running_first_frame_loss += float(first_frame_loss.detach().cpu())
+ running_soft_rate_loss += float(soft_rate_loss.detach().cpu())
+ running_hard_rate_loss += float(hard_rate_loss.detach().cpu())
+
+ if (i + 1) % log_interval == 0:
+ # current_loss is the mean total loss of the last log_interval batches,
+ # all other values are means since the start of the epoch
+ current_loss = (running_total_loss - previous_total_loss) / log_interval
+ tepoch.set_postfix(
+ current_loss=current_loss,
+ total_loss=running_total_loss / (i + 1),
+ dist_hq=running_hard_dist_loss / (i + 1),
+ dist_sq=running_soft_dist_loss / (i + 1),
+ rate_loss=running_rate_loss / (i + 1),
+ rate=running_rate_metric / (i + 1),
+ states_rate=running_states_rate_metric / (i + 1),
+ ffloss=running_first_frame_loss / (i + 1),
+ rateloss_hard=running_hard_rate_loss / (i + 1),
+ rateloss_soft=running_soft_rate_loss / (i + 1)
+ )
+ previous_total_loss = running_total_loss
+
+ # save checkpoint
+ # stores the model state, the mean total loss per batch and the epoch number
+ checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch}.pth')
+ checkpoint['state_dict'] = model.state_dict()
+ checkpoint['loss'] = running_total_loss / len(dataloader)
+ checkpoint['epoch'] = epoch
+ torch.save(checkpoint, checkpoint_path)
diff --git a/dnn/torch/testsuite/README.md b/dnn/torch/testsuite/README.md
new file mode 100644
index 00000000..cc76965e
--- /dev/null
+++ b/dnn/torch/testsuite/README.md
@@ -0,0 +1,46 @@
+# lpcnet-testsuite
+
+## setup
+The test script is written for Linux only. It requires sox to be installed and available.
+
+Setup is done as usual via
+
+```
+pip install -r requirements.txt
+```
+
+The test script run_warpq_test.py requires a setup file in YAML format, which specifies,
+as a list of shell commands, how to generate a wave file OUTPUT from a wave file INPUT
+resampled to the specified sampling rate. This makes it easy to test other neural vocoders
+with it as well. Two examples are given in the examples folder. INPUT and OUTPUT will be replaced by using
+the string.format(INPUT=input,OUTPUT=output) method.
+
+Here is one example:
+
+```
+test: "LPCNet reference test"
+processing:
+ - "sox {INPUT} {INPUT}.raw"
+ - "/local/code/LPCNet/lpcnet_demo -features {INPUT}.raw {INPUT}.features.f32"
+ - "/local/code/LPCNet/lpcnet_demo -synthesis {INPUT}.features.f32 {INPUT}.decoded.raw"
+ - "sox -r 16000 -L -e signed-integer -b 16 -c 1 {INPUT}.decoded.raw {OUTPUT}"
+```
+
+The structure of the output folder is as follows:
+
+```
+output_folder
++-- html
+ +-- index.html
+ +-- items
++-- processing
++-- setup.yml
++-- stats.txt
++-- scores.txt
+```
+
+scores.txt contains the WARP-Q scores in descending order (best to worst)
+stats.txt contains mean values over all, the 10 best and the 10 worst items
+setup.yml contains all information to repeat the run
+html contains a self-contained website displaying the 10 best and 10 worst items
+processing contains processing output \ No newline at end of file
diff --git a/dnn/torch/testsuite/examples/lpcnet_c_example.yml b/dnn/torch/testsuite/examples/lpcnet_c_example.yml
new file mode 100644
index 00000000..2858309c
--- /dev/null
+++ b/dnn/torch/testsuite/examples/lpcnet_c_example.yml
@@ -0,0 +1,6 @@
+test: "LPCNet reference test"
+processing:
+ - "sox {INPUT} {INPUT}.raw"
+ - "/local/code/LPCNet/lpcnet_demo -features {INPUT}.raw {INPUT}.features.f32"
+ - "/local/code/LPCNet/lpcnet_demo -synthesis {INPUT}.features.f32 {INPUT}.decoded.raw"
+ - "sox -r 16000 -L -e signed-integer -b 16 -c 1 {INPUT}.decoded.raw {OUTPUT} trim 0.015" \ No newline at end of file
diff --git a/dnn/torch/testsuite/examples/lpcnet_c_plc_example.yml b/dnn/torch/testsuite/examples/lpcnet_c_plc_example.yml
new file mode 100644
index 00000000..b97b26d1
--- /dev/null
+++ b/dnn/torch/testsuite/examples/lpcnet_c_plc_example.yml
@@ -0,0 +1,5 @@
+test: "LPCNet reference test"
+processing:
+ - "sox {INPUT} {INPUT}.raw"
+ - "/local/code/LPCNet/lpcnet_demo -plc_file causal {PLCFILE} {INPUT}.raw {INPUT}.decoded.raw"
+ - "sox -r 16000 -L -e signed-integer -b 16 -c 1 {INPUT}.decoded.raw {OUTPUT}" \ No newline at end of file
diff --git a/dnn/torch/testsuite/examples/lpcnet_torch_example.yml b/dnn/torch/testsuite/examples/lpcnet_torch_example.yml
new file mode 100644
index 00000000..631cbfad
--- /dev/null
+++ b/dnn/torch/testsuite/examples/lpcnet_torch_example.yml
@@ -0,0 +1,5 @@
+test: "no noise test"
+processing:
+ - "sox {INPUT} {INPUT}.raw"
+ - "/home/ubuntu/bin/lpcnet_dump_data_v2 -test {INPUT}.raw {INPUT}.features.f32"
+ - "/home/ubuntu/opt/miniconda3/envs/torch/bin/python /local/code/lpcnext/test_lpcnet.py {INPUT}.features.f32 /local/experiments/noise_augmentation/output/lpcnet_384_2/checkpoints/checkpoint_epoch_20.pth {OUTPUT}" \ No newline at end of file
diff --git a/dnn/torch/testsuite/requirements.txt b/dnn/torch/testsuite/requirements.txt
new file mode 100644
index 00000000..af2d9916
--- /dev/null
+++ b/dnn/torch/testsuite/requirements.txt
@@ -0,0 +1,12 @@
+scipy
+librosa
+numpy
+scikit-image
+pyvad
+speechpy
+soundfile
+pyyaml
+pesq
+AMFM_decompy
+matplotlib
+multiprocess \ No newline at end of file
diff --git a/dnn/torch/testsuite/run_test.py b/dnn/torch/testsuite/run_test.py
new file mode 100644
index 00000000..69463ddb
--- /dev/null
+++ b/dnn/torch/testsuite/run_test.py
@@ -0,0 +1,375 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import multiprocess as multiprocessing
+import random
+import subprocess
+import argparse
+import shutil
+
+import yaml
+
+from utils.files import get_wave_file_list
+from utils.pesq import compute_PESQ
+from utils.pitch import compute_pitch_error
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('setup', type=str, help='setup yaml specifying end to end processing with model under test')
+parser.add_argument('input_folder', type=str, help='input folder path')
+parser.add_argument('output_folder', type=str, help='output folder path')
+parser.add_argument('--num-testitems', type=int, help="number of testitems to be processed (default 100)", default=100)
+parser.add_argument('--seed', type=int, help='seed for random item selection', default=None)
+parser.add_argument('--fs', type=int, help="sampling rate at which input is presented as wave file (defaults to 16000)", default=16000)
+parser.add_argument('--num-workers', type=int, help="number of subprocesses to be used (default=4)", default=4)
+parser.add_argument('--plc-suffix', type=str, default="_is_lost.txt", help="suffix of plc error pattern file: only relevant if command chain uses PLCFILE (default=_is_lost.txt)")
+parser.add_argument('--metrics', type=str, default='pesq', help='comma separated string of metrics, supported: {{"pesq", "pitch_error", "voicing_error"}}, default="pesq"')
+parser.add_argument('--verbose', action='store_true', help='enables printouts of all commands run in the pipeline')
+
+def check_for_sox_in_path():
+ r = subprocess.run("sox -h", shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ return r.returncode == 0
+
+
+def run_save_sh(command, verbose=False):
+
+ if verbose:
+ print(f"[run_save_sh] running command {command}...")
+
+ r = subprocess.run(command, shell=True)
+ if r.returncode != 0:
+ raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")
+
+
+def run_processing_chain(input_path, output_path, model_commands, fs, metrics={'pesq'}, plc_suffix="_is_lost.txt", verbose=False):
+
+ # prepare model input
+ model_input = output_path + ".resamp.wav"
+ run_save_sh(f"sox {input_path} -r {fs} {model_input}", verbose=verbose)
+
+ plcfile = os.path.splitext(input_path)[0] + plc_suffix
+ if os.path.isfile(plcfile):
+ run_save_sh(f"cp {plcfile} {os.path.dirname(output_path)}")
+
+ # generate model output
+ for command in model_commands:
+ run_save_sh(command.format(INPUT=model_input, OUTPUT=output_path, PLCFILE=plcfile), verbose=verbose)
+
+ scores = dict()
+ cache = dict()
+ for metric in metrics:
+ if metric == 'pesq':
+ # run pesq
+ score = compute_PESQ(input_path, output_path, fs=fs)
+ elif metric == 'pitch_error':
+ if metric in cache:
+ score = cache[metric]
+ else:
+ rval = compute_pitch_error(input_path, output_path, fs=fs)
+ score = rval[metric]
+ cache['voicing_error'] = rval['voicing_error']
+ elif metric == 'voicing_error':
+ if metric in cache:
+ score = cache[metric]
+ else:
+ rval = compute_pitch_error(input_path, output_path, fs=fs)
+ score = rval[metric]
+ cache['pitch_error'] = rval['pitch_error']
+ else:
+ ValueError(f'error: unknown metric {metric}')
+
+ scores[metric] = score
+
+ return (output_path, scores)
+
+
+def get_output_path(root_folder, input, output_folder):
+
+ input_relpath = os.path.relpath(input, root_folder)
+
+ os.makedirs(os.path.join(output_folder, 'processing', os.path.dirname(input_relpath)), exist_ok=True)
+
+ output_path = os.path.join(output_folder, 'processing', input_relpath + '.output.wav')
+
+ return output_path
+
+
+def add_audio_table(f, html_folder, results, title, metric):
+ """ Writes one HTML table with scores and audio players to the open file f.
+
+ f: writable text file object receiving the HTML
+ html_folder: folder of the html page; audio files are copied to its 'items' sub folder
+ results: iterable of (item path, score) pairs; rows are written in the given order
+ title: heading printed above the table
+ metric: metric name, used (upper-cased) as header of the score column
+ """
+
+ item_folder = os.path.join(html_folder, 'items')
+ os.makedirs(item_folder, exist_ok=True)
+
+ # table with results
+ f.write(f"""
+ <div>
+ <h2> {title} </h2>
+ <table>
+ <tr>
+ <th> Rank </th>
+ <th> Name </th>
+ <th> {metric.upper()} </th>
+ <th> Audio (out) </th>
+ <th> Audio (orig) </th>
+ </tr>
+ """)
+
+ for i, r in enumerate(results):
+ item, score = r
+ item_name = os.path.basename(item)
+ new_item_path = os.path.join(item_folder, item_name)
+ # copy the processed item and the file item + '.resamp.wav' next to the html page
+ # so that the relative "items/..." links below resolve
+ shutil.copyfile(item, new_item_path)
+ shutil.copyfile(item + '.resamp.wav', os.path.join(item_folder, item_name + '.orig.wav'))
+
+ # one row per item; the displayed name is the file name up to its first '.'
+ f.write(f"""
+ <tr>
+ <td> {i + 1} </td>
+ <td> {item_name.split('.')[0]} </td>
+ <td> {score:.3f} </td>
+ <td>
+ <audio controls>
+ <source src="items/{item_name}">
+ </audio>
+ </td>
+ <td>
+ <audio controls>
+ <source src="items/{item_name + '.orig.wav'}">
+ </audio>
+ </td>
+ </tr>
+ """)
+
+ # footer
+ f.write("""
+ </table>
+ </div>
+ """)
+
+
+def create_html(output_folder, results, title, metric):
+
+ html_folder = output_folder
+ items_folder = os.path.join(html_folder, 'items')
+ os.makedirs(html_folder, exist_ok=True)
+ os.makedirs(items_folder, exist_ok=True)
+
+ with open(os.path.join(html_folder, 'index.html'), 'w') as f:
+ # header and title
+ f.write(f"""
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>{title}</title>
+ <style>
+ article {{
+ align-items: flex-start;
+ display: flex;
+ flex-wrap: wrap;
+ gap: 4em;
+ }}
+ html {{
+ box-sizing: border-box;
+ font-family: "Amazon Ember", "Source Sans", "Verdana", "Calibri", sans-serif;
+ padding: 2em;
+ }}
+ td {{
+ padding: 3px 7px;
+ text-align: center;
+ }}
+ td:first-child {{
+ text-align: end;
+ }}
+ th {{
+ background: #ff9900;
+ color: #000;
+ font-size: 1.2em;
+ padding: 7px 7px;
+ }}
+ </style>
+ </head>
+ </body>
+ <h1>{title}</h1>
+ <article>
+ """)
+
+ # top 20
+ add_audio_table(f, html_folder, results[:-21: -1], "Top 20", metric)
+
+ # 20 around median
+ N = len(results) // 2
+ add_audio_table(f, html_folder, results[N + 10 : N - 10: -1], "Median 20", metric)
+
+ # flop 20
+ add_audio_table(f, html_folder, results[:20], "Flop 20", metric)
+
+ # footer
+ f.write("""
+ </article>
+ </body>
+ </html>
+ """)
+
+# Sign applied to a score before the ascending sort in evaluate_results: items that sort
+# first are reported as "bottom"/"Flop", items that sort last as "top"/"Top". A sign of 1
+# therefore ranks larger scores as better, a sign of -1 ranks smaller scores as better.
+# The keys also define the set of metric names accepted on the command line.
+metric_sorting_signs = {
+ 'pesq' : 1,
+ 'pitch_error' : -1,
+ 'voicing_error' : -1
+}
+
+def is_valid_result(data, metrics):
+ if not isinstance(data, dict):
+ return False
+
+ for metric in metrics:
+ if not metric in data:
+ return False
+
+ return True
+
+
+def evaluate_results(output_folder, results, metric):
+
+ results = sorted(results, key=lambda x : metric_sorting_signs[metric] * x[1])
+ with open(os.path.join(args.output_folder, f'scores_{metric}.txt'), 'w') as f:
+ for result in results:
+ f.write(f"{os.path.relpath(result[0], args.output_folder)} {result[1]}\n")
+
+
+ # some statistics
+ mean = sum([r[1] for r in results]) / len(results)
+ top_mean = sum([r[1] for r in results[-20:]]) / 20
+ bottom_mean = sum([r[1] for r in results[:20]]) / 20
+
+ with open(os.path.join(args.output_folder, f'stats_{metric}.txt'), 'w') as f:
+ f.write(f"mean score: {mean}\n")
+ f.write(f"bottom mean score: {bottom_mean}\n")
+ f.write(f"top mean score: {top_mean}\n")
+
+ print(f"\nmean score: {mean}")
+ print(f"bottom mean score: {bottom_mean}")
+ print(f"top mean score: {top_mean}\n")
+
+ # create output html
+ create_html(os.path.join(output_folder, 'html', metric), results, setup['test'], metric)
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+
+ # check for sox
+ if not check_for_sox_in_path():
+ raise RuntimeError("script requires sox")
+
+
+ # prepare output folder
+ if os.path.exists(args.output_folder):
+ print("warning: output folder exists")
+
+ reply = input('continue? (y/n): ')
+ while reply not in {'y', 'n'}:
+ reply = input('continue? (y/n): ')
+
+ if reply == 'n':
+ os._exit()
+ else:
+ # start with a clean sleight
+ shutil.rmtree(args.output_folder)
+
+ os.makedirs(args.output_folder, exist_ok=True)
+
+ # extract metrics
+ metrics = args.metrics.split(",")
+ for metric in metrics:
+ if not metric in metric_sorting_signs:
+ print(f"unknown metric {metric}")
+ args.usage()
+
+ # read setup
+ print(f"loading {args.setup}...")
+ with open(args.setup, "r") as f:
+ setup = yaml.load(f.read(), yaml.FullLoader)
+
+ model_commands = setup['processing']
+
+ print("\nfound the following model commands:")
+ for command in model_commands:
+ print(command.format(INPUT='input.wav', OUTPUT='output.wav', PLCFILE='input_is_lost.txt'))
+
+ # store setup to output folder
+ setup['input'] = os.path.abspath(args.input_folder)
+ setup['output'] = os.path.abspath(args.output_folder)
+ setup['seed'] = args.seed
+ with open(os.path.join(args.output_folder, 'setup.yml'), 'w') as f:
+ yaml.dump(setup, f)
+
+ # get input
+ print(f"\nCollecting audio files from {args.input_folder}...")
+ file_list = get_wave_file_list(args.input_folder, check_for_features=False)
+ print(f"...{len(file_list)} files found\n")
+
+ # sample from file list
+ file_list = sorted(file_list)
+ random.seed(args.seed)
+ random.shuffle(file_list)
+ num_testitems = min(args.num_testitems, len(file_list))
+ file_list = file_list[:num_testitems]
+
+
+ print(f"\nlaunching test on {num_testitems} items...")
+ # helper function for parallel processing
+ def func(input_path):
+ output_path = get_output_path(args.input_folder, input_path, args.output_folder)
+
+ try:
+ rval = run_processing_chain(input_path, output_path, model_commands, args.fs, metrics=metrics, plc_suffix=args.plc_suffix, verbose=args.verbose)
+ except:
+ rval = (input_path, -1)
+
+ return rval
+
+ with multiprocessing.Pool(args.num_workers) as p:
+ results = p.map(func, file_list)
+
+ results_dict = dict()
+ for name, values in results:
+ if is_valid_result(values, metrics):
+ results_dict[name] = values
+
+ print(results_dict)
+
+ # evaluating results
+ num_failures = num_testitems - len(results_dict)
+ print(f"\nprocessing of {num_failures} items failed\n")
+
+ for metric in metrics:
+ print(metric)
+ evaluate_results(
+ args.output_folder,
+ [(name, value[metric]) for name, value in results_dict.items()],
+ metric
+ ) \ No newline at end of file
diff --git a/dnn/torch/testsuite/utils/__init__.py b/dnn/torch/testsuite/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/dnn/torch/testsuite/utils/__init__.py
diff --git a/dnn/torch/testsuite/utils/files.py b/dnn/torch/testsuite/utils/files.py
new file mode 100644
index 00000000..c3a15536
--- /dev/null
+++ b/dnn/torch/testsuite/utils/files.py
@@ -0,0 +1,54 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+
+
+def get_wave_file_list(parent_folder, extensions=[".wav", ".flac"], check_for_features=False):
+ """ traverses subfolders of parent_folder in search for files that match the given extension """
+
+ file_list = []
+
+ for root, dirs, files in os.walk(parent_folder, topdown=True):
+
+ for file in files:
+
+ stem, ext = os.path.splitext(file)
+
+ #check for extension
+ if not ext in extensions:
+ continue
+
+ # check if feature file exists
+ if check_for_features and not os.path.isfile(os.path.join(root, stem + "_features.f32")):
+ continue
+
+ file_list.append(os.path.join(root, file))
+
+ return file_list \ No newline at end of file
diff --git a/dnn/torch/testsuite/utils/pesq.py b/dnn/torch/testsuite/utils/pesq.py
new file mode 100644
index 00000000..5a27f518
--- /dev/null
+++ b/dnn/torch/testsuite/utils/pesq.py
@@ -0,0 +1,43 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import pesq
+import librosa
+
+def compute_PESQ(ref, test, fs=16000):
+
+ if not ref.endswith('.wav') or not test.endswith('.wav'):
+ raise ValueError('error: expecting .wav as file extension')
+
+ ref_item, _ = librosa.load(ref, sr=fs)
+ test_item, _ = librosa.load(test, sr=fs)
+
+ score = pesq.pesq(fs, ref_item, test_item)
+
+ return score \ No newline at end of file
diff --git a/dnn/torch/testsuite/utils/pitch.py b/dnn/torch/testsuite/utils/pitch.py
new file mode 100644
index 00000000..f9429651
--- /dev/null
+++ b/dnn/torch/testsuite/utils/pitch.py
@@ -0,0 +1,61 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+from scipy.io import wavfile
+import amfm_decompy.pYAAPT as pYAAPT
+import amfm_decompy.basic_tools as basic
+
+def get_voicing_info(x, sr=16000):
+
+ signal = basic.SignalObj(x, sr)
+ pitch = pYAAPT.yaapt(signal, **{'frame_length' : 20.0, 'tda_frame_length' : 20.0})
+
+ pitch_values = pitch.samp_values
+ voiced_flags = pitch.vuv.astype('float')
+
+ return pitch_values, voiced_flags
+
+def compute_pitch_error(ref_path, test_path, fs=16000):
+ fs_orig, x_orig = wavfile.read(ref_path)
+ fs_test, x_test = wavfile.read(test_path)
+
+ min_length = min(len(x_orig), len(x_test))
+ x_orig = x_orig[:min_length]
+ x_test = x_test[:min_length]
+
+ assert fs_orig == fs_test == fs
+
+ pitch_contour_orig, voicing_orig = get_voicing_info(x_orig.astype(np.float32))
+ pitch_contour_test, voicing_test = get_voicing_info(x_test.astype(np.float32))
+
+ return {
+ 'pitch_error' : np.mean(np.abs(pitch_contour_orig - pitch_contour_test)).item(),
+ 'voicing_error' : np.sum(np.abs(voicing_orig - voicing_test)).item() / len(voicing_orig)
+ } \ No newline at end of file
diff --git a/dnn/torch/weight-exchange/README.md b/dnn/torch/weight-exchange/README.md
new file mode 100644
index 00000000..f4818b5b
--- /dev/null
+++ b/dnn/torch/weight-exchange/README.md
@@ -0,0 +1,21 @@
+# weight-exchange
+
+
+
+## Weight Exchange
+Repo for exchanging weights between torch and tensorflow.keras modules, using an intermediate numpy format.
+
+Routines for loading/dumping torch weights are located in wexchange/torch and can be loaded with
+```
+import wexchange.torch
+```
+and routines for loading/dumping tensorflow weights are located in wexchange/tf and can be loaded with
+```
+import wexchange.tf
+```
+
+Note that `wexchange.torch` requires torch to be installed and `wexchange.tf` requires tensorflow. To avoid the necessity of installing both torch and tensorflow in the working environment, neither of these submodules is imported when calling `import wexchange`. Similarly, the requirements listed in `requirements.txt` include neither TensorFlow nor PyTorch.
+
+
+## C export
+The module `wexchange.c_export` contains routines to export weights to C files. In the long run it will be possible to call all `dump_...` functions with either a path string or a `CWriter` instance, based on which the export format is chosen. This is currently only implemented for `torch.nn.GRU`, `torch.nn.Linear` and `torch.nn.Conv1d`. \ No newline at end of file
diff --git a/dnn/torch/weight-exchange/requirements.txt b/dnn/torch/weight-exchange/requirements.txt
new file mode 100644
index 00000000..296d6545
--- /dev/null
+++ b/dnn/torch/weight-exchange/requirements.txt
@@ -0,0 +1 @@
+numpy \ No newline at end of file
diff --git a/dnn/torch/weight-exchange/setup.py b/dnn/torch/weight-exchange/setup.py
new file mode 100644
index 00000000..2b20440b
--- /dev/null
+++ b/dnn/torch/weight-exchange/setup.py
@@ -0,0 +1,48 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+# NOTE: the interpreter line below is not the first line of the file and therefore acts
+# as a plain comment only.
+#!/usr/bin/env python
+import os
+from setuptools import setup
+
+# folder containing this setup script
+lib_folder = os.path.dirname(os.path.realpath(__file__))
+
+# one requirement per line of the requirements.txt located next to this script
+with open(os.path.join(lib_folder, 'requirements.txt'), 'r') as f:
+ install_requires = list(f.read().splitlines())
+
+print(install_requires)
+
+setup(name='wexchange',
+ version='1.6',
+ author='Jan Buethe',
+ author_email='jbuethe@amazon.de',
+ description='Weight-exchange library between Pytorch and Tensorflow',
+ packages=['wexchange', 'wexchange.tf', 'wexchange.torch', 'wexchange.c_export'],
+ install_requires=install_requires
+ )
diff --git a/dnn/torch/weight-exchange/wexchange/__init__.py b/dnn/torch/weight-exchange/wexchange/__init__.py
new file mode 100644
index 00000000..a0286142
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/__init__.py
@@ -0,0 +1,30 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+from . import c_export \ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/c_export/__init__.py b/dnn/torch/weight-exchange/wexchange/c_export/__init__.py
new file mode 100644
index 00000000..2a580c80
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/c_export/__init__.py
@@ -0,0 +1,31 @@
+from .c_writer import CWriter
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+from .common import print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer, print_vector \ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
new file mode 100644
index 00000000..2745f337
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py
@@ -0,0 +1,182 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+from collections import OrderedDict
+
+class CWriter:
+ def __init__(self,
+ filename_without_extension,
+ message=None,
+ header_only=False,
+ create_state_struct=False,
+ enable_binary_blob=True,
+ model_struct_name="Model",
+ nnet_header="nnet.h",
+ add_typedef=False):
+ """
+ Writer class for creating souce and header files for weight exports to C
+
+ Parameters:
+ -----------
+
+ filename_without_extension: str
+ filename from which .c and .h files are created
+
+ message: str, optional
+ if given and not None, this message will be printed as comment in the header file
+
+ header_only: bool, optional
+ if True, only a header file is created; defaults to False
+
+ enable_binary_blob: bool, optional
+ if True, export is done in binary blob format and a model type is created; defaults to False
+
+ create_state_struct: bool, optional
+ if True, a state struct type is created in the header file; if False, state sizes are defined as macros; defaults to False
+
+ model_struct_name: str, optional
+ name used for the model struct type; only relevant when enable_binary_blob is True; defaults to "Model"
+
+ nnet_header: str, optional
+ name of header nnet header file; defaults to nnet.h
+
+ """
+
+
+ self.header_only = header_only
+ self.enable_binary_blob = enable_binary_blob
+ self.create_state_struct = create_state_struct
+ self.model_struct_name = model_struct_name
+ self.add_typedef = add_typedef
+
+ # for binary blob format, format is key=<layer name>, value=(<layer type>, <init call>)
+ self.layer_dict = OrderedDict()
+
+ # for binary blob format, format is key=<layer name>, value=<layer type>
+ self.weight_arrays = []
+
+ # form model struct, format is key=<layer name>, value=<number of elements>
+ self.state_dict = OrderedDict()
+
+ self.header = open(filename_without_extension + ".h", "w")
+ header_name = os.path.basename(filename_without_extension) + '.h'
+
+ if message is not None:
+ self.header.write(f"/* {message} */\n\n")
+
+ self.header_guard = os.path.basename(filename_without_extension).upper() + "_H"
+ self.header.write(
+f'''
+#ifndef {self.header_guard}
+#define {self.header_guard}
+
+#include "{nnet_header}"
+
+'''
+ )
+
+ if not self.header_only:
+ self.source = open(filename_without_extension + ".c", "w")
+ if message is not None:
+ self.source.write(f"/* {message} */\n\n")
+
+ self.source.write(
+f"""
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+""")
+ self.source.write(f'#include "{header_name}"\n\n')
+
+
+ def _finalize_header(self):
+
+ # create model type
+ if self.enable_binary_blob:
+ if self.add_typedef:
+ self.header.write(f"\ntypedef struct {{")
+ else:
+ self.header.write(f"\nstruct {self.model_struct_name} {{")
+ for name, data in self.layer_dict.items():
+ layer_type = data[0]
+ self.header.write(f"\n {layer_type} {name};")
+ if self.add_typedef:
+ self.header.write(f"\n}} {self.model_struct_name};\n")
+ else:
+ self.header.write(f"\n}};\n")
+
+ init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"
+ self.header.write(f"\n{init_prototype};\n")
+
+ self.header.write(f"\n#endif /* {self.header_guard} */\n")
+
+ def _finalize_source(self):
+
+ if self.enable_binary_blob:
+ # create weight array
+ if len(set(self.weight_arrays)) != len(self.weight_arrays):
+ raise ValueError("error: detected duplicates in weight arrays")
+ self.source.write("\n#ifndef USE_WEIGHTS_FILE\n")
+ self.source.write(f"const WeightArray {self.model_struct_name.lower()}_arrays[] = {{\n")
+ for name in self.weight_arrays:
+ self.source.write(f"#ifdef WEIGHTS_{name}_DEFINED\n")
+ self.source.write(f' {{"{name}", WEIGHTS_{name}_TYPE, sizeof({name}), {name}}},\n')
+ self.source.write(f"#endif\n")
+ self.source.write(" {NULL, 0, 0, NULL}\n")
+ self.source.write("};\n")
+
+ self.source.write("#endif /* USE_WEIGHTS_FILE */\n")
+
+ # create init function definition
+ init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"
+ self.source.write("\n#ifndef DUMP_BINARY_WEIGHTS\n")
+ self.source.write(f"{init_prototype} {{\n")
+ for name, data in self.layer_dict.items():
+ self.source.write(f" if ({data[1]}) return 1;\n")
+ self.source.write(" return 0;\n")
+ self.source.write("}\n")
+ self.source.write("#endif /* DUMP_BINARY_WEIGHTS */\n")
+
+
+ def close(self):
+
+ if not self.header_only:
+ self._finalize_source()
+ self.source.close()
+
+ self._finalize_header()
+ self.header.close()
+
+ def __del__(self):
+ try:
+ self.close()
+ except:
+ pass \ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/c_export/common.py b/dnn/torch/weight-exchange/wexchange/c_export/common.py
new file mode 100644
index 00000000..039edd9b
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/c_export/common.py
@@ -0,0 +1,387 @@
+'''Copyright (c) 2017-2018 Mozilla
+ Copyright (c) 2022 Amazon
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import numpy as np
+
+from .c_writer import CWriter
+
+def print_vector(writer, vector, name, dtype='float', reshape_8x4=False, static=True, debug_float=False):
+ """Write ``vector`` as a C array definition ``const <dtype> <name>[<len>]``.
+
+ Parameters:
+ -----------
+ writer : CWriter or file-like
+ For a CWriter the text goes to ``writer.source`` and the binary-blob
+ bookkeeping is controlled by ``writer.enable_binary_blob``; any other
+ object is written to directly and binary-blob handling is disabled.
+ vector : np.ndarray
+ values to write; flattened before printing
+ name : str
+ name of the C array
+ dtype : str
+ C element type; must be a key of ``dtype_suffix`` when the binary blob
+ is enabled
+ reshape_8x4 : bool
+ if True, the 2-dim input is reshaped to
+ (rows//4, 4, cols//8, 8) and its axes are reordered to (2, 0, 3, 1)
+ before flattening
+ static : bool
+ if True, the definition is prefixed with ``static``
+ debug_float : bool
+ if True, the definition is wrapped in ``#ifndef DISABLE_DEBUG_FLOAT``
+
+ Returns:
+ --------
+ the vector after the optional 8x4 reshaping (not flattened)
+ """
+
+ if isinstance(writer, CWriter):
+ f = writer.source
+ binary_blob = writer.enable_binary_blob
+ else:
+ f = writer
+ binary_blob = False
+
+ # maps the C element type to the suffix used in the WEIGHT_TYPE_<suffix> macro
+ dtype_suffix = {
+ 'float' : 'float',
+ 'opus_int8' : 'int8',
+ 'opus_uint16' : 'uint16',
+ 'opus_int16' : 'int16',
+ 'int' : 'int',
+ 'qweight': 'qweight'
+ }
+
+
+ # with the binary blob enabled, the array is guarded by USE_WEIGHTS_FILE and
+ # its name is recorded in writer.weight_arrays
+ if binary_blob:
+ f.write(
+f'''
+#ifndef USE_WEIGHTS_FILE
+'''
+ )
+ writer.weight_arrays.append(name)
+
+ if reshape_8x4:
+ vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
+ vector = vector.transpose((2, 0, 3, 1))
+
+ v = np.reshape(vector, (-1))
+
+ if debug_float:
+ f.write('#ifndef DISABLE_DEBUG_FLOAT\n')
+ if binary_blob:
+ f.write(
+f'''
+#define WEIGHTS_{name}_DEFINED
+#define WEIGHTS_{name}_TYPE WEIGHT_TYPE_{dtype_suffix[dtype]}
+'''
+ )
+
+ if static:
+ f.write('static ')
+
+ f.write(f'const {dtype} {name}[{len(v)}] = {{\n ')
+
+ # values are comma separated with a line break after every 8th value;
+ # the last value gets neither a comma nor a separator
+ for i in range(0, len(v)):
+
+ f.write(f'{v[i]}')
+
+ if (i!=len(v)-1):
+ f.write(',')
+ else:
+ break
+
+ if (i%8==7):
+ f.write("\n ")
+ else:
+ f.write(" ")
+
+ f.write('\n};\n\n')
+ if debug_float: f.write('#endif /*DISABLE_DEBUG_FLOAT*/\n')
+
+ if binary_blob:
+ f.write(
+f'''
+#endif /* USE_WEIGHTS_FILE */
+'''
+ )
+
+ return vector
+
+
+
+def extract_diagonal(A):
+    """Split the diagonals off a matrix made of k square blocks side by side.
+
+    ``A`` has shape (N, k*N). For each of the k N-by-N column blocks the main
+    diagonal is extracted and removed from a copy of ``A``.
+
+    Returns the concatenated diagonals (length k*N) and the copy of ``A`` with
+    those diagonals subtracted out. ``A`` itself is not modified.
+    """
+
+    rows, cols = A.shape
+    remainder = A.copy()
+    assert cols % rows == 0
+    num_blocks = cols // rows
+
+    block_diags = []
+    for start in range(0, num_blocks * rows, rows):
+        # basic slicing yields a view, so the in-place subtraction below
+        # updates `remainder`
+        square = remainder[:, start:start + rows]
+        d = np.diag(square).copy()
+        square -= np.diag(d)
+        block_diags.append(d)
+
+    return np.concatenate(block_diags), remainder
+
+def quantize_weight(weight, scale):
+    """Quantize ``weight`` to integers by dividing by ``scale`` and rounding.
+
+    A tiny offset (1e-30) is added to ``scale`` to avoid division by zero.
+
+    Returns an integer array of ``round(weight / scale)``.
+
+    Raises:
+        ValueError: if any quantized value is greater than 127 or less than
+            or equal to -128.
+    """
+    scale = scale + 1e-30
+    Aq = np.round(weight / scale).astype('int')
+    if Aq.max() > 127 or Aq.min() <= -128:
+        raise ValueError("value out of bounds in quantize_weight")
+    # all values are within [-127, 127] here, so no second rounding pass or
+    # clipping is needed
+    return Aq
+
+
+def print_sparse_weight(writer, A, name, scale=1/128, have_diag=True, quantize=False):
+ """Write a block-sparse representation of the 2-dim matrix ``A`` as C arrays.
+
+ The matrix is scanned in blocks of 4 rows by 8 columns. A block is kept when
+ the sum of its absolute values exceeds 1e-10. Rows beyond the last multiple
+ of 4 and columns beyond the last multiple of 8 are not visited.
+
+ Arrays written via print_vector:
+ ``<name>_diag`` block diagonals, only when ``have_diag`` is set
+ ``<name>_int8`` quantized kept blocks, only when ``quantize`` is set
+ ``<name>_float`` unquantized kept blocks
+ ``<name>_idx`` for each group of 8 columns: the number of kept blocks
+ followed by the first row index of each kept block
+
+ Returns the quantized matrix when ``quantize`` is set, otherwise ``A`` (with
+ the diagonals removed when ``have_diag`` is set).
+ """
+ N = A.shape[0]
+ M = A.shape[1]
+ W = np.zeros((0,), dtype='int')
+ W0 = np.zeros((0,))
+
+ if have_diag:
+ diag, A = extract_diagonal(A)
+ print_vector(writer, diag, name + '_diag')
+
+ if quantize:
+ Aq = quantize_weight(A, scale)
+ else:
+ Aq = A
+
+ # extract blocks
+ idx = np.zeros((0,), dtype='int')
+ for i in range(M//8):
+ # reserve a slot for the block count of this column group; it is filled
+ # in once all row blocks have been scanned
+ pos = idx.shape[0]
+ idx = np.append(idx, -1)
+ nb_nonzero = 0
+ for j in range(N//4):
+ block = A[j*4:(j+1)*4, i*8:(i+1)*8]
+ qblock = Aq[j*4:(j+1)*4, i*8:(i+1)*8]
+ # the keep/skip decision is made on the unquantized block
+ if np.sum(np.abs(block)) > 1e-10:
+ nb_nonzero = nb_nonzero + 1
+ idx = np.append(idx, j*4)
+ # the quantized block is transposed before flattening, the float block is not
+ vblock = qblock.transpose((1,0)).reshape((-1,))
+ W0 = np.concatenate([W0, block.reshape((-1,))])
+ W = np.concatenate([W, vblock])
+ idx[pos] = nb_nonzero
+
+ if quantize: print_vector(writer, W, name + '_int8', reshape_8x4=False, dtype='opus_int8')
+ print_vector(writer, W0, name + '_float', reshape_8x4=False, dtype='float', debug_float=quantize)
+ print_vector(writer, idx, name + '_idx', reshape_8x4=False, dtype='int')
+
+ return Aq
+
+
+
+def compute_scaling(weight):
+    """Compute a per-output scaling vector for a (features_in, features_out) weight.
+
+    For every output column the scale is the larger of
+      * the largest absolute weight divided by 127, and
+      * the largest absolute sum of two consecutive input rows (rows 2i and
+        2i+1) divided by 129.
+
+    features_in must be a multiple of 4 and features_out a multiple of 8.
+    """
+
+    n_in, n_out = weight.shape
+    assert n_in % 4 == 0 and n_out % 8 == 0
+
+    even_rows = weight[0:n_in:2]
+    odd_rows = weight[1:n_in:2]
+
+    peak_single = np.abs(weight).max(axis=0)
+    peak_pair = np.abs(even_rows + odd_rows).max(axis=0)
+
+    return np.maximum(peak_single / 127, peak_pair / 129)
+
+def qn(string):
+    """Return ``string`` wrapped in double quotes, except for the literal "NULL"."""
+    return string if string == "NULL" else '"' + string + '"'
+
+def print_linear_layer(writer : CWriter,
+ name : str,
+ weight : np.ndarray,
+ bias : np.ndarray,
+ scale : np.ndarray = None,
+ sparse : bool = False,
+ diagonal : bool = False,
+ quantize : bool = True):
+
+ """ prints linear layer
+
+ Writes the weight, bias and scaling arrays of a linear layer and registers
+ the matching ``linear_init(...)`` call in ``writer.layer_dict[name]``.
+
+ Parameters:
+ -----------
+ name : str
+ layer name
+ weight: np.ndarray
+ 2-dim weight array of shape (nb_inputs, nb_outputs)
+ bias: np.ndarray or None
+ bias vector; if None, no bias array is written and "NULL" is passed
+ to the init call
+ scale: np.ndarray or None
+ If None auto scaling will be applied. Otherwise, output channels will be multiplied by scale (the usual broadcasting rules apply).
+ sparse: bool
+ if True, the weights are written by print_sparse_weight
+ diagonal: bool
+ passed to print_sparse_weight as have_diag (only relevant when sparse)
+ quantize: bool
+ if True, int8 weights, the "_subias" vector and the "_scale" vector
+ are written in addition to the float weights
+
+ Raises:
+ -------
+ ValueError if weight is not 2-dimensional
+ """
+
+ if len(weight.shape) != 2:
+ raise ValueError('expecting 2-dim weight array in print_linear_layer')
+
+
+ # array names handed to linear_init; "NULL" marks an array that is not written
+ bias_name = "NULL" if bias is None else name + "_bias"
+ subias_name = name + "_subias" if quantize else "NULL"
+ scale_name = name + "_scale" if quantize else "NULL"
+ idx_name = name + "_weights_idx" if sparse else "NULL"
+ float_weight_name = name + "_weights_float"
+ int_weight_name = name + "_weights_int8" if quantize else "NULL"
+ diag_name = name + "_weights_diag" if sparse and diagonal else "NULL"
+
+ nb_inputs, nb_outputs = weight.shape
+
+ if scale is None and quantize:
+ scale = compute_scaling(weight)
+
+
+ if sparse:
+ weight_q = print_sparse_weight(writer, weight, name + "_weights", scale=scale, have_diag=diagonal, quantize=quantize)
+ else:
+ if quantize:
+ weight_q = quantize_weight(weight, scale)
+ print_vector(writer, weight_q, name + "_weights_int8", dtype='opus_int8', reshape_8x4=True)
+
+ print_vector(writer, weight, name + "_weights_float", dtype='float', reshape_8x4=False, debug_float=quantize)
+
+ if quantize:
+ # subias: the bias (zeros if there is none) minus the column sums of the
+ # quantized weights multiplied by scale
+ subias = (np.zeros(nb_outputs) if bias is None else bias) - np.sum(weight_q * scale, axis=0)
+ print_vector(writer, subias, name + "_subias")
+
+ # scale / 127 expanded to one value per output
+ final_scale = scale / 127 * np.ones(nb_outputs)
+ print_vector(writer, final_scale, name + "_scale")
+
+ if bias is not None:
+ print_vector(writer, bias, name + "_bias")
+
+
+ init_call = f'linear_init(&model->{name}, arrays, {qn(bias_name)}, {qn(subias_name)}, {qn(int_weight_name)},' \
+ + f'{qn(float_weight_name)}, {qn(idx_name)}, {qn(diag_name)}, {qn(scale_name)}, {nb_inputs}, {nb_outputs})'
+
+ writer.layer_dict[name] = ('LinearLayer', init_call)
+
+
+def print_dense_layer(writer : CWriter,
+                      name : str,
+                      weight : np.ndarray,
+                      bias : np.ndarray,
+                      scale=1/128,
+                      format : str = 'torch',
+                      sparse=False,
+                      diagonal=False,
+                      quantize=False):
+    """Write a dense layer and define ``<NAME>_OUT_SIZE`` in the header.
+
+    With ``format == 'torch'`` the weight is transposed before it is handed to
+    print_linear_layer; any other format is passed through unchanged.
+    """
+
+    linear_weight = weight.transpose() if format == 'torch' else weight
+
+    print_linear_layer(writer, name, linear_weight, bias, scale=scale, sparse=sparse, diagonal=diagonal, quantize=quantize)
+
+    out_size = linear_weight.shape[1]
+    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {out_size}\n")
+
+
+def print_conv1d_layer(writer : CWriter,
+                       name : str,
+                       weight : np.ndarray,
+                       bias : np.ndarray,
+                       scale=1/128,
+                       format : str = 'torch',
+                       quantize=False,
+                       sparse=False):
+    """Write a 1d convolution as a linear layer plus size macros in the header.
+
+    With ``format == "torch"`` the weight axes are reversed so that the layout
+    is channels last. The weight is then flattened over all but the last axis
+    and written with print_linear_layer.
+
+    Returns the product of the first two dimensions of the channels-last weight.
+    """
+
+    if format == "torch":
+        # convert to channels last
+        weight = np.transpose(weight, (2, 1, 0))
+
+    flat_weight = np.reshape(weight, (-1, weight.shape[-1]))
+    print_linear_layer(writer, name, flat_weight, bias, scale=scale, sparse=sparse, diagonal=False, quantize=quantize)
+
+    dims = weight.shape
+    kernel_size, in_size, out_size = dims[0], dims[1], dims[2]
+    prefix = name.upper()
+
+    header_lines = [
+        f"\n#define {prefix}_OUT_SIZE {out_size}\n",
+        f"\n#define {prefix}_IN_SIZE {in_size}\n",
+        f"\n#define {prefix}_STATE_SIZE ({in_size} * ({kernel_size - 1}))\n",
+        # CAVE: delay is not a property of the conv layer
+        f"\n#define {prefix}_DELAY {(kernel_size - 1) // 2}\n",
+    ]
+    for line in header_lines:
+        writer.header.write(line)
+
+    return kernel_size * in_size
+
+def print_conv2d_layer(writer : CWriter,
+                       name : str,
+                       weight : np.ndarray,
+                       bias : np.ndarray,
+                       scale : float=1/128,
+                       quantize : bool=False):
+    """Write the float weight and bias of a 2d convolution and register its init call.
+
+    ``weight`` has shape (out_channels, in_channels, ksize1, ksize2).
+    ``scale`` is unused and ``quantize`` only triggers a warning.
+    """
+
+    if quantize:
+        print("[print_conv2d_layer] warning: quantize argument ignored")
+
+    float_weight_name = name + "_weight_float"
+    bias_name = name + "_bias"
+
+    print_vector(writer, weight, float_weight_name)
+    print_vector(writer, bias, bias_name)
+
+    # init function
+    out_channels, in_channels, ksize1, ksize2 = weight.shape
+    init_args = [
+        f'&model->{name}',
+        'arrays',
+        f'"{bias_name}"',
+        f'"{float_weight_name}"',
+        str(in_channels),
+        str(out_channels),
+        str(ksize1),
+        str(ksize2),
+    ]
+    init_call = 'conv2d_init(' + ', '.join(init_args) + ')'
+
+    writer.layer_dict[name] = ('Conv2dLayer', init_call)
+
+
+
+def print_gru_layer(writer : CWriter,
+                    name : str,
+                    weight : np.ndarray,
+                    recurrent_weight : np.ndarray,
+                    bias : np.ndarray,
+                    recurrent_bias : np.ndarray,
+                    format : str = 'torch',
+                    quantize : bool = False,
+                    input_sparse : bool = False,
+                    recurrent_sparse : bool = False,
+                    scale=1/128,
+                    recurrent_scale=1/128
+                    ):
+    """Write a GRU as two linear layers, ``<name>_input`` and ``<name>_recurrent``.
+
+    With ``format == "torch"`` the weights have the three gates stacked along
+    axis 0 in the order r, z, n. They are reordered to z, r, n and the two
+    weight matrices are transposed. With any other format the gates are taken
+    to lie along axis 1 and the arrays are used as given.
+
+    The input arrays are never modified; the reordering works on new arrays.
+    ``bias`` and ``recurrent_bias`` may be None.
+
+    Also defines ``<NAME>_OUT_SIZE`` and ``<NAME>_STATE_SIZE`` in the header.
+
+    Returns the size N of one gate.
+    """
+
+    if format == "torch":
+        # change gate ordering from rzn to zrn
+        N = weight.shape[0] // 3
+
+        def rzn_to_zrn(x):
+            # builds a new array instead of swapping slices in place, so the
+            # caller's array stays untouched
+            if x is None:
+                return None
+            return np.concatenate((x[N:2*N], x[0:N], x[2*N:]), axis=0)
+
+        weight = rzn_to_zrn(weight).transpose()
+        recurrent_weight = rzn_to_zrn(recurrent_weight).transpose()
+        bias = rzn_to_zrn(bias)
+        recurrent_bias = rzn_to_zrn(recurrent_bias)
+    else:
+        N = weight.shape[1] // 3
+
+    print_linear_layer(writer, name + "_input", weight, bias, scale=scale, sparse=input_sparse, quantize=quantize)
+    print_linear_layer(writer, name + "_recurrent", recurrent_weight, recurrent_bias, scale=recurrent_scale, sparse=recurrent_sparse, diagonal=recurrent_sparse, quantize=quantize)
+
+    # wrapping it up
+    writer.header.write(f"\n#define {name.upper()}_OUT_SIZE {N}\n")
+    writer.header.write(f"\n#define {name.upper()}_STATE_SIZE {N}\n")
+
+    return N
+
+
+def print_tconv1d_layer(writer : CWriter,
+                        name : str,
+                        weight : np.ndarray,
+                        bias : np.ndarray,
+                        stride: int,
+                        scale=1/128,
+                        quantize=False,
+                        sparse=False):
+    """Write a transposed 1d convolution as a linear layer plus header macros.
+
+    ``weight`` has shape (in_channels, out_channels, kernel_size). It is
+    rearranged into a matrix of shape (in_channels, kernel_size * out_channels)
+    and the bias is repeated once per kernel tap to match.
+    """
+
+    in_channels, out_channels, kernel_size = weight.shape
+
+    taps_first = weight.transpose(2, 1, 0)
+    stacked_taps = taps_first.reshape(kernel_size * out_channels, in_channels)
+    linear_weight = stacked_taps.transpose(1, 0)
+
+    bias_per_tap = np.repeat(bias[np.newaxis, :], kernel_size, 0)
+    linear_bias = bias_per_tap.flatten()
+
+    print_linear_layer(writer, name, linear_weight, linear_bias, scale=scale, quantize=quantize, sparse=sparse)
+
+    prefix = name.upper()
+    writer.header.write(f"\n#define {prefix}_KERNEL_SIZE {kernel_size}\n")
+    writer.header.write(f"\n#define {prefix}_STRIDE {stride}\n")
+    writer.header.write(f"\n#define {prefix}_IN_CHANNELS {in_channels}\n")
+    writer.header.write(f"\n#define {prefix}_OUT_CHANNELS {out_channels}\n")
\ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/tf/__init__.py b/dnn/torch/weight-exchange/wexchange/tf/__init__.py
new file mode 100644
index 00000000..02678048
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/tf/__init__.py
@@ -0,0 +1,5 @@
+from .tf import dump_tf_conv1d_weights, load_tf_conv1d_weights
+from .tf import dump_tf_dense_weights, load_tf_dense_weights
+from .tf import dump_tf_embedding_weights, load_tf_embedding_weights
+from .tf import dump_tf_gru_weights, load_tf_gru_weights
+from .tf import dump_tf_weights, load_tf_weights
\ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/tf/tf.py b/dnn/torch/weight-exchange/wexchange/tf/tf.py
new file mode 100644
index 00000000..bebbb55a
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/tf/tf.py
@@ -0,0 +1,188 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+
+import tensorflow as tf
+import numpy as np
+
+from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer
+
+def dump_tf_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
+    """Export the weights of a Keras GRU layer.
+
+    The layer must use tanh / sigmoid activations and ``reset_after=True``.
+
+    If ``where`` is a CWriter, the layer is written as C code via
+    print_gru_layer and that function's return value is returned. Otherwise
+    ``where`` is a directory (created if needed) that receives the files
+    ``weight_ih_rzn.npy``, ``weight_hh_rzn.npy``, ``bias_ih_rzn.npy`` and
+    ``bias_hh_rzn.npy``, with the gates along axis 0 in r, z, n order.
+    """
+
+    assert gru.activation == tf.keras.activations.tanh
+    assert gru.recurrent_activation == tf.keras.activations.sigmoid
+    assert gru.reset_after == True
+
+    kernel = gru.weights[0].numpy()
+    recurrent_kernel = gru.weights[1].numpy()
+    biases = gru.weights[2].numpy()
+    b_ih = biases[0].copy()
+    b_hh = biases[1].copy()
+
+    if isinstance(where, CWriter):
+        # with format='tf' print_gru_layer reads the gate size from axis 1 and
+        # uses the matrices as (inputs, outputs), so they must be passed in
+        # the layer's own layout, not transposed
+        return print_gru_layer(where, name, kernel.copy(), recurrent_kernel.copy(), b_ih, b_hh, format='tf', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
+
+    os.makedirs(where, exist_ok=True)
+
+    # the exchange format stores the gates along axis 0
+    w_ih = kernel.transpose().copy()
+    w_hh = recurrent_kernel.transpose().copy()
+
+    # zrn => rzn
+    N = w_ih.shape[0] // 3
+    for x in [w_ih, w_hh, b_ih, b_hh]:
+        tmp = x[0:N].copy()
+        x[0:N] = x[N:2*N]
+        x[N:2*N] = tmp
+
+    np.save(os.path.join(where, 'weight_ih_rzn.npy'), w_ih)
+    np.save(os.path.join(where, 'weight_hh_rzn.npy'), w_hh)
+    np.save(os.path.join(where, 'bias_ih_rzn.npy'), b_ih)
+    np.save(os.path.join(where, 'bias_hh_rzn.npy'), b_hh)
+
+
+def load_tf_gru_weights(path, gru):
+    """Load GRU weights from the ``*_rzn.npy`` files in ``path`` into a Keras GRU.
+
+    The stored arrays have the gates along axis 0 in r, z, n order. They are
+    reordered to z, r, n, the weight matrices are transposed, and the two bias
+    vectors are stacked before being assigned to the layer.
+    """
+
+    assert gru.activation == tf.keras.activations.tanh
+    assert gru.recurrent_activation == tf.keras.activations.sigmoid
+    assert gru.reset_after == True
+
+    def load(filename):
+        return np.load(os.path.join(path, filename))
+
+    w_ih = load('weight_ih_rzn.npy')
+    w_hh = load('weight_hh_rzn.npy')
+    b_ih = load('bias_ih_rzn.npy')
+    b_hh = load('bias_hh_rzn.npy')
+
+    # rzn => zrn
+    N = w_ih.shape[0] // 3
+    for arr in (w_ih, w_hh, b_ih, b_hh):
+        # both right-hand sides are copied before either slice is overwritten
+        arr[0:N], arr[N:2*N] = arr[N:2*N].copy(), arr[0:N].copy()
+
+    kernel, recurrent_kernel, stacked_bias = gru.weights[0], gru.weights[1], gru.weights[2]
+    kernel.assign(tf.convert_to_tensor(w_ih.transpose()))
+    recurrent_kernel.assign(tf.convert_to_tensor(w_hh.transpose()))
+    stacked_bias.assign(tf.convert_to_tensor(np.vstack((b_ih, b_hh))))
+
+
+def dump_tf_dense_weights(where, dense, name='dense', scale=1/128, sparse=False, diagonal=False, quantize=False):
+    """Export the weights of a Keras Dense layer.
+
+    A layer without bias is exported with an all-zero bias vector.
+
+    If ``where`` is a CWriter, the layer is written via print_dense_layer with
+    ``format='tf'``. Otherwise ``where`` is a directory (created if needed)
+    that receives ``weight.npy`` (the transposed kernel) and ``bias.npy``.
+    """
+
+    w = dense.weights[0].numpy()
+    b = np.zeros(dense.units, dtype=w.dtype) if dense.bias is None else dense.bias.numpy()
+
+    if isinstance(where, CWriter):
+        return print_dense_layer(where, name, w, b, scale=scale, format='tf', sparse=sparse, diagonal=diagonal, quantize=quantize)
+
+    os.makedirs(where, exist_ok=True)
+
+    for filename, array in (('weight.npy', w.transpose()), ('bias.npy', b)):
+        np.save(os.path.join(where, filename), array)
+
+
+def load_tf_dense_weights(path, dense):
+    """Load ``weight.npy`` and ``bias.npy`` from ``path`` into a Keras Dense layer.
+
+    The stored weight is transposed before assignment. The bias is only
+    assigned when the layer has one.
+    """
+
+    stored_weight = np.load(os.path.join(path, 'weight.npy'))
+    w = stored_weight.transpose()
+    b = np.load(os.path.join(path, 'bias.npy'))
+
+    dense.weights[0].assign(tf.convert_to_tensor(w))
+
+    if dense.bias is None:
+        return
+    dense.weights[1].assign(tf.convert_to_tensor(b))
+
+
+def dump_tf_conv1d_weights(where, conv, name='conv', scale=1/128, quantize=False):
+    """Export the weights of a channels-last Keras Conv1D layer.
+
+    A layer without bias is exported with an all-zero bias vector.
+
+    If ``where`` is a CWriter, the layer is written via print_conv1d_layer with
+    ``format='tf'``. Otherwise ``where`` is a directory (created if needed)
+    that receives ``weight_oik.npy`` (kernel with its axes reversed) and
+    ``bias.npy``.
+    """
+
+    assert conv.data_format == 'channels_last'
+
+    w = conv.weights[0].numpy().copy()
+    b = np.zeros(conv.filters, dtype=w.dtype) if conv.bias is None else conv.bias.numpy()
+
+    if isinstance(where, CWriter):
+        return print_conv1d_layer(where, name, w, b, scale=scale, format='tf', quantize=quantize)
+
+    os.makedirs(where, exist_ok=True)
+
+    w_oik = np.transpose(w, (2, 1, 0))
+    np.save(os.path.join(where, 'weight_oik.npy'), w_oik)
+    np.save(os.path.join(where, 'bias.npy'), b)
+
+
+def load_tf_conv1d_weights(path, conv):
+    """Load ``weight_oik.npy`` and ``bias.npy`` from ``path`` into a Keras Conv1D layer.
+
+    The stored weight has its axes reversed before assignment. The bias is
+    only assigned when the layer has one.
+    """
+
+    w_oik = np.load(os.path.join(path, 'weight_oik.npy'))
+    b = np.load(os.path.join(path, 'bias.npy'))
+
+    kernel = np.transpose(w_oik, (2, 1, 0))
+    conv.weights[0].assign(tf.convert_to_tensor(kernel))
+
+    if conv.bias is None:
+        return
+    conv.weights[1].assign(tf.convert_to_tensor(b))
+
+
+def dump_tf_embedding_weights(path, emb):
+    """Save the first weight of ``emb`` as ``weight.npy`` in directory ``path``.
+
+    The directory is created if it does not exist.
+    """
+    os.makedirs(path, exist_ok=True)
+
+    target = os.path.join(path, 'weight.npy')
+    np.save(target, emb.weights[0].numpy())
+
+
+
+def load_tf_embedding_weights(path, emb):
+    """Load ``weight.npy`` from ``path`` and assign it to the first weight of ``emb``."""
+
+    stored = np.load(os.path.join(path, 'weight.npy'))
+    emb.weights[0].assign(tf.convert_to_tensor(stored))
+
+
+def dump_tf_weights(path, module):
+    """Dump the weights of a Keras layer, dispatching on the layer type.
+
+    Supported types, checked in this order: Dense, GRU, Conv1D, Embedding.
+
+    Raises:
+        ValueError: if ``module`` is none of the supported layer types.
+    """
+    dispatch = (
+        (tf.keras.layers.Dense, dump_tf_dense_weights),
+        (tf.keras.layers.GRU, dump_tf_gru_weights),
+        (tf.keras.layers.Conv1D, dump_tf_conv1d_weights),
+        (tf.keras.layers.Embedding, dump_tf_embedding_weights),
+    )
+
+    for layer_type, dump in dispatch:
+        if isinstance(module, layer_type):
+            dump(path, module)
+            return
+
+    raise ValueError(f'dump_tf_weights: layer of type {type(module)} not supported')
+
+def load_tf_weights(path, module):
+    """Load the weights of a Keras layer, dispatching on the layer type.
+
+    Supported types, checked in this order: Dense, GRU, Conv1D, Embedding.
+
+    Raises:
+        ValueError: if ``module`` is none of the supported layer types.
+    """
+    if isinstance(module, tf.keras.layers.Dense):
+        load_tf_dense_weights(path, module)
+    elif isinstance(module, tf.keras.layers.GRU):
+        load_tf_gru_weights(path, module)
+    elif isinstance(module, tf.keras.layers.Conv1D):
+        load_tf_conv1d_weights(path, module)
+    elif isinstance(module, tf.keras.layers.Embedding):
+        load_tf_embedding_weights(path, module)
+    else:
+        # the message names this function, not dump_tf_weights
+        raise ValueError(f'load_tf_weights: layer of type {type(module)} not supported')
\ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/torch/__init__.py b/dnn/torch/weight-exchange/wexchange/torch/__init__.py
new file mode 100644
index 00000000..8245566d
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/torch/__init__.py
@@ -0,0 +1,37 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+from .torch import dump_torch_conv1d_weights, load_torch_conv1d_weights
+from .torch import dump_torch_conv2d_weights, load_torch_conv2d_weights
+from .torch import dump_torch_dense_weights, load_torch_dense_weights
+from .torch import dump_torch_gru_weights, load_torch_gru_weights
+from .torch import dump_torch_grucell_weights
+from .torch import dump_torch_embedding_weights, load_torch_embedding_weights
+from .torch import dump_torch_weights, load_torch_weights
+from .torch import dump_torch_adaptive_conv1d_weights
\ No newline at end of file
diff --git a/dnn/torch/weight-exchange/wexchange/torch/torch.py b/dnn/torch/weight-exchange/wexchange/torch/torch.py
new file mode 100644
index 00000000..af5d3e59
--- /dev/null
+++ b/dnn/torch/weight-exchange/wexchange/torch/torch.py
@@ -0,0 +1,433 @@
+"""
+/* Copyright (c) 2023 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import sys
+
+import torch
+import numpy as np
+
+# make the optional OSCE sources importable; the path is relative to this file
+# (a nested sys.path.append(...) call would additionally append None to sys.path)
+sys.path.append(os.path.join(os.path.dirname(__file__), '../osce'))
+try:
+    import utils.layers as osce_layers
+    from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
+    from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
+    from utils.layers.td_shaper import TDShaper
+    has_osce=True
+except Exception:
+    # OSCE layers are optional; any import failure simply disables them
+    has_osce=False
+
+from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer
+
+def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
+    """Export the weights of an adaptive conv1d layer.
+
+    If ``where`` is a CWriter, the scalar layer parameters are written to the
+    header as macros and the ``conv_kernel`` and ``filter_gain`` sub-layers are
+    written as dense layers ``<name>_kernel`` and ``<name>_gain``. When
+    ``quantize`` is set and the kernel size is not a multiple of 8, the kernel
+    weights and bias are zero-padded at the front of the kernel axis up to the
+    next multiple of 8; the KERNEL_SIZE and LEFT_PADDING macros include that
+    padding. Only the kernel layer is quantized.
+
+    Otherwise ``where`` is a directory (created if needed) that receives
+    ``weight_kernel.npy``, ``bias_kernel.npy``, ``weight_gain.npy`` and
+    ``bias_gain.npy``.
+    """
+
+    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
+    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
+    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
+    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
+
+    if isinstance(where, CWriter):
+        # pad kernel for quantization
+        left_padding = adaconv.padding[0]
+        kernel_size = adaconv.kernel_size
+        in_channels = adaconv.in_channels
+        out_channels = adaconv.out_channels
+        feature_dim = adaconv.feature_dim
+
+        if quantize and kernel_size % 8:
+            kernel_padding = 8 - (kernel_size % 8)
+            w_kernel = np.concatenate(
+                (np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)),
+                dtype=w_kernel.dtype,
+                axis=2).reshape(-1, feature_dim)
+            b_kernel = np.concatenate(
+                (np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)),
+                dtype=b_kernel.dtype,
+                axis=2).reshape(-1)
+            left_padding += kernel_padding
+            kernel_size += kernel_padding
+
+        # write relevant scalar parameters to header file
+        where.header.write(f"""
+#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
+#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
+#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f
+#define {name.upper()}_KERNEL_SIZE {kernel_size}
+#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
+#define {name.upper()}_LEFT_PADDING {left_padding}
+#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
+#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
+#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
+#define {name.upper()}_NORM_P {adaconv.norm_p}
+#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
+"""
+        )
+
+        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
+        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
+
+
+    else:
+        # np.save takes (file, array): build the full target path for each array
+        os.makedirs(where, exist_ok=True)
+
+        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
+        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
+        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
+        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
+
+
+def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
+    """Export the weights of an adaptive comb1d layer.
+
+    If ``where`` is a CWriter, the scalar layer parameters are written to the
+    header as macros and the ``conv_kernel``, ``filter_gain`` and
+    ``global_filter_gain`` sub-layers are written as dense layers
+    ``<name>_kernel``, ``<name>_gain`` and ``<name>_global_gain``. When
+    ``quantize`` is set and the number of kernel weight rows is not a multiple
+    of 8, zero rows are prepended to the kernel weight and bias up to the next
+    multiple of 8; the KERNEL_SIZE and LEFT_PADDING macros include that
+    padding. Only the kernel layer is quantized.
+
+    Otherwise ``where`` is a directory (created if needed) that receives one
+    ``weight_*.npy`` / ``bias_*.npy`` pair per sub-layer.
+    """
+
+    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
+    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
+    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
+    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
+    w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy()
+    b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy()
+
+
+    if isinstance(where, CWriter):
+        # pad kernel for quantization
+        left_padding = adaconv.padding[0]
+        kernel_size = adaconv.kernel_size
+
+        if quantize and w_kernel.shape[0] % 8:
+            kernel_padding = 8 - (w_kernel.shape[0] % 8)
+            w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype)
+            b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype)
+            left_padding += kernel_padding
+            kernel_size += kernel_padding
+        # write relevant scalar parameters to header file
+        where.header.write(f"""
+#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
+#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
+#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f
+#define {name.upper()}_KERNEL_SIZE {kernel_size}
+#define {name.upper()}_LEFT_PADDING {left_padding}
+#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
+#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
+#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
+#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
+#define {name.upper()}_NORM_P {adaconv.norm_p}
+#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
+#define {name.upper()}_MAX_LAG {adaconv.max_lag}
+"""
+        )
+
+        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
+        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
+        print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False)
+
+
+    else:
+        # np.save takes (file, array): build the full target path for each array
+        os.makedirs(where, exist_ok=True)
+
+        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
+        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
+        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
+        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
+        np.save(os.path.join(where, 'weight_global_gain.npy'), w_global_gain)
+        np.save(os.path.join(where, 'bias_global_gain.npy'), b_global_gain)
+
+def dump_torch_tdshaper(where, shaper, name='tdshaper', quantize=False, scale=1/128):
+ """Export a TDShaper module: header macros plus its conv1d sub-layers.
+
+ For a CWriter, the feature dimension, frame size, pooling size and the
+ innovate / pool_after flags (as 0 or 1) are written to the header. The conv
+ layers feature_alpha1_f, feature_alpha1_t and feature_alpha2 are dumped via
+ dump_torch_conv1d_weights; the four additional layers alpha1b, alpha1c,
+ alpha2b and alpha2c are dumped when ``shaper.innovate`` is set.
+
+ ``quantize`` and ``scale`` are only forwarded for feature_alpha1_f; all other
+ layers use the defaults of dump_torch_conv1d_weights.
+
+ NOTE(review): indentation is not preserved in this view, so it is not visible
+ whether the conv dumps run only for a CWriter or for any ``where`` -- confirm
+ against the original file.
+ """
+
+ if isinstance(where, CWriter):
+ where.header.write(f"""
+#define {name.upper()}_FEATURE_DIM {shaper.feature_dim}
+#define {name.upper()}_FRAME_SIZE {shaper.frame_size}
+#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k}
+#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0}
+#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0}
+"""
+ )
+
+ dump_torch_conv1d_weights(where, shaper.feature_alpha1_f, name + "_alpha1_f", quantize=quantize, scale=scale)
+ dump_torch_conv1d_weights(where, shaper.feature_alpha1_t, name + "_alpha1_t")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2")
+
+ # extra layers that are only dumped when shaper.innovate is set
+ if shaper.innovate:
+ dump_torch_conv1d_weights(where, shaper.feature_alpha1b, name + "_alpha1b")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha1c, name + "_alpha1c")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha2b, name + "_alpha2b")
+ dump_torch_conv1d_weights(where, shaper.feature_alpha2c, name + "_alpha2c")
+
+
+
+def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
+    """Export the weights of a single-layer, unidirectional torch GRU.
+
+    A bias is exported as None when the module has no matching attribute.
+
+    If ``where`` is a CWriter, the layer is written via print_gru_layer with
+    ``format='torch'`` and that function's return value is returned. Otherwise
+    ``where`` is a directory (created if needed) that receives the four
+    ``*_rzn.npy`` files.
+    """
+
+    assert gru.num_layers == 1
+    assert gru.bidirectional == False
+
+    def to_numpy(tensor):
+        return tensor.detach().cpu().numpy().copy()
+
+    w_ih = to_numpy(gru.weight_ih_l0)
+    w_hh = to_numpy(gru.weight_hh_l0)
+    b_ih = to_numpy(gru.bias_ih_l0) if hasattr(gru, 'bias_ih_l0') else None
+    b_hh = to_numpy(gru.bias_hh_l0) if hasattr(gru, 'bias_hh_l0') else None
+
+    if isinstance(where, CWriter):
+        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, format='torch', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
+
+    os.makedirs(where, exist_ok=True)
+
+    outputs = (
+        ('weight_ih_rzn.npy', w_ih),
+        ('weight_hh_rzn.npy', w_hh),
+        ('bias_ih_rzn.npy', b_ih),
+        ('bias_hh_rzn.npy', b_hh),
+    )
+    for filename, array in outputs:
+        np.save(os.path.join(where, filename), array)
+
+
+def dump_torch_grucell_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
+    """Export the weights of a torch GRUCell.
+
+    A bias is exported as None when the attribute is missing or is None.
+
+    If ``where`` is a CWriter, the cell is written via print_gru_layer with
+    ``format='torch'`` and that function's return value is returned. Otherwise
+    ``where`` is a directory (created if needed) that receives the four
+    ``*_rzn.npy`` files.
+    """
+
+    def to_numpy(tensor):
+        return tensor.detach().cpu().numpy().copy()
+
+    w_ih = to_numpy(gru.weight_ih)
+    w_hh = to_numpy(gru.weight_hh)
+
+    has_b_ih = hasattr(gru, 'bias_ih') and gru.bias_ih is not None
+    b_ih = to_numpy(gru.bias_ih) if has_b_ih else None
+
+    has_b_hh = hasattr(gru, 'bias_hh') and gru.bias_hh is not None
+    b_hh = to_numpy(gru.bias_hh) if has_b_hh else None
+
+    if isinstance(where, CWriter):
+        return print_gru_layer(where, name, w_ih, w_hh, b_ih, b_hh, format='torch', input_sparse=input_sparse, recurrent_sparse=recurrent_sparse, quantize=quantize, scale=scale, recurrent_scale=recurrent_scale)
+
+    os.makedirs(where, exist_ok=True)
+
+    outputs = (
+        ('weight_ih_rzn.npy', w_ih),
+        ('weight_hh_rzn.npy', w_hh),
+        ('bias_ih_rzn.npy', b_ih),
+        ('bias_hh_rzn.npy', b_hh),
+    )
+    for filename, array in outputs:
+        np.save(os.path.join(where, filename), array)
+
+
+
+def load_torch_gru_weights(where, gru):
+    """Load the four ``*_rzn.npy`` files from ``where`` into a torch GRU.
+
+    The GRU must have a single layer and be unidirectional. All four files are
+    read before any parameter is overwritten.
+    """
+
+    assert gru.num_layers == 1
+    assert gru.bidirectional == False
+
+    stems = ('weight_ih', 'weight_hh', 'bias_ih', 'bias_hh')
+    arrays = [np.load(os.path.join(where, stem + '_rzn.npy')) for stem in stems]
+
+    with torch.no_grad():
+        for stem, array in zip(stems, arrays):
+            parameter = getattr(gru, stem + '_l0')
+            parameter.set_(torch.from_numpy(array))
+
+
+def dump_torch_dense_weights(where, dense, name='dense', scale=1/128, sparse=False, diagonal=False, quantize=False):
+    """Export the weights of a torch linear layer.
+
+    A layer without bias is exported with an all-zero bias vector.
+
+    If ``where`` is a CWriter, the layer is written via print_dense_layer with
+    ``format='torch'``. Otherwise ``where`` is a directory (created if needed)
+    that receives ``weight.npy`` and ``bias.npy``.
+    """
+
+    w = dense.weight.detach().cpu().numpy().copy()
+    b = (np.zeros(dense.out_features, dtype=w.dtype)
+         if dense.bias is None
+         else dense.bias.detach().cpu().numpy().copy())
+
+    if isinstance(where, CWriter):
+        return print_dense_layer(where, name, w, b, scale=scale, format='torch', sparse=sparse, diagonal=diagonal, quantize=quantize)
+
+    os.makedirs(where, exist_ok=True)
+
+    for filename, array in (('weight.npy', w), ('bias.npy', b)):
+        np.save(os.path.join(where, filename), array)
+
+
def load_torch_dense_weights(where, dense):
    """Load weight.npy (and bias.npy) from directory `where` into a linear layer.

    Both files are read; the bias is only installed when the layer has one.
    """
    weight_path = os.path.join(where, 'weight.npy')
    bias_path = os.path.join(where, 'bias.npy')
    weight = np.load(weight_path)
    bias = np.load(bias_path)

    with torch.no_grad():
        dense.weight.set_(torch.from_numpy(weight))
        if dense.bias is None:
            return
        dense.bias.set_(torch.from_numpy(bias))
+
+
def dump_torch_conv1d_weights(where, conv, name='conv', scale=1/128, quantize=False, sparse=False):
    """Dump a 1-D convolution to a C writer or to weight_oik.npy / bias.npy.

    A layer without bias is dumped with an all-zero bias of length
    conv.out_channels. For a CWriter target the result of print_conv1d_layer
    is returned; for a directory target nothing is returned.
    """
    w = conv.weight.detach().cpu().numpy().copy()
    b = np.zeros(conv.out_channels, dtype=w.dtype) if conv.bias is None else conv.bias.detach().cpu().numpy().copy()

    if not isinstance(where, CWriter):
        os.makedirs(where, exist_ok=True)
        for fname, array in (('weight_oik.npy', w), ('bias.npy', b)):
            np.save(os.path.join(where, fname), array)
        return None

    return print_conv1d_layer(where, name, w, b, scale=scale, format='torch', quantize=quantize, sparse=sparse)
+
+
def load_torch_conv1d_weights(where, conv):
    """Load weight_oik.npy (and bias.npy) from directory `where` into a 1-D convolution.

    bias.npy is only read when the layer actually has a bias parameter.
    """
    with torch.no_grad():
        w = np.load(os.path.join(where, 'weight_oik.npy'))
        conv.weight.set_(torch.from_numpy(w))
        # single identity test instead of a type() comparison plus a repeated check
        if conv.bias is not None:
            b = np.load(os.path.join(where, 'bias.npy'))
            conv.bias.set_(torch.from_numpy(b))
+
+
def dump_torch_tconv1d_weights(where, conv, name='conv', scale=1/128, quantize=False, sparse=False):
    """Dump a 1-D transposed convolution to a C writer or to weight_oik.npy / bias.npy.

    A layer without bias is dumped with an all-zero bias of length
    conv.out_channels. For a CWriter target, conv.stride[0] is passed on and
    the result of print_tconv1d_layer is returned; for a directory target
    nothing is returned.
    """
    w = conv.weight.detach().cpu().numpy().copy()
    b = np.zeros(conv.out_channels, dtype=w.dtype) if conv.bias is None else conv.bias.detach().cpu().numpy().copy()

    if not isinstance(where, CWriter):
        os.makedirs(where, exist_ok=True)
        for fname, array in (('weight_oik.npy', w), ('bias.npy', b)):
            np.save(os.path.join(where, fname), array)
        return None

    return print_tconv1d_layer(where, name, w, b, conv.stride[0], scale=scale, quantize=quantize, sparse=sparse)
+
+
def load_torch_tconv1d_weights(where, conv):
    """Load weight_oik.npy (and bias.npy) from directory `where` into a 1-D transposed convolution.

    bias.npy is only read when the layer actually has a bias parameter.
    """
    with torch.no_grad():
        w = np.load(os.path.join(where, 'weight_oik.npy'))
        conv.weight.set_(torch.from_numpy(w))
        # single identity test instead of a type() comparison plus a repeated check
        if conv.bias is not None:
            b = np.load(os.path.join(where, 'bias.npy'))
            conv.bias.set_(torch.from_numpy(b))
+
+
def dump_torch_conv2d_weights(where, conv, name='conv', scale=1/128, quantize=False):
    """Dump a 2-D convolution to a C writer or to weight_oiwh.npy / bias.npy.

    The kernel is stored with its last two axes swapped relative to
    conv.weight. A layer without bias is dumped with an all-zero bias of
    length conv.out_channels. For a CWriter target the result of
    print_conv2d_layer is returned; for a directory target nothing is returned.
    """
    kernel = conv.weight.detach().cpu()
    w = kernel.permute(0, 1, 3, 2).numpy().copy()
    b = np.zeros(conv.out_channels, dtype=w.dtype) if conv.bias is None else conv.bias.detach().cpu().numpy().copy()

    if not isinstance(where, CWriter):
        os.makedirs(where, exist_ok=True)
        for fname, array in (('weight_oiwh.npy', w), ('bias.npy', b)):
            np.save(os.path.join(where, fname), array)
        return None

    return print_conv2d_layer(where, name, w, b, scale=scale, quantize=quantize)
+
def load_torch_conv2d_weights(where, conv):
    """Load weight_oiwh.npy (and bias.npy) from directory `where` into a 2-D convolution.

    The stored kernel has its last two axes swapped relative to conv.weight,
    so they are swapped back before the weight is installed. bias.npy is only
    read when the layer actually has a bias parameter.
    """
    with torch.no_grad():
        w = np.load(os.path.join(where, 'weight_oiwh.npy'))
        conv.weight.set_(torch.from_numpy(w).permute(0, 1, 3, 2))
        # single identity test instead of a type() comparison plus a repeated check
        if conv.bias is not None:
            b = np.load(os.path.join(where, 'bias.npy'))
            conv.bias.set_(torch.from_numpy(b))
+
+
def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False):
    """Dump an embedding as a dense layer with zero bias.

    The embedding matrix is transposed before dumping and paired with an
    all-zero bias whose length is the first dimension of the transposed
    matrix. For a CWriter target the result of print_dense_layer is returned;
    for a directory target weight.npy and bias.npy are written and nothing is
    returned.
    """
    table = embed.weight.detach().cpu().numpy().copy()
    w = table.transpose()
    b = np.zeros(w.shape[0], dtype=w.dtype)

    if not isinstance(where, CWriter):
        os.makedirs(where, exist_ok=True)
        for fname, array in (('weight.npy', w), ('bias.npy', b)):
            np.save(os.path.join(where, fname), array)
        return None

    return print_dense_layer(where, name, w, b, scale=scale, format='torch', sparse=sparse, diagonal=diagonal, quantize=quantize)
+
+
def load_torch_embedding_weights(where, emb):
    """Load weight.npy from directory `where` into an embedding layer.

    dump_torch_embedding_weights stores the matrix transposed, so a file
    whose shape only matches emb.weight after transposition is transposed
    back before it is installed. A file that already has the shape of
    emb.weight (including the ambiguous square case) is installed as is.
    """
    w = np.load(os.path.join(where, 'weight.npy'))

    expected_shape = tuple(emb.weight.shape)
    if w.shape != expected_shape and w.T.shape == expected_shape:
        # make the transposed view contiguous before handing it to torch
        w = np.ascontiguousarray(w.T)

    with torch.no_grad():
        emb.weight.set_(torch.from_numpy(w))
+
def dump_torch_weights(where, module, name=None, verbose=False, **kwargs):
    """ generic function for dumping weights of some torch.nn.Module

    The module is matched against the supported torch layer types in a fixed
    order and handed to the matching dumper, whose result is returned. When
    has_osce is set, the OSCE layer types are tried afterwards (their dumpers'
    results are not returned). Any other type raises ValueError.
    """
    if verbose and name is not None:
        print(f"printing layer {name} of type {type(module)}...")

    # order matters: it is the order in which isinstance checks are made
    torch_dumpers = (
        (torch.nn.Linear, dump_torch_dense_weights),
        (torch.nn.GRU, dump_torch_gru_weights),
        (torch.nn.GRUCell, dump_torch_grucell_weights),
        (torch.nn.Conv1d, dump_torch_conv1d_weights),
        (torch.nn.Conv2d, dump_torch_conv2d_weights),
        (torch.nn.Embedding, dump_torch_embedding_weights),
        (torch.nn.ConvTranspose1d, dump_torch_tconv1d_weights),
    )
    for layer_type, dumper in torch_dumpers:
        if isinstance(module, layer_type):
            return dumper(where, module, name, **kwargs)

    if not has_osce:
        raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')

    # OSCE-specific layers are only known when the osce package was importable
    if isinstance(module, LimitedAdaptiveConv1d):
        dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs)
    elif isinstance(module, LimitedAdaptiveComb1d):
        dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs)
    elif isinstance(module, TDShaper):
        dump_torch_tdshaper(where, module, name, **kwargs)
    else:
        raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
+
def load_torch_weights(where, module):
    """ generic function for loading weights of some torch.nn.Module

    The module is matched against the supported torch layer types in a fixed
    order and handed to the matching loader. Any other type raises ValueError.
    """
    # order matters: it is the order in which isinstance checks are made
    torch_loaders = (
        (torch.nn.Linear, load_torch_dense_weights),
        (torch.nn.GRU, load_torch_gru_weights),
        (torch.nn.Conv1d, load_torch_conv1d_weights),
        (torch.nn.Conv2d, load_torch_conv2d_weights),
        (torch.nn.Embedding, load_torch_embedding_weights),
        (torch.nn.ConvTranspose1d, load_torch_tconv1d_weights),
    )
    for layer_type, loader in torch_loaders:
        if isinstance(module, layer_type):
            # every loader returns None, so returning its result changes nothing
            return loader(where, module)

    raise ValueError(f'load_torch_weights: layer of type {type(module)} not supported')
diff --git a/dnn/training_tf2/dataloader.py b/dnn/training_tf2/dataloader.py
new file mode 100644
index 00000000..ed441c1e
--- /dev/null
+++ b/dnn/training_tf2/dataloader.py
@@ -0,0 +1,49 @@
+import numpy as np
+from tensorflow.keras.utils import Sequence
+from ulaw import lin2ulaw
+
def lpc2rc(lpc):
    """Convert a 3-D array of LPC coefficient vectors (last axis) to reflection coefficients.

    Works from the highest order down: the last remaining coefficient is
    taken as the reflection coefficient of that order, then the remaining
    coefficients are stepped down by one order. Returns an array with the
    shape of the input.
    """
    order = lpc.shape[-1]
    rc = 0*lpc
    remaining = lpc
    for m in reversed(range(1, order + 1)):
        rc[:, :, m-1] = remaining[:, :, -1]
        # read the coefficient back from rc and let broadcasting expand it
        k = rc[:, :, m-1:m]
        remaining = (remaining[:, :, :-1] - k*remaining[:, :, -2::-1])/(1 - k*k)
    return rc
+
class LPCNetLoader(Sequence):
    """Batch generator over parallel `data`, `features` and `periods` arrays.

    Only whole batches are kept, and sequence order is reshuffled at the end
    of every epoch. The last 16 feature columns are split off from the
    network input features and handled separately as `lpc` (presumably the
    LPC coefficients; confirm against the feature extractor).
    """
    def __init__(self, data, features, periods, batch_size, e2e=False, lookahead=2):
        self.batch_size = batch_size
        shortest = np.minimum(np.minimum(data.shape[0], features.shape[0]), periods.shape[0])
        self.nb_batches = shortest//self.batch_size
        usable = self.nb_batches*self.batch_size
        self.data = data[:usable, :]
        self.features = features[:usable, :]
        self.periods = periods[:usable, :]
        self.e2e = e2e
        self.lookahead = lookahead
        self.on_epoch_end()

    def on_epoch_end(self):
        """Draw a fresh random ordering of all usable sequences."""
        self.indices = np.arange(self.nb_batches*self.batch_size)
        np.random.shuffle(self.indices)

    def _batch_rows(self, index):
        """Return the shuffled sequence indices that make up batch `index`."""
        first = index*self.batch_size
        return self.indices[first:first + self.batch_size]

    def __getitem__(self, index):
        """Return (inputs, outputs) for batch `index`.

        Without e2e the `lpc` slice is appended to the inputs; with e2e its
        reflection coefficients are appended to the outputs instead.
        """
        data = self.data[self._batch_rows(index), :, :]
        features = self.features[self._batch_rows(index), :, :-16]
        periods = self.periods[self._batch_rows(index), :, :]

        inputs = [data[:, :, :1], features, periods]
        outputs = [data[:, :, 1:]]

        # frame window of the lpc slice depends on the feature lookahead
        if self.lookahead > 0:
            frames = slice(4 - self.lookahead, -self.lookahead)
        else:
            frames = slice(4, None)
        lpc = self.features[self._batch_rows(index), frames, -16:]

        if self.e2e:
            outputs.append(lpc2rc(lpc))
        else:
            inputs.append(lpc)
        return (inputs, outputs)

    def __len__(self):
        return self.nb_batches
diff --git a/dnn/training_tf2/decode_rdovae.py b/dnn/training_tf2/decode_rdovae.py
new file mode 100644
index 00000000..f9bf9bf6
--- /dev/null
+++ b/dnn/training_tf2/decode_rdovae.py
@@ -0,0 +1,111 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
# Decode features from quantized RDO-VAE symbols and initial states

import argparse
#from plc_loader import PLCLoader

parser = argparse.ArgumentParser(description='Decode features from RDO-VAE symbols and states')

parser.add_argument('bits', metavar='<bits file>', help='prefix of the float32 input files <bits file>-syms.f32 and <bits file>-state.f32')
parser.add_argument('output', metavar='<output>', help='output features')
parser.add_argument('--model', metavar='<model>', default='rdovae', help='RDO-VAE model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--weights', metavar='<input weights>', help='model weights')
parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 1)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')


args = parser.parse_args()

# The weights are loaded unconditionally below, so fail early with a clear
# message instead of passing None to load_weights.
if args.weights is None:
    parser.error('--weights is required')

import importlib
rdovae = importlib.import_module(args.model)

import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py

import tensorflow as tf
from rdovae import pvq_quantize
from rdovae import apply_dead_zone

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
model.load_weights(args.weights)

lpc_order = 16
nbits=80


bits_file = args.bits
sequence_size = args.seq_length

# Load the float32 symbols and keep a whole number of batches of sequences;
# each sequence holds sequence_size//2 vectors of 80 values.
bits = np.memmap(bits_file + "-syms.f32", dtype='float32', mode='r')
nb_sequences = len(bits)//(40*sequence_size)//batch_size*batch_size
bits = bits[:nb_sequences*sequence_size*40]

bits = np.reshape(bits, (nb_sequences, sequence_size//2, 20*4))
print(bits.shape)

# Fixed rate point: map lambda to a quantizer index and look up the
# per-dimension quantization scale and dead zone in the embedding.
lambda_val = 0.001 * np.ones((nb_sequences, sequence_size//2, 1))
quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
quant_id = quant_id[:,:,0]
quant_embed = qembedding(quant_id)
quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])

# Quantize: scale, apply the dead zone, round, then undo the scaling.
bits = bits*quant_scale
bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
bits = bits/quant_scale


state = np.memmap(bits_file + "-state.f32", dtype='float32', mode='r')

# Only the last state vector of each sequence is kept and quantized.
state = np.reshape(state, (nb_sequences, sequence_size//2, 24))
state = state[:,-1,:]
state = pvq_quantize(state, 82)
#state = state/(1e-15+tf.norm(state, axis=-1,keepdims=True))

print("shapes are:")
print(bits.shape)
print(state.shape)

# The decoder is fed every second symbol vector.
bits = bits[:,1::2,:]
features = decoder.predict([bits, state], batch_size=batch_size)

features.astype('float32').tofile(args.output)
diff --git a/dnn/training_tf2/diffembed.py b/dnn/training_tf2/diffembed.py
new file mode 100644
index 00000000..e04ae154
--- /dev/null
+++ b/dnn/training_tf2/diffembed.py
@@ -0,0 +1,49 @@
+"""
+Modification of Tensorflow's Embedding Layer:
+ 1. Not restricted to be the first layer of a model
+ 2. Differentiable (allows non-integer lookups)
+ - For non-integer lookups, this layer linearly interpolates between the adjacent embeddings in the following way to preserve gradient flow
+ - E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x))
+"""
+
+import tensorflow as tf
+from tensorflow.keras.layers import Layer
+
class diff_Embed(Layer):
    """
    Differentiable embedding lookup with linear interpolation between rows.

    Parameters:
        - units: int
            Dimension of the Embedding
        - dict_size: int
            Number of Embeddings to lookup
        - pcm_init: boolean
            Whether to initialize the embedding matrix with `initializer`
            (a random normal initializer is used otherwise)
        - initializer: callable or None
            Initializer used when pcm_init is set; called with `shape` and
            `dtype` keyword arguments
    """
    def __init__(self, units=128, dict_size = 256, pcm_init = True, initializer = None, **kwargs):
        super(diff_Embed, self).__init__(**kwargs)
        self.units = units
        self.dict_size = dict_size
        self.pcm_init = pcm_init
        self.initializer = initializer

    def build(self, input_shape):
        w_init = tf.random_normal_initializer()
        # Fall back to the random initializer when none was supplied, so the
        # default arguments (pcm_init=True, initializer=None) do not crash.
        if self.pcm_init and self.initializer is not None:
            w_init = self.initializer
        self.w = tf.Variable(initial_value=w_init(shape=(self.dict_size, self.units),dtype='float32'),trainable=True)

    def call(self, inputs):
        # fractional part of the lookup position, with a trailing axis that
        # broadcasts against the embedding dimension
        alpha = inputs - tf.math.floor(inputs)
        alpha = tf.expand_dims(alpha,axis = -1)
        inputs = tf.cast(inputs,'int32')
        # the upper neighbour is clipped to the last row of the table
        upper = tf.clip_by_value(inputs + 1, 0, self.dict_size - 1)
        M = (1 - alpha)*tf.gather(self.w,inputs) + alpha*tf.gather(self.w,upper)
        return M

    def get_config(self):
        config = super(diff_Embed, self).get_config()
        config.update({"units": self.units})
        config.update({"dict_size" : self.dict_size})
        config.update({"pcm_init" : self.pcm_init})
        config.update({"initializer" : self.initializer})
        return config
diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py
new file mode 100755
index 00000000..97ce0ced
--- /dev/null
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -0,0 +1,388 @@
+#!/usr/bin/python3
+'''Copyright (c) 2017-2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import os
+import io
+import lpcnet
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
+from ulaw import ulaw2lin, lin2ulaw
+from mdense import MDense
+from diffembed import diff_Embed
+from parameters import get_parameter
+import h5py
+import re
+import argparse
+
+
# Dumping runs on the CPU: hide every CUDA device
os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Flag for dumping e2e (differentiable lpc) network weights
flag_e2e = False

# Running maxima, raised by the layer dumpers and written to the header as
# MAX_RNN_NEURONS, MAX_CONV_INPUTS and MAX_MDENSE_TMP
max_rnn_neurons, max_conv_inputs, max_mdense_tmp = 1, 1, 1
+
def printVector(f, vector, name, dtype='float', dotp=False):
    """Write `vector` to `f` as a C array definition and register its name.

    The array is flattened and emitted inside an `#ifndef USE_WEIGHTS_FILE`
    guard, eight values per line. `name` is appended to the global
    array_list the first time it is seen. With dotp set, a 2-D matrix is
    first regrouped into blocks of 4 rows by 8 columns.
    """
    global array_list
    if dotp:
        rows, cols = vector.shape[0], vector.shape[1]
        vector = vector.reshape((rows//4, 4, cols//8, 8)).transpose((2, 0, 3, 1))
    flat = np.reshape(vector, (-1))
    count = len(flat)
    #print('static const float ', name, '[', len(v), '] = \n', file=f)
    if name not in array_list:
        array_list.append(name)
    f.write('#ifndef USE_WEIGHTS_FILE\n')
    f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
    f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
    f.write('static const {} {}[{}] = {{\n '.format(dtype, name, count))

    pieces = []
    for pos, value in enumerate(flat):
        text = '{}'.format(value)
        if pos != count - 1:
            # every value but the last gets a comma and a line or space break
            text += ',' + ("\n " if pos % 8 == 7 else " ")
        pieces.append(text)
    f.write(''.join(pieces))

    f.write('\n};\n')
    f.write('#endif\n\n')
    return
+
def printSparseVector(f, A, name, have_diag=True):
    """Write matrix `A` to `f` in block-sparse form and return its quantized copy.

    With have_diag set, the diagonals of the three N-column sub-matrices are
    written as `<name>_diag` and removed from `A` in place. The matrix is then
    scanned in blocks of 4 rows by 8 columns; blocks that are not all-zero are
    kept, both as floats and as 8-bit-range quantized values (the latter
    transposed within the block). For each group of 8 columns, `<name>_idx`
    holds the number of kept blocks followed by their starting row.

    Returns:
        The whole matrix quantized to integers in [-128, 127] (scale 128).
    """
    N = A.shape[0]
    M = A.shape[1]
    if have_diag:
        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
        printVector(f, diag, name + '_diag')
    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')

    # The leading empty arrays fix the dtype of the concatenated results.
    float_blocks = [np.zeros((0,))]
    quant_blocks = [np.zeros((0,), dtype='int')]
    index_entries = []
    for col in range(M//8):
        col_span = slice(col*8, (col+1)*8)
        kept_rows = []
        for row in range(N//4):
            row_span = slice(row*4, (row+1)*4)
            block = A[row_span, col_span]
            if np.sum(np.abs(block)) > 1e-10:
                kept_rows.append(row*4)
                float_blocks.append(block.reshape((-1,)))
                quant_blocks.append(AQ[row_span, col_span].transpose((1,0)).reshape((-1,)))
        # block count for this column group, followed by the block row offsets
        index_entries.append(len(kept_rows))
        index_entries.extend(kept_rows)
    W0 = np.concatenate(float_blocks)
    W = np.concatenate(quant_blocks)
    idx = np.array(index_entries, dtype='int')

    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
+
def dump_layer_ignore(self, f, hf):
    """Default dumper: report that the layer is skipped and write nothing.

    Returns False.
    """
    message = ''.join(("ignoring layer ", self.name, " of type ", self.__class__.__name__))
    print(message)
    return False
+Layer.dump_layer = dump_layer_ignore
+
def dump_sparse_gru(self, f, hf):
    """Dump a GRU layer with block-sparse recurrent weights.

    Writes the sparse recurrent weights, the bias and the `subias` (the bias
    with the column sums of the quantized recurrent weights, scaled by 1/128,
    subtracted from row 1) to `f`, the size defines to `hf`, and the struct
    member and init call to the model_struct / model_init buffers.
    Returns True.
    """
    global max_rnn_neurons
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    weights = self.get_weights()
    bias = weights[-1]
    neurons = weights[0].shape[1]//3

    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
    printVector(f, bias, name + '_bias')
    subias = bias.copy()
    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
    printVector(f, subias, name + '_subias')

    activation = self.activation.__name__.upper() if hasattr(self, 'activation') else 'TANH'
    # reset_after defaults to 1 unless the layer explicitly disables it
    reset_after = 0 if (hasattr(self, 'reset_after') and not self.reset_after) else 1

    max_rnn_neurons = max(max_rnn_neurons, neurons)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), neurons))
    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), neurons))
    model_struct.write(' SparseGRULayer {};\n'.format(name))
    model_init.write(' if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
            .format(name, name, name, name, name, name, neurons, activation, reset_after))
    return True
+
def dump_grub(self, f, hf, gru_a_size):
    """Dump a GRU layer whose first `gru_a_size` input rows are block-sparse.

    Writes the sparse input weights, the recurrent weights (quantized under
    DOT_PROD, float otherwise), the bias and the `subias` to `f`, and the
    struct member and init call to the model_struct / model_init buffers.
    Rows 0 and 1 of `subias` are the bias minus the column sums of the
    quantized input and recurrent weights, scaled by 1/128. Returns True.
    """
    global max_rnn_neurons
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    bias = weights[-1]
    neurons = weights[0].shape[1]//3

    qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)

    qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
    f.write('#ifdef DOT_PROD\n')
    printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, weights[1], name + '_recurrent_weights')
    f.write('#endif /*DOT_PROD*/\n')

    printVector(f, bias, name + '_bias')
    subias = bias.copy()
    for row, quantized in ((0, qweight), (1, qweight2)):
        subias[row,:] = subias[row,:] - np.sum(quantized*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')

    activation = self.activation.__name__.upper() if hasattr(self, 'activation') else 'TANH'
    # reset_after defaults to 1 unless the layer explicitly disables it
    reset_after = 0 if (hasattr(self, 'reset_after') and not self.reset_after) else 1

    max_rnn_neurons = max(max_rnn_neurons, neurons)
    model_struct.write(' GRULayer {};\n'.format(name))
    model_init.write(' if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
            .format(name, name, name, name, name, name, gru_a_size, neurons, activation, reset_after))
    return True
+
def dump_gru_layer_dummy(self, f, hf):
    """Write only the OUT_SIZE and STATE_SIZE defines of a GRU layer to `hf`.

    Both are a third of the second dimension of the first weight array.
    Nothing is written to `f`. Returns True.
    """
    prefix = self.name.upper()
    size = self.get_weights()[0].shape[1]//3
    for template in ('#define {}_OUT_SIZE {}\n', '#define {}_STATE_SIZE {}\n'):
        hf.write(template.format(prefix, size))
    return True
+
+GRU.dump_layer = dump_gru_layer_dummy
+
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """Write a dense layer given explicit weight and bias arrays.

    The arrays go to `f`, the OUT_SIZE define to `hf`, and the struct member
    and init call to the model_struct / model_init buffers.
    """
    nb_inputs, nb_outputs = weights.shape[0], weights.shape[1]
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), nb_outputs))
    model_struct.write(' DenseLayer {};\n'.format(name))
    model_init.write(' if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
            .format(name, name, name, nb_inputs, nb_outputs, activation))
+
def dump_dense_layer(self, f, hf):
    """Dump a Dense layer using its own name, weights, bias and activation.

    Returns False.
    """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    kernel, bias = self.get_weights()[:2]
    dump_dense_layer_impl(name, kernel, bias, self.activation.__name__.upper(), f, hf)
    return False
+
+Dense.dump_layer = dump_dense_layer
+
def dump_mdense_layer(self, f, hf):
    """Dump an MDense layer (weights, bias and factor arrays).

    The arrays are transposed before being written to `f`. The OUT_SIZE
    define goes to `hf`, the struct member and init call to the
    model_struct / model_init buffers, and max_mdense_tmp is raised to the
    product of the first and last weight dimensions. Returns False.
    """
    global max_mdense_tmp
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    dim0, dim1, dim2 = weights[0].shape[:3]
    printVector(f, np.transpose(weights[0], (0, 2, 1)), name + '_weights')
    for array, suffix in ((weights[1], '_bias'), (weights[2], '_factor')):
        printVector(f, np.transpose(array, (1, 0)), name + suffix)
    activation = self.activation.__name__.upper()
    max_mdense_tmp = max(max_mdense_tmp, dim0*dim2)
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), dim0))
    model_struct.write(' MDenseLayer {};\n'.format(name))
    model_init.write(' if (mdense_init(&model->{}, arrays, "{}_bias", "{}_weights", "{}_factor", {}, {}, {}, ACTIVATION_{})) return 1;\n'
            .format(name, name, name, name, dim1, dim0, dim2, activation))
    return False
+MDense.dump_layer = dump_mdense_layer
+
def dump_conv1d_layer(self, f, hf):
    """Dump a Conv1D layer.

    The kernel and bias go to `f`; the OUT_SIZE, STATE_SIZE and DELAY defines
    go to `hf`; the struct member and init call go to the model_struct /
    model_init buffers. max_conv_inputs is raised to the product of the
    first two kernel dimensions. Returns True.
    """
    global max_conv_inputs
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    kernel = weights[0]
    dim0, dim1, dim2 = kernel.shape[:3]
    printVector(f, kernel, name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    max_conv_inputs = max(max_conv_inputs, dim1*dim0)
    upper = name.upper()
    hf.write('#define {}_OUT_SIZE {}\n'.format(upper, dim2))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(upper, dim1, (dim0-1)))
    hf.write('#define {}_DELAY {}\n'.format(upper, (dim0-1)//2))
    model_struct.write(' Conv1DLayer {};\n'.format(name))
    model_init.write(' if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
            .format(name, name, name, dim1, dim0, dim2, activation))
    return True
+Conv1D.dump_layer = dump_conv1d_layer
+
+
def dump_embedding_layer_impl(name, weights, f, hf):
    """Write an embedding table given an explicit weight matrix.

    The matrix goes to `f`, the OUT_SIZE define to `hf`, and the struct
    member and init call to the model_struct / model_init buffers.
    """
    nb_entries, width = weights.shape[0], weights.shape[1]
    printVector(f, weights, name + '_weights')
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), width))
    model_struct.write(' EmbeddingLayer {};\n'.format(name))
    model_init.write(' if (embedding_init(&model->{}, arrays, "{}_weights", {}, {})) return 1;\n'
            .format(name, name, nb_entries, width))
+
def dump_embedding_layer(self, f, hf):
    """Dump an embedding layer using its own name and first weight array.

    Returns False.
    """
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    table = self.get_weights()[0]
    dump_embedding_layer_impl(name, table, f, hf)
    return False
+Embedding.dump_layer = dump_embedding_layer
+diff_Embed.dump_layer = dump_embedding_layer
+
if __name__ == "__main__":
    # Command line: the model weight file plus optional output names and
    # overrides for values otherwise read from the model.
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file', type=str, help='model weight h5 file')
    parser.add_argument('--nnet-header', type=str, help='name of c header file for dumped model', default='nnet_data.h')
    parser.add_argument('--nnet-source', type=str, help='name of c source file for dumped model', default='nnet_data.c')
    parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. If not specified I will attempt to read it from the model file with 1 as default', default=None)
    # Parsed as an integer: the value is written verbatim into the
    # FEATURES_DELAY define, where a float such as "2.0" is not wanted.
    parser.add_argument('--lookahead', type=int, help='Features lookahead. If not specified I will attempt to read it from the model file with 2 as default', default=None)

    args = parser.parse_args()

    # Read the layer sizes from the weight file so the model can be rebuilt
    # with matching dimensions.
    filename = args.model_file
    with h5py.File(filename, "r") as f:
        units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
        units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
        cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
        e2e = 'rc2lpc' in f['model_weights']

    model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
    #model.summary()

    model.load_weights(filename, by_name=True)

    cfile = args.nnet_source
    hfile = args.nnet_header

    # Buffers and registry filled by the layer dumpers (module-level names).
    model_struct = io.StringIO()
    model_init = io.StringIO()
    model_struct.write('typedef struct {\n')
    model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
    model_init.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays) {\n')
    array_list = []

    # Both outputs are closed even if dumping fails part-way.
    with open(cfile, 'w') as f, open(hfile, 'w') as hf:
        f.write('/*This file is automatically generated from a Keras model*/\n')
        # use the parsed positional argument; sys.argv[1] may be an option
        f.write('/*based on model {}*/\n\n'.format(filename))
        f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))

        hf.write('/*This file is automatically generated from a Keras model*/\n\n')
        hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')

        if e2e:
            hf.write('/* This is an end-to-end model */\n')
            hf.write('#define END2END\n\n')
        else:
            hf.write('/* This is *not* an end-to-end model */\n')
            hf.write('/* #define END2END */\n\n')

        # LPC weighting factor
        if args.lpc_gamma is None:
            lpc_gamma = get_parameter(model, 'lpc_gamma', 1)
        else:
            lpc_gamma = args.lpc_gamma

        hf.write('/* LPC weighting factor */\n')
        hf.write('#define LPC_GAMMA ' + str(lpc_gamma) +'f\n\n')

        # look-ahead
        if args.lookahead is None:
            lookahead = get_parameter(model, 'lookahead', 2)
        else:
            lookahead = args.lookahead

        hf.write('/* Features look-ahead */\n')
        hf.write('#define FEATURES_DELAY ' + str(lookahead) +'\n\n')

        embed_size = lpcnet.embed_size

        # Fold the signal embedding into the first GRU's input weights: one
        # pre-multiplied table per embedded input, plus a dense layer for the
        # remaining (feature) rows.
        E = model.get_layer('embed_sig').get_weights()[0]
        W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
        dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
        W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
        dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
        W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
        dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
        W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
        #FIXME: dump only half the biases
        b = model.get_layer('gru_a').get_weights()[2]
        dump_dense_layer_impl('gru_a_dense_feature', W, b[:len(b)//2], 'LINEAR', f, hf)

        W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
        b = model.get_layer('gru_b').get_weights()[2]
        # Set biases to zero because they'll be included in the GRU input part
        # (we need regular and SU biases)
        dump_dense_layer_impl('gru_b_dense_feature', W, 0*b[:len(b)//2], 'LINEAR', f, hf)
        dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)

        # Layers whose dumper returns True get a state buffer in NNetState.
        layer_list = []
        for i, layer in enumerate(model.layers):
            if layer.dump_layer(f, hf):
                layer_list.append(layer.name)

        dump_sparse_gru(model.get_layer('gru_a'), f, hf)

        # Table of every array written so far.
        f.write('#ifndef USE_WEIGHTS_FILE\n')
        f.write('const WeightArray lpcnet_arrays[] = {\n')
        for name in array_list:
            f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
            f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
            f.write('#endif\n')
        f.write(' {NULL, 0, 0, NULL}\n};\n')
        f.write('#endif\n')

        model_init.write(' return 0;\n}\n')
        model_init.write('#endif\n')
        f.write(model_init.getvalue())

        hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
        hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
        hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))


        hf.write('typedef struct {\n')
        for i, name in enumerate(layer_list):
            hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
        hf.write('} NNetState;\n\n')

        model_struct.write('} LPCNetModel;\n\n')
        hf.write(model_struct.getvalue())
        hf.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays);\n\n')
        hf.write('\n\n#endif\n')
diff --git a/dnn/training_tf2/dump_plc.py b/dnn/training_tf2/dump_plc.py
new file mode 100755
index 00000000..a490ade1
--- /dev/null
+++ b/dnn/training_tf2/dump_plc.py
@@ -0,0 +1,296 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2017-2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import lpcnet_plc
+import io
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
+import h5py
+import re
+
+ # Flag for dumping e2e (differentiable lpc) network weights
+ flag_e2e = False  # NOTE(review): not referenced anywhere else in this script
+
+ max_rnn_neurons = 1  # running maximum, raised by the GRU dump functions below
+ max_conv_inputs = 1  # running maximum, raised by dump_conv1d_layer below
+
+ def printVector(f, vector, name, dtype='float', dotp=False):  # Write `vector` to f as a static C array called `name`, wrapped in an #ifndef USE_WEIGHTS_FILE guard.
+ global array_list
+ if dotp:
+ vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))  # split rows into groups of 4 and columns into groups of 8
+ vector = vector.transpose((2, 0, 3, 1))  # new axis order: column group, row group, column within group, row within group
+ v = np.reshape(vector, (-1));  # flatten: the emitted C array is one-dimensional
+ #print('static const float ', name, '[', len(v), '] = \n', file=f)
+ if name not in array_list:  # record each name once; the script later builds its WeightArray table from array_list
+ array_list.append(name)
+ f.write('#ifndef USE_WEIGHTS_FILE\n')
+ f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))  # the WeightArray table emitted by the script tests this macro with #ifdef
+ f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
+ f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+ for i in range(0, len(v)):
+ f.write('{}'.format(v[i]))
+ if (i!=len(v)-1):
+ f.write(',')
+ else:
+ break;  # last value: nothing is written after it
+ if (i%8==7):  # line break after every 8 values
+ f.write("\n ")
+ else:
+ f.write(" ")
+ #print(v, file=f)
+ f.write('\n};\n')
+ f.write('#endif\n\n')
+ return;
+
+ def printSparseVector(f, A, name, have_diag=True):  # Write A in block-sparse form (4-row by 8-column blocks plus an index array); returns the quantized matrix AQ.
+ N = A.shape[0]
+ M = A.shape[1]
+ W = np.zeros((0,), dtype='int')  # accumulates the quantized values of the kept blocks
+ W0 = np.zeros((0,))  # accumulates the unquantized values of the kept blocks
+ if have_diag:  # A is handled as three N-column sub-matrices side by side
+ diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+ A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))  # zero the diagonals in place; this modifies the array passed in
+ A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+ A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+ printVector(f, diag, name + '_diag')  # the three diagonals are dumped separately as <name>_diag
+ AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')  # scale by 128, round, clamp to [-128, 127]
+ idx = np.zeros((0,), dtype='int')
+ for i in range(M//8):  # one pass per group of 8 columns
+ pos = idx.shape[0]
+ idx = np.append(idx, -1)  # placeholder; replaced by this group's block count at the end of the pass
+ nb_nonzero = 0
+ for j in range(N//4):  # one candidate block per group of 4 rows
+ block = A[j*4:(j+1)*4, i*8:(i+1)*8]
+ qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
+ if np.sum(np.abs(block)) > 1e-10:  # keep only blocks that are not (numerically) all zero
+ nb_nonzero = nb_nonzero + 1
+ idx = np.append(idx, j*4)  # first row of the kept block
+ vblock = qblock.transpose((1,0)).reshape((-1,))  # quantized block flattened so the 4 rows of each column are adjacent
+ W0 = np.concatenate([W0, block.reshape((-1,))])  # unquantized block flattened row by row
+ W = np.concatenate([W, vblock])
+ idx[pos] = nb_nonzero
+ f.write('#ifdef DOT_PROD\n')
+ printVector(f, W, name, dtype='qweight')  # quantized values when the C build defines DOT_PROD
+ f.write('#else /*DOT_PROD*/\n')
+ printVector(f, W0, name, dtype='qweight')  # unquantized values otherwise, declared with the same 'qweight' type name
+ f.write('#endif /*DOT_PROD*/\n')
+ #idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
+ printVector(f, idx, name + '_idx', dtype='int')
+ return AQ
+
+ def dump_layer_ignore(self, f, hf):  # Fallback dump_layer: logs the layer and writes nothing to f or hf.
+ print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
+ return False  # False: the dump loop does not add this layer to layer_list
+ Layer.dump_layer = dump_layer_ignore  # installed on the base class; GRU, Dense and Conv1D get their own dump_layer below
+
+ def dump_sparse_gru(self, f, hf):  # Dump a GRU with block-sparse recurrent weights as 'sparse_<layer name>'; its only call in this script is commented out.
+ global max_rnn_neurons
+ name = 'sparse_' + self.name
+ print("printing layer " + name + " of type sparse " + self.__class__.__name__)
+ weights = self.get_weights()
+ qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')  # weights[1] is dumped as the recurrent matrix; the return value is its quantized copy with the diagonals zeroed
+ printVector(f, weights[-1], name + '_bias')
+ subias = weights[-1].copy()
+ subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)  # row 1 of the bias minus the column sums of the quantized weights, rescaled by 1/128
+ printVector(f, subias, name + '_subias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'  # default when the layer exposes no activation attribute
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1  # default unless the layer explicitly has reset_after false
+ neurons = weights[0].shape[1]//3  # assumes weights[0] has 3*units columns (Keras GRU kernel layout) -- TODO confirm
+ max_rnn_neurons = max(max_rnn_neurons, neurons)
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ model_struct.write(' SparseGRULayer {};\n'.format(name));  # struct member in the generated model struct
+ model_init.write(' if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
+ .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
+ return True  # True: the layer gets a <name>_state entry via layer_list
+
+ def dump_gru_layer(self, f, hf):  # Dump a GRU layer: block-sparse input weights, dense recurrent weights, bias, 'subias', size macros and init call.
+ global max_rnn_neurons
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)  # input weights in block-sparse form, no diagonal extraction
+
+ f.write('#ifdef DOT_PROD\n')
+ qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)  # recurrent weights scaled by 128, rounded, clipped to [-128, 127]
+ printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')  # dotp=True applies printVector's 4x8 reordering
+ f.write('#else /*DOT_PROD*/\n')
+ printVector(f, weights[1], name + '_recurrent_weights')  # unquantized recurrent weights for builds without DOT_PROD
+ f.write('#endif /*DOT_PROD*/\n')
+
+ printVector(f, weights[-1], name + '_bias')
+ subias = weights[-1].copy()
+ subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)  # row 0: subtract column sums of the quantized input weights / 128
+ subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)  # row 1: same for the quantized recurrent weights
+ printVector(f, subias, name + '_subias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'  # default when the layer exposes no activation attribute
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1  # default unless the layer explicitly has reset_after false
+ neurons = weights[0].shape[1]//3  # assumes weights[0] has 3*units columns (Keras GRU kernel layout) -- TODO confirm
+ max_rnn_neurons = max(max_rnn_neurons, neurons)
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ model_struct.write(' GRULayer {};\n'.format(name));  # struct member in the generated model struct
+ model_init.write(' if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
+ .format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+ return True  # True: the layer gets a <name>_state entry via layer_list
+ GRU.dump_layer = dump_gru_layer  # every Keras GRU layer in the model is dumped with this function
+
+ def dump_gru_layer_dummy(self, f, hf):  # Variant that writes only the two size macros to the header and no weights to f.
+ name = self.name
+ weights = self.get_weights()
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ return True;
+
+ #GRU.dump_layer = dump_gru_layer_dummy  (disabled: dump_gru_layer is the one installed on GRU)
+
+ def dump_dense_layer_impl(name, weights, bias, activation, f, hf):  # Dump a dense layer from explicit arrays: two C arrays, an OUT_SIZE macro, a struct member and an init call.
+ printVector(f, weights, name + '_weights')
+ printVector(f, bias, name + '_bias')
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))  # output size is the second dimension of the weight matrix
+ model_struct.write(' DenseLayer {};\n'.format(name));
+ model_init.write(' if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
+ .format(name, name, name, weights.shape[0], weights.shape[1], activation))  # no return statement: callers get None
+
+ def dump_dense_layer(self, f, hf):  # dump_layer for Keras Dense layers: forwards the layer's weights, bias and activation name to dump_dense_layer_impl.
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ activation = self.activation.__name__.upper()
+ dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
+ return False  # False keeps the layer out of layer_list; no <NAME>_STATE_SIZE macro is written for dense layers
+
+ Dense.dump_layer = dump_dense_layer
+
+ def dump_conv1d_layer(self, f, hf):  # dump_layer for Keras Conv1D layers: weights, bias, size/state/delay macros, struct member and init call.
+ global max_conv_inputs
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ printVector(f, weights[0], name + '_weights')
+ printVector(f, weights[-1], name + '_bias')
+ activation = self.activation.__name__.upper()
+ max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])  # presumably in_channels * kernel_size (Keras kernel layout) -- TODO confirm
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
+ hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))  # shape[1] * (shape[0] - 1)
+ hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))  # (shape[0] - 1) // 2
+ model_struct.write(' Conv1DLayer {};\n'.format(name));
+ model_init.write(' if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
+ .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+ return True  # True: the layer gets a <name>_state entry via layer_list
+ Conv1D.dump_layer = dump_conv1d_layer
+
+
+
+ filename = sys.argv[1]  # first argument: the HDF5 weight file
+ with h5py.File(filename, "r") as f:  # read layer sizes from the weight file so the model is rebuilt with matching dimensions
+ units = min(f['model_weights']['plc_gru1']['plc_gru1']['recurrent_kernel:0'].shape)  # smaller dimension of the recurrent kernel
+ units2 = min(f['model_weights']['plc_gru2']['plc_gru2']['recurrent_kernel:0'].shape)  # NOTE(review): computed but not used below
+ cond_size = f['model_weights']['plc_dense1']['plc_dense1']['kernel:0'].shape[1]
+
+ model = lpcnet_plc.new_lpcnet_plc_model(rnn_units=units, cond_size=cond_size)
+ model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+ #model.summary()
+
+ model.load_weights(filename, by_name=True)
+
+if len(sys.argv) > 2:
+ cfile = sys.argv[2];
+ hfile = sys.argv[3];
+else:
+ cfile = 'plc_data.c'
+ hfile = 'plc_data.h'
+
+
+ f = open(cfile, 'w')  # generated C source
+ hf = open(hfile, 'w')  # generated C header
+ model_struct = io.StringIO()  # buffers the PLCModel struct; the dump functions append one member per layer
+ model_init = io.StringIO()  # buffers init_plc_model(); the dump functions append one init call per layer
+ model_struct.write('typedef struct {\n')
+ model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
+ model_init.write('int init_plc_model(PLCModel *model, const WeightArray *arrays) {\n')
+ array_list = []  # filled by printVector with the name of every array it writes
+
+
+ f.write('/*This file is automatically generated from a Keras model*/\n')
+ f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
+ f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))
+
+ hf.write('/*This file is automatically generated from a Keras model*/\n\n')
+ hf.write('#ifndef PLC_DATA_H\n#define PLC_DATA_H\n\n#include "nnet.h"\n\n')
+
+ layer_list = []  # names of layers whose dump_layer returned True
+ for i, layer in enumerate(model.layers):
+ if layer.dump_layer(f, hf):  # dispatches to the per-class dump functions installed above
+ layer_list.append(layer.name)
+
+ #dump_sparse_gru(model.get_layer('gru_a'), f, hf)
+ f.write('#ifndef USE_WEIGHTS_FILE\n')
+ f.write('const WeightArray lpcnet_plc_arrays[] = {\n')
+ for name in array_list:  # one guarded table entry per array written by printVector
+ f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
+ f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
+ f.write('#endif\n')
+ f.write(' {NULL, 0, 0, NULL}\n};\n')  # all-zero entry closes the table
+ f.write('#endif\n')
+
+ model_init.write(' return 0;\n}\n')
+ model_init.write('#endif\n')
+ f.write(model_init.getvalue())  # the init function goes after the weight arrays in the C file
+
+
+ hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
+ #hf.write('#define PLC_MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
+
+ hf.write('typedef struct {\n')
+ for i, name in enumerate(layer_list):  # one float state array per layer in layer_list
+ hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
+ hf.write('} PLCNetState;\n\n')
+
+ model_struct.write('} PLCModel;\n\n')
+ hf.write(model_struct.getvalue())
+ hf.write('int init_plc_model(PLCModel *model, const WeightArray *arrays);\n\n')
+
+ hf.write('\n\n#endif\n')
+
+ f.close()
+ hf.close()
diff --git a/dnn/training_tf2/dump_rdovae.py b/dnn/training_tf2/dump_rdovae.py
new file mode 100644
index 00000000..1858c8a4
--- /dev/null
+++ b/dnn/training_tf2/dump_rdovae.py
@@ -0,0 +1,306 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+from ftplib import parse150
+import os
+
+ os.environ['CUDA_VISIBLE_DEVICES'] = ""  # set before TensorFlow is imported below; presumably hides all GPUs so the dump runs on CPU -- confirm
+
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+ parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+ parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+ parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+ args = parser.parse_args()  # parsed before the TensorFlow import that follows
+
+# now import the heavy stuff
+import tensorflow as tf
+import numpy as np
+from keraslayerdump import dump_conv1d_layer, dump_dense_layer, dump_gru_layer, printVector
+from rdovae import new_rdovae_model
+
+ def start_header(header_fid, header_name):  # Write the opening #ifndef/#define include guard to a generated header.
+ header_guard = os.path.basename(header_name)[:-2].upper() + "_H"  # drops the last two characters (expected to be ".h"): "foo_data.h" -> "FOO_DATA_H"
+ header_fid.write(
+f"""
+#ifndef {header_guard}
+#define {header_guard}
+
+"""
+ )
+
+ def finish_header(header_fid):  # Write the #endif that closes the include guard; does not close the file.
+ header_fid.write(
+"""
+#endif
+
+"""
+ )
+
+ def start_source(source_fid, header_name, weight_file):  # Write the preamble of a generated C file: provenance comment, optional config.h include, and the matching header include.
+ source_fid.write(
+f"""
+/* this source file was automatically generated from weight file {weight_file} */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "{header_name}"
+
+"""
+ )
+
+ def finish_source(source_fid):  # Counterpart of start_source; currently writes nothing.
+ pass
+
+
+ def dump_statistical_model(qembedding, f, fh):  # Convert the quantizer embedding into four uint16 fixed-point tables, written to f and declared extern in fh.
+ w = qembedding.weights[0].numpy()
+ levels, dim = w.shape  # one row per quantization level
+ N = dim // 6  # each row is treated as 6 groups of N values; groups 0, 1, 4 and 5 are used here
+
+ print("dumping statistical model")
+ quant_scales = tf.math.softplus(w[:, : N]).numpy()  # group 0
+ dead_zone = 0.05 * tf.math.softplus(w[:, N : 2 * N]).numpy()  # group 1, scaled by 0.05
+ r = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()  # group 5
+ p0 = tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()  # group 4
+ p0 = 1 - r ** (0.5 + 0.5 * p0)  # p0 is derived from both r and the group-4 sigmoid
+
+ quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)  # fixed point with 8 fractional bits
+ dead_zone_q10 = np.round(dead_zone * 2**10).astype(np.uint16)  # 10 fractional bits
+ r_q15 = np.round(r * 2**15).astype(np.uint16)  # 15 fractional bits
+ p0_q15 = np.round(p0 * 2**15).astype(np.uint16)  # 15 fractional bits
+
+ printVector(f, quant_scales_q8, 'dred_quant_scales_q8', dtype='opus_uint16', static=False)  # printVector here is the keraslayerdump import, which accepts `static`
+ printVector(f, dead_zone_q10, 'dred_dead_zone_q10', dtype='opus_uint16', static=False)
+ printVector(f, r_q15, 'dred_r_q15', dtype='opus_uint16', static=False)
+ printVector(f, p0_q15, 'dred_p0_q15', dtype='opus_uint16', static=False)
+
+ fh.write(
+f"""
+extern const opus_uint16 dred_quant_scales_q8[{levels * N}];
+extern const opus_uint16 dred_dead_zone_q10[{levels * N}];
+extern const opus_uint16 dred_r_q15[{levels * N}];
+extern const opus_uint16 dred_p0_q15[{levels * N}];
+
+"""
+ )
+
+ if __name__ == "__main__":
+
+ model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)  # the literal 20 is also written out as DRED_NUM_FEATURES at the end of the script
+ model.load_weights(args.weights)
+
+
+
+
+ # encoder
+ encoder_dense_names = [
+ 'enc_dense1',
+ 'enc_dense3',
+ 'enc_dense5',
+ 'enc_dense7',
+ 'enc_dense8',
+ 'gdense1',
+ 'gdense2'
+ ]
+
+ encoder_gru_names = [  # passed to dump_gru_layer below despite the "dense" in their names
+ 'enc_dense2',
+ 'enc_dense4',
+ 'enc_dense6'
+ ]
+
+ encoder_conv1d_names = [  # passed to dump_conv1d_layer below despite the "dense" in the name
+ 'bits_dense'
+ ]
+
+ source_fid = open("dred_rdovae_enc_data.c", 'w')  # output files are created in the current working directory
+ header_fid = open("dred_rdovae_enc_data.h", 'w')
+
+ start_header(header_fid, "dred_rdovae_enc_data.h")
+ start_source(source_fid, "dred_rdovae_enc_data.h", os.path.basename(args.weights))
+
+ header_fid.write(
+f"""
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+ )
+
+ # dump GRUs
+ max_rnn_neurons_enc = max(  # relies on dump_gru_layer (from keraslayerdump, not visible here) returning a size per layer -- confirm
+ [
+ dump_gru_layer(encoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
+ for name in encoder_gru_names
+ ]
+ )
+
+ # dump conv layers
+ max_conv_inputs = max(  # likewise relies on the return value of dump_conv1d_layer
+ [
+ dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)
+ for name in encoder_conv1d_names
+ ]
+ )
+
+ # dump Dense layers
+ for name in encoder_dense_names:
+ layer = encoder.get_layer(name)
+ dump_dense_layer(layer, source_fid, header_fid)
+
+ # some global constants
+ header_fid.write(
+f"""
+
+#define DRED_ENC_MAX_RNN_NEURONS {max_rnn_neurons_enc}
+
+#define DRED_ENC_MAX_CONV_INPUTS {max_conv_inputs}
+
+"""
+ )
+
+ finish_header(header_fid)
+ finish_source(source_fid)
+
+ header_fid.close()
+ source_fid.close()
+
+ # statistical model
+ source_fid = open("dred_rdovae_stats_data.c", 'w')  # the names source_fid/header_fid are reused for each output pair
+ header_fid = open("dred_rdovae_stats_data.h", 'w')
+
+ start_header(header_fid, "dred_rdovae_stats_data.h")
+ start_source(source_fid, "dred_rdovae_stats_data.h", os.path.basename(args.weights))
+
+ header_fid.write(
+"""
+
+#include "opus_types.h"
+
+"""
+ )
+
+ dump_statistical_model(qembedding, source_fid, header_fid)  # writes the four opus_uint16 tables and their extern declarations
+
+ finish_header(header_fid)
+ finish_source(source_fid)
+
+ header_fid.close()
+ source_fid.close()
+
+ # decoder
+ decoder_dense_names = [
+ 'state1',
+ 'state2',
+ 'state3',
+ 'dec_dense1',
+ 'dec_dense3',
+ 'dec_dense5',
+ 'dec_dense7',
+ 'dec_dense8',
+ 'dec_final'
+ ]
+
+ decoder_gru_names = [  # passed to dump_gru_layer below despite the "dense" in their names
+ 'dec_dense2',
+ 'dec_dense4',
+ 'dec_dense6'
+ ]
+
+ source_fid = open("dred_rdovae_dec_data.c", 'w')
+ header_fid = open("dred_rdovae_dec_data.h", 'w')
+
+ start_header(header_fid, "dred_rdovae_dec_data.h")
+ start_source(source_fid, "dred_rdovae_dec_data.h", os.path.basename(args.weights))
+
+ header_fid.write(
+f"""
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+ )
+
+
+ # dump GRUs
+ max_rnn_neurons_dec = max(  # relies on dump_gru_layer (from keraslayerdump, not visible here) returning a size per layer -- confirm
+ [
+ dump_gru_layer(decoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
+ for name in decoder_gru_names
+ ]
+ )
+
+ # dump Dense layers
+ for name in decoder_dense_names:  # the decoder section dumps no conv layers
+ layer = decoder.get_layer(name)
+ dump_dense_layer(layer, source_fid, header_fid)
+
+ # some global constants
+ header_fid.write(
+f"""
+
+#define DRED_DEC_MAX_RNN_NEURONS {max_rnn_neurons_dec}
+
+"""
+ )
+
+ finish_header(header_fid)
+ finish_source(source_fid)
+
+ header_fid.close()
+ source_fid.close()
+
+ # common constants
+ header_fid = open("dred_rdovae_constants.h", 'w')
+ start_header(header_fid, "dred_rdovae_constants.h")
+
+ header_fid.write(
+f"""
+#define DRED_NUM_FEATURES 20
+
+#define DRED_LATENT_DIM {args.latent_dim}
+
+#define DRED_STATE_DIM {24}
+
+#define DRED_NUM_QUANTIZATION_LEVELS {qembedding.weights[0].shape[0]}
+
+#define DRED_MAX_RNN_NEURONS {max(max_rnn_neurons_enc, max_rnn_neurons_dec)}
+
+#define DRED_MAX_CONV_INPUTS {max_conv_inputs}
+"""
+ )
+
+ finish_header(header_fid) \ No newline at end of file
diff --git a/dnn/training_tf2/encode_rdovae.py b/dnn/training_tf2/encode_rdovae.py
new file mode 100644
index 00000000..f144fc48
--- /dev/null
+++ b/dnn/training_tf2/encode_rdovae.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+ # Run a trained RDOVAE encoder/decoder on a feature file and dump the results
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Train a PLC model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--weights', metavar='<input weights>', help='model weights')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 128)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+from rdovae import apply_dead_zone
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+from rdovae import pvq_quantize
+
+ # Try reducing batch_size if you run out of memory on your GPU
+ batch_size = args.batch_size
+
+ model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
+ model.load_weights(args.weights)  # NOTE(review): args.weights is None when --weights is omitted -- confirm load_weights copes with that
+
+ lpc_order = 16
+
+ feature_file = args.features
+ nb_features = model.nb_used_features + lpc_order  # values stored per frame in the feature file
+ nb_used_features = model.nb_used_features  # leading values of each frame that are fed to the encoder
+ sequence_size = args.seq_length
+
+ # NOTE(review): the comment that stood here mentioned 16-bit PCM and mu-law; the code below only memory-maps float32 features
+
+
+ features = np.memmap(feature_file, dtype='float32', mode='r')
+ nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size  # whole sequences, rounded down to a multiple of batch_size
+ features = features[:nb_sequences*sequence_size*nb_features]  # drop the incomplete tail
+
+ features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+ print(features.shape)
+ features = features[:, :, :nb_used_features]  # keep only the first nb_used_features values of each frame
+ #features = np.random.randn(73600, 1000, 17)
+
+
+ bits, gru_state_dec = encoder.predict([features], batch_size=batch_size)
+ (gru_state_dec).astype('float32').tofile(args.output + "-state.f32")  # unquantized state, written before pvq_quantize is applied below
+
+
+ #dist = rdovae.feat_dist_loss(features, quant_out)
+ #rate = rdovae.sq1_rate_loss(features, model_bits)
+ #rate2 = rdovae.sq_rate_metric(features, model_bits)
+ #print(dist, rate, rate2)
+
+ print("shapes are:")
+ print(bits.shape)
+ print(gru_state_dec.shape)
+
+ features.astype('float32').tofile(args.output + "-input.f32")
+ #quant_out.astype('float32').tofile(args.output + "-enc_dec.f32")
+ nbits=80  # same value as nb_bits passed to new_rdovae_model above
+ bits.astype('float32').tofile(args.output + "-syms.f32")  # encoder symbols before quantization
+
+ lambda_val = 0.0002 * np.ones((nb_sequences, sequence_size//2, 1))
+ quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')  # lambda_val equals the 0.0002 reference everywhere, so log(1) = 0 and every id is 0
+ quant_id = quant_id[:,:,0]
+ quant_embed = qembedding(quant_id)
+ quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])  # first nbits embedding values
+ dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])  # next nbits embedding values
+
+ bits = bits*quant_scale  # scale, apply the dead zone and round, then undo the scale
+ bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
+ bits = bits/quant_scale
+
+ gru_state_dec = pvq_quantize(gru_state_dec, 82)
+ #gru_state_dec = gru_state_dec/(1e-15+tf.norm(gru_state_dec, axis=-1,keepdims=True))
+ gru_state_dec = gru_state_dec[:,-1,:]  # keep the last step along axis 1
+ dec_out = decoder([bits[:,1::2,:], gru_state_dec])  # decoder gets every second step of bits, starting at index 1
+
+ print(dec_out.shape)
+
+ dec_out.numpy().astype('float32').tofile(args.output + "-quant_out.f32")
diff --git a/dnn/training_tf2/fec_encoder.py b/dnn/training_tf2/fec_encoder.py
new file mode 100644
index 00000000..15ef12b2
--- /dev/null
+++ b/dnn/training_tf2/fec_encoder.py
@@ -0,0 +1,256 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe and Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+import os
+import subprocess
+import argparse
+
+
+import numpy as np
+from scipy.io import wavfile
+import tensorflow as tf
+
+from rdovae import new_rdovae_model, pvq_quantize, apply_dead_zone, sq_rate_metric
+from fec_packets import write_fec_packets, read_fec_packets
+
+
+debug = False
+
+if debug:
+ args = type('dummy', (object,),
+ {
+ 'input' : 'item1.wav',
+ 'weights' : 'testout/rdovae_alignment_fix_1024_120.h5',
+ 'enc_lambda' : 0.0007,
+ 'output' : "test_0007.fec",
+ 'cond_size' : 1024,
+ 'num_redundancy_frames' : 64,
+ 'extra_delay' : 0,
+ 'dump_data' : './dump_data'
+ })()
+ os.environ['CUDA_VISIBLE_DEVICES']=""
+else:
+ parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with voip application and 20ms frames')
+
+ parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
+ parser.add_argument('weights', metavar='<weights>', help='trained model file (.h5)')
+# parser.add_argument('enc_lambda', metavar='<lambda>', type=float, help='lambda for controlling encoder rate')
+ parser.add_argument('output', type=str, help='output file (will be extended with .fec)')
+
+ parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
+ parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+ parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 40)", default=40)
+ parser.add_argument('--num-redundancy-frames', default=64, type=int, help='number of redundancy frames (20ms) per packet (default 64)')
+ parser.add_argument('--extra-delay', default=0, type=int, help="last features in packet are calculated with the decoder aligned samples, use this option to add extra delay (in samples at 16kHz)")
+ parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')
+
+ parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')
+
+ args = parser.parse_args()
+
+ model, encoder, decoder, qembedding = new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=1, nb_quant=args.quant_levels, cond_size=args.cond_size)
+ model.load_weights(args.weights)
+
+ lpc_order = 16
+
+ ## prepare input signal
+ # SILK frame size is 20ms and LPCNet subframes are 10ms
+ subframe_size = 160  # samples per 10 ms subframe; the --extra-delay help text states the 16 kHz rate
+ frame_size = 2 * subframe_size
+
+ # 91 samples delay to align with SILK decoded frames
+ silk_delay = 91
+
+ # prepend zeros to have enough history to produce the first packet
+ zero_history = (args.num_redundancy_frames - 1) * frame_size
+
+ # dump data has a (feature) delay of 10ms
+ dump_data_delay = 160
+
+ total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay  # number of zero samples prepended to the signal below
+
+ # load signal
+ if args.input.endswith('.raw') or args.input.endswith('.pcm') or args.input.endswith('.sw'):
+ signal = np.fromfile(args.input, dtype='int16')  # headerless files are read as raw int16 samples
+
+ elif args.input.endswith('.wav'):
+ fs, signal = wavfile.read(args.input)  # NOTE(review): fs is not checked, although the delays above are expressed in 16 kHz samples
+ else:
+ raise ValueError(f'unknown input signal format: {args.input}')
+
+ # fill up last frame with zeros
+ padded_signal_length = len(signal) + total_delay
+ tail = padded_signal_length % frame_size
+ right_padding = (frame_size - tail) % frame_size  # the outer modulo gives 0 padding when the length is already a whole number of frames
+
+ signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
+
+ padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'  # written next to the input file
+ signal.tofile(padded_signal_file)
+
+# write signal and call dump_data to create features
+
+feature_file = os.path.splitext(args.input)[0] + '_features.f32'
+command = f"{args.dump_data} -test {padded_signal_file} {feature_file}"
+r = subprocess.run(command, shell=True)
+if r.returncode != 0:
+ raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")
+
+ # load features
+ nb_features = model.nb_used_features + lpc_order  # values stored per subframe in the feature file
+ nb_used_features = model.nb_used_features
+
+ # load features
+ features = np.fromfile(feature_file, dtype='float32')
+ num_subframes = len(features) // nb_features
+ num_subframes = 2 * (num_subframes // 2)  # round down to an even count: two subframes per frame
+ num_frames = num_subframes // 2
+
+ features = np.reshape(features, (1, -1, nb_features))  # batch of one sequence
+ features = features[:, :, :nb_used_features]
+ features = features[:, :num_subframes, :]
+
+ #variable quantizer depending on the delay
+ q0 = 3
+ q1 = 15
+ quant_id = np.round(q1 + (q0-q1)*np.arange(args.num_redundancy_frames//2)/args.num_redundancy_frames).astype('int16')  # starts at q1; the index stops below num_redundancy_frames/2, so ids stay above roughly (q0+q1)/2
+ #print(quant_id)
+
+ quant_embed = qembedding(quant_id)  # one embedding row per position in the packet
+
+ # run encoder
+ print("running fec encoder...")
+ symbols, gru_state_dec = encoder.predict(features)
+
+ # apply quantization
+ nsymbols = 80  # same value as nb_bits passed to new_rdovae_model above
+ quant_scale = tf.math.softplus(quant_embed[:, :nsymbols]).numpy()  # first nsymbols embedding values
+ dead_zone = tf.math.softplus(quant_embed[:, nsymbols : 2 * nsymbols]).numpy()  # next nsymbols embedding values
+ #symbols = apply_dead_zone([symbols, dead_zone]).numpy()
+ #qsymbols = np.round(symbols)
+ quant_gru_state_dec = pvq_quantize(gru_state_dec, 82)
+
+ # rate estimate
+ hard_distr_embed = tf.math.sigmoid(quant_embed[:, 4 * nsymbols : ]).numpy()  # everything from offset 4*nsymbols on; broadcast below to width 2*nsymbols
+ #rate_input = np.concatenate((qsymbols, hard_distr_embed, enc_lambda), axis=-1)
+ #rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
+
+ # run decoder
+ input_length = args.num_redundancy_frames // 2
+ offset = args.num_redundancy_frames - 1  # index of the first frame that has a full history
+
+ packets = []
+ packet_sizes = []  # NOTE(review): never filled; only referenced by commented-out code
+
+ sym_batch = np.zeros((num_frames-offset, args.num_redundancy_frames//2, nsymbols), dtype='float32')  # one row of symbols per packet
+ quant_state = quant_gru_state_dec[0, offset:num_frames, :]
+ #pack symbols for batch processing
+ for i in range(offset, num_frames):
+ sym_batch[i-offset, :, :] = symbols[0, i - 2 * input_length + 2 : i + 1 : 2, :]  # every second symbol vector, ending at index i (input_length of them)
+
+ #quantize symbols
+ sym_batch = sym_batch * quant_scale
+ sym_batch = apply_dead_zone([sym_batch, dead_zone]).numpy()
+ sym_batch = np.round(sym_batch)
+
+ hard_distr_embed = np.broadcast_to(hard_distr_embed, (sym_batch.shape[0], sym_batch.shape[1], 2*sym_batch.shape[2]))
+ fake_lambda = np.ones((sym_batch.shape[0], sym_batch.shape[1], 1), dtype='float32')  # constant ones in the last slot of the rate input
+ rate_input = np.concatenate((sym_batch, hard_distr_embed, fake_lambda), axis=-1)
+ rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
+ #print(rates.shape)
+ print("average rate = ", np.mean(rates[args.num_redundancy_frames:,:]))  # the first num_redundancy_frames rows are left out of the average
+
+ #sym_batch.tofile('qsyms.f32')
+
+ sym_batch = sym_batch / quant_scale  # undo the scaling before decoding
+ #print(sym_batch.shape, quant_state.shape)
+ #features = decoder.predict([sym_batch, quant_state])
+ features = decoder([sym_batch, quant_state])  # rebinds `features`: from here on it holds decoder output, one row per packet
+
+#for i in range(offset, num_frames):
+# print(f"processing frame {i - offset}...")
+# features = decoder.predict([qsymbols[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_embed_dec[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_gru_state_dec[:, i, :]])
+# packets.append(features)
+# packet_size = 8 * int((np.sum(rates[:, i - 2 * input_length + 2 : i + 1 : 2]) + 7) / 8) + 64
+# packet_sizes.append(packet_size)
+
+
+# write packets
+packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
+#write_fec_packets(packet_file, packets, packet_sizes)
+
+
+#print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")
+
+if args.lossfile != None:
+ loss = np.loadtxt(args.lossfile, dtype='int16')
+ fec_out = np.zeros((features.shape[0]*2, features.shape[-1]), dtype='float32')
+ foffset = -2
+ ptr = 0;
+ count = 2;
+ for i in range(features.shape[0]):
+ if (loss[i] == 0) or (i == features.shape[0]-1):
+ fec_out[ptr:ptr+count,:] = features[i, foffset:, :]
+ #print("filled ", count)
+ foffset = -2
+ ptr = ptr+count
+ count = 2
+ else:
+ count = count + 2
+ foffset = foffset - 2
+
+ fec_out_full = np.zeros((fec_out.shape[0], nb_features), dtype=np.float32)
+ fec_out_full[:, :nb_used_features] = fec_out
+
+ fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')
+
+
+ #create packets array like in the original version for debugging purposes
+ for i in range(offset, num_frames):
+ packets.append(features[i-offset:i-offset+1, :, :])  # one entry per packet, keeping the leading axis of length 1
+
+ if args.debug_output:
+ import itertools
+
+ #batches = [2, 4]
+ batches = [4]
+ #offsets = [0, 4, 20]
+ offsets = [0, (args.num_redundancy_frames - 2)*2]
+ # sanity checks
+ # 1. concatenate features at offset 0
+ for batch, offset in itertools.product(batches, offsets):  # note: rebinds the module-level `offset` used above
+
+ stop = packets[0].shape[1] - offset
+ print(batch, offset, stop)
+ test_features = np.concatenate([packet[:,stop - batch: stop, :] for packet in packets[::batch//2]], axis=1)  # `batch` subframes ending at `stop`, taken from every (batch//2)-th packet
+
+ test_features_full = np.zeros((test_features.shape[1], nb_features), dtype=np.float32)  # rows padded with zeros up to nb_features values
+ test_features_full[:, :nb_used_features] = test_features[0, :, :]
+
+ print(f"writing debug output {packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32'}")
+ test_features_full.tofile(packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32')
diff --git a/dnn/training_tf2/fec_packets.c b/dnn/training_tf2/fec_packets.c
new file mode 100644
index 00000000..ee08ba95
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.c
@@ -0,0 +1,142 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "fec_packets.h"
+
+/* Read the features of a single subframe from a FEC packet file.
+ *
+ * The file starts with seven int16 header fields (version, header_size,
+ * num_packets, packet_size, subframe_size, subframes_per_packet,
+ * num_features), read here with fread() in native byte order. Every packet
+ * begins with a 2-byte rate field followed by its subframes.
+ *
+ * filename:       path of the packet file
+ * features:       output buffer; must hold at least num_features floats,
+ *                 where num_features is taken from the file header
+ * packet_index:   zero-based packet number
+ * subframe_index: zero-based subframe number inside the packet
+ *
+ * Returns 0 on success and 1 on any failure (file cannot be opened, short
+ * read, invalid header, index out of bounds, seek error).
+ */
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
+{
+
+    int16_t version;
+    int16_t header_size;
+    int16_t num_packets;
+    int16_t packet_size;
+    int16_t subframe_size;
+    int16_t subframes_per_packet;
+    int16_t num_features;
+    long offset;
+    FILE *fid;
+
+    if (filename == NULL || features == NULL)
+    {
+        fprintf(stderr, "get_fec_frame: invalid argument\n");
+        return 1;
+    }
+
+    /* fclose(NULL) is undefined, so bail out before reaching the error label */
+    fid = fopen(filename, "rb");
+    if (fid == NULL)
+    {
+        fprintf(stderr, "get_fec_frame: could not open file\n");
+        return 1;
+    }
+
+    /* read header */
+    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+    /* check if indices are valid (negative indices would seek before the data) */
+    if (packet_index < 0 || packet_index >= num_packets
+        || subframe_index < 0 || subframe_index >= subframes_per_packet)
+    {
+        fprintf(stderr, "get_fec_frame: index out of bounds\n");
+        goto error;
+    }
+
+    /* a negative feature count cannot be converted to a read length */
+    if (num_features < 0) goto error;
+
+    /* calculate offset in file (+ 2 is for rate) */
+    offset = (long) header_size + (long) packet_index * packet_size + 2 + (long) subframe_index * subframe_size;
+    if (fseek(fid, offset, SEEK_SET) != 0) goto error;
+
+    /* read features */
+    if (fread(features, sizeof(*features), (size_t) num_features, fid) != (size_t) num_features) goto error;
+
+    fclose(fid);
+    return 0;
+
+error:
+    fclose(fid);
+    return 1;
+}
+
+/* Read the rate field of one packet from a FEC packet file.
+ *
+ * The file starts with seven int16 header fields (version, header_size,
+ * num_packets, packet_size, subframe_size, subframes_per_packet,
+ * num_features), read here with fread() in native byte order. The rate is
+ * the int16 stored at the very beginning of each packet.
+ *
+ * filename:     path of the packet file
+ * packet_index: zero-based packet number
+ *
+ * Returns the rate of the packet, or -1 on any failure (file cannot be
+ * opened, short read, index out of bounds, seek error).
+ */
+int get_fec_rate(const char * const filename, int packet_index)
+{
+    int16_t version;
+    int16_t header_size;
+    int16_t num_packets;
+    int16_t packet_size;
+    int16_t subframe_size;
+    int16_t subframes_per_packet;
+    int16_t num_features;
+    long offset;
+    int16_t rate;
+    FILE *fid;
+
+    if (filename == NULL)
+    {
+        fprintf(stderr, "get_fec_rate: invalid argument\n");
+        return -1;
+    }
+
+    /* fclose(NULL) is undefined, so bail out before reaching the error label */
+    fid = fopen(filename, "rb");
+    if (fid == NULL)
+    {
+        fprintf(stderr, "get_fec_rate: could not open file\n");
+        return -1;
+    }
+
+    /* read header */
+    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+    /* check if index is valid (a negative index would seek before the data) */
+    if (packet_index < 0 || packet_index >= num_packets)
+    {
+        fprintf(stderr, "get_fec_rate: index out of bounds\n");
+        goto error;
+    }
+
+    /* calculate offset in file (the rate is the first field of the packet) */
+    offset = (long) header_size + (long) packet_index * packet_size;
+    if (fseek(fid, offset, SEEK_SET) != 0) goto error;
+
+    /* read rate */
+    if (fread(&rate, sizeof(rate), 1, fid) != 1) goto error;
+
+    fclose(fid);
+    return (int) rate;
+
+error:
+    fclose(fid);
+    return -1;
+}
+
+#if 0
+int main()
+{
+    /* Manual smoke test: print the 20 features of subframe 127 of packet 0
+       from ../test.fec, followed by the rate of that packet. */
+    float features[20];
+    int i = 0;
+
+    if (get_fec_frame("../test.fec", features, 0, 127) != 0)
+    {
+        return 1;
+    }
+
+    while (i < 20)
+    {
+        printf("%d %f\n", i, features[i]);
+        i++;
+    }
+
+    printf("rate: %d\n", get_fec_rate("../test.fec", 0));
+
+}
+#endif \ No newline at end of file
diff --git a/dnn/training_tf2/fec_packets.h b/dnn/training_tf2/fec_packets.h
new file mode 100644
index 00000000..01b128b1
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FEC_PACKETS_H
+#define FEC_PACKETS_H
+
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);
+int get_fec_rate(const char * const filename, int packet_index);
+
+#endif
diff --git a/dnn/training_tf2/fec_packets.py b/dnn/training_tf2/fec_packets.py
new file mode 100644
index 00000000..6acbe9d2
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.py
@@ -0,0 +1,108 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+
+
+
+def write_fec_packets(filename, packets, rates=None):
+ """ writes packets in binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ # derive some sizes
+ num_packets = len(packets)
+ subframes_per_packet = packets[0].shape[-2]
+ num_features = packets[0].shape[-1]
+
+ # size of float is 4
+ subframe_size = num_features * 4
+ packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate
+
+ version = 1
+ # header size (version, header_size, num_packets, packet_size, subframe_size, subrames_per_packet, num_features)
+ header_size = 14
+
+ with open(filename, 'wb') as f:
+
+ # header
+ f.write(np.int16(version).tobytes())
+ f.write(np.int16(header_size).tobytes())
+ f.write(np.int16(num_packets).tobytes())
+ f.write(np.int16(packet_size).tobytes())
+ f.write(np.int16(subframe_size).tobytes())
+ f.write(np.int16(subframes_per_packet).tobytes())
+ f.write(np.int16(num_features).tobytes())
+
+ # packets
+ for i, packet in enumerate(packets):
+ if type(rates) == type(None):
+ rate = 0
+ else:
+ rate = rates[i]
+
+ f.write(np.int16(rate).tobytes())
+
+ features = np.flip(packet, axis=-2)
+ f.write(features.astype(np.float32).tobytes())
+
+
+def read_fec_packets(filename):
+ """ reads packets from binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ with open(filename, 'rb') as f:
+
+ # header
+ version = np.frombuffer(f.read(2), dtype=np.int16).item()
+ header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_packets = np.frombuffer(f.read(2), dtype=np.int16).item()
+ packet_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_features = np.frombuffer(f.read(2), dtype=np.int16).item()
+
+ dummy_features = np.zeros((1, subframes_per_packet, num_features), dtype=np.float32)
+
+ # packets
+ rates = []
+ packets = []
+ for i in range(num_packets):
+
+ rate = np.frombuffer(f.read(2), dtype=np.int16).item
+ rates.append(rate)
+
+ features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
+ packet = np.flip(features, axis=-2)
+ packets.append(packet)
+
+ return packets \ No newline at end of file
diff --git a/dnn/training_tf2/keraslayerdump.py b/dnn/training_tf2/keraslayerdump.py
new file mode 100644
index 00000000..5abc1488
--- /dev/null
+++ b/dnn/training_tf2/keraslayerdump.py
@@ -0,0 +1,189 @@
+'''Copyright (c) 2017-2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+""" helper functions for dumping some Keras layers to C files """
+
+import numpy as np
+
+
+def printVector(f, vector, name, dtype='float', dotp=False, static=True):
+ """ prints vector as one-dimensional C array """
+ if dotp:
+ vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
+ vector = vector.transpose((2, 0, 3, 1))
+ v = np.reshape(vector, (-1))
+ if static:
+ f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+ else:
+ f.write('const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+ for i in range(0, len(v)):
+ f.write('{}'.format(v[i]))
+ if (i!=len(v)-1):
+ f.write(',')
+ else:
+ break;
+ if (i%8==7):
+ f.write("\n ")
+ else:
+ f.write(" ")
+ f.write('\n};\n\n')
+ return vector
+
+def printSparseVector(f, A, name, have_diag=True):
+ N = A.shape[0]
+ M = A.shape[1]
+ W = np.zeros((0,), dtype='int')
+ W0 = np.zeros((0,))
+ if have_diag:
+ diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+ A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+ A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+ A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+ printVector(f, diag, name + '_diag')
+ AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
+ idx = np.zeros((0,), dtype='int')
+ for i in range(M//8):
+ pos = idx.shape[0]
+ idx = np.append(idx, -1)
+ nb_nonzero = 0
+ for j in range(N//4):
+ block = A[j*4:(j+1)*4, i*8:(i+1)*8]
+ qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
+ if np.sum(np.abs(block)) > 1e-10:
+ nb_nonzero = nb_nonzero + 1
+ idx = np.append(idx, j*4)
+ vblock = qblock.transpose((1,0)).reshape((-1,))
+ W0 = np.concatenate([W0, block.reshape((-1,))])
+ W = np.concatenate([W, vblock])
+ idx[pos] = nb_nonzero
+ f.write('#ifdef DOT_PROD\n')
+ printVector(f, W, name, dtype='qweight')
+ f.write('#else /*DOT_PROD*/\n')
+ printVector(f, W0, name, dtype='qweight')
+ f.write('#endif /*DOT_PROD*/\n')
+ printVector(f, idx, name + '_idx', dtype='int')
+ return AQ
+
+def dump_sparse_gru(self, f, hf):
+ name = 'sparse_' + self.name
+ print("printing layer " + name + " of type sparse " + self.__class__.__name__)
+ weights = self.get_weights()
+ qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
+ printVector(f, weights[-1], name + '_bias')
+ subias = weights[-1].copy()
+ subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
+ printVector(f, subias, name + '_subias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1
+ neurons = weights[0].shape[1]//3
+ max_rnn_neurons = neurons
+ f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
+ .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+ return max_rnn_neurons
+
+def dump_gru_layer(self, f, hf, dotp=False, sparse=False):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ if sparse:
+ qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
+ else:
+ qweight = printVector(f, weights[0], name + '_weights')
+
+ if dotp:
+ f.write('#ifdef DOT_PROD\n')
+ qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
+ printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
+ f.write('#else /*DOT_PROD*/\n')
+ else:
+ qweight2 = weights[1]
+
+ printVector(f, weights[1], name + '_recurrent_weights')
+ if dotp:
+ f.write('#endif /*DOT_PROD*/\n')
+
+ printVector(f, weights[-1], name + '_bias')
+ subias = weights[-1].copy()
+ subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
+ subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
+ printVector(f, subias, name + '_subias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1
+ neurons = weights[0].shape[1]//3
+ max_rnn_neurons = neurons
+ f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {},\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+ .format(name, name, name, name, name + "_weights_idx" if sparse else "NULL", name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('extern const GRULayer {};\n\n'.format(name));
+ return max_rnn_neurons
+
+def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
+ printVector(f, weights, name + '_weights')
+ printVector(f, bias, name + '_bias')
+ f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
+ .format(name, name, name, weights.shape[0], weights.shape[1], activation))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+ hf.write('extern const DenseLayer {};\n\n'.format(name));
+
+def dump_dense_layer(self, f, hf):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ activation = self.activation.__name__.upper()
+ dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
+ return False
+
+def dump_conv1d_layer(self, f, hf):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ printVector(f, weights[0], name + '_weights')
+ printVector(f, weights[-1], name + '_bias')
+ activation = self.activation.__name__.upper()
+ max_conv_inputs = weights[0].shape[1]*weights[0].shape[0]
+ f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
+ .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
+ hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
+ hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
+ hf.write('extern const Conv1DLayer {};\n\n'.format(name));
+ return max_conv_inputs
diff --git a/dnn/training_tf2/lossfuncs.py b/dnn/training_tf2/lossfuncs.py
new file mode 100644
index 00000000..78be1fd6
--- /dev/null
+++ b/dnn/training_tf2/lossfuncs.py
@@ -0,0 +1,99 @@
+"""
+Custom Loss functions and metrics for training/analysis
+"""
+
+from tf_funcs import *
+import tensorflow as tf
+
+# The following loss functions all expect the lpcnet model to output the lpc prediction
+
+# Computing the excitation by subtracting the lpc prediction from the target, followed by minimizing the cross entropy
+def res_from_sigloss():
+ def loss(y_true,y_pred):
+ p = y_pred[:,:,0:1]
+ model_out = y_pred[:,:,2:]
+ e_gt = tf_l2u(y_true - p)
+ e_gt = tf.round(e_gt)
+ e_gt = tf.cast(e_gt,'int32')
+ sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,model_out)
+ return sparse_cel
+ return loss
+
+# Interpolated and Compensated Loss (In case of end to end lpcnet)
+# Interpolates between adjacent embeddings based on the fractional value of the excitation computed (similar to the embedding interpolation)
+# Also adds a probability compensation (to account for matching cross entropy in the linear domain), weighted by gamma
+def interp_mulaw(gamma = 1):
+ def loss(y_true,y_pred):
+ y_true = tf.cast(y_true, 'float32')
+ p = y_pred[:,:,0:1]
+ real_p = y_pred[:,:,1:2]
+ model_out = y_pred[:,:,2:]
+ e_gt = tf_l2u(y_true - p)
+ exc_gt = tf_l2u(y_true - real_p)
+ prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
+ regularization = tf.squeeze((K.abs(exc_gt - 128)/128.0)*K.log(256.0))
+ alpha = e_gt - tf.math.floor(e_gt)
+ alpha = tf.tile(alpha,[1,1,256])
+ e_gt = tf.cast(e_gt,'int32')
+ e_gt = tf.clip_by_value(e_gt,0,254)
+ interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
+ sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
+ loss_mod = sparse_cel + prob_compensation + gamma*regularization
+ return loss_mod
+ return loss
+
+# Same as above, except a metric
+def metric_oginterploss(y_true,y_pred):
+ p = y_pred[:,:,0:1]
+ model_out = y_pred[:,:,2:]
+ e_gt = tf_l2u(y_true - p)
+ prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
+ alpha = e_gt - tf.math.floor(e_gt)
+ alpha = tf.tile(alpha,[1,1,256])
+ e_gt = tf.cast(e_gt,'int32')
+ e_gt = tf.clip_by_value(e_gt,0,254)
+ interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
+ sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
+ loss_mod = sparse_cel + prob_compensation
+ return loss_mod
+
+# Interpolated cross entropy loss metric
+def metric_icel(y_true, y_pred):
+ p = y_pred[:,:,0:1]
+ model_out = y_pred[:,:,2:]
+ e_gt = tf_l2u(y_true - p)
+ alpha = e_gt - tf.math.floor(e_gt)
+ alpha = tf.tile(alpha,[1,1,256])
+ e_gt = tf.cast(e_gt,'int32')
+ e_gt = tf.clip_by_value(e_gt,0,254) #Check direction
+ interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
+ sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
+ return sparse_cel
+
+# Non-interpolated (rounded) cross entropy loss metric
+def metric_cel(y_true, y_pred):
+ y_true = tf.cast(y_true, 'float32')
+ p = y_pred[:,:,0:1]
+ model_out = y_pred[:,:,2:]
+ e_gt = tf_l2u(y_true - p)
+ e_gt = tf.round(e_gt)
+ e_gt = tf.cast(e_gt,'int32')
+ e_gt = tf.clip_by_value(e_gt,0,255)
+ sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,model_out)
+ return sparse_cel
+
+# Variance metric of the output excitation
+def metric_exc_sd(y_true,y_pred):
+ p = y_pred[:,:,0:1]
+ e_gt = tf_l2u(y_true - p)
+ sd_egt = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)(e_gt,128)
+ return sd_egt
+
+def loss_matchlar():
+ def loss(y_true,y_pred):
+ model_rc = y_pred[:,:,:16]
+ #y_true = lpc2rc(y_true)
+ loss_lar_diff = K.log((1.01 + model_rc)/(1.01 - model_rc)) - K.log((1.01 + y_true)/(1.01 - y_true))
+ loss_lar_diff = tf.square(loss_lar_diff)
+ return tf.reduce_mean(loss_lar_diff, axis=-1)
+ return loss
diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py
new file mode 100644
index 00000000..497f7572
--- /dev/null
+++ b/dnn/training_tf2/lpcnet.py
@@ -0,0 +1,339 @@
+#!/usr/bin/python3
+'''Copyright (c) 2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+import tensorflow as tf
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
+from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import Constraint
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+from mdense import MDense
+import numpy as np
+import h5py
+import sys
+from tf_funcs import *
+from diffembed import diff_Embed
+from parameters import set_parameter
+
+frame_size = 160
+pcm_bits = 8
+embed_size = 128
+pcm_levels = 2**pcm_bits
+
+def interleave(p, samples):
+ p2=tf.expand_dims(p, 3)
+ nb_repeats = pcm_levels//(2*p.shape[2])
+ p3 = tf.reshape(tf.repeat(tf.concat([1-p2, p2], 3), nb_repeats), (-1, samples, pcm_levels))
+ return p3
+
+def tree_to_pdf(p, samples):
+ return interleave(p[:,:,1:2], samples) * interleave(p[:,:,2:4], samples) * interleave(p[:,:,4:8], samples) * interleave(p[:,:,8:16], samples) \
+ * interleave(p[:,:,16:32], samples) * interleave(p[:,:,32:64], samples) * interleave(p[:,:,64:128], samples) * interleave(p[:,:,128:256], samples)
+
+def tree_to_pdf_train(p):
+ #FIXME: try not to hardcode the 2400 samples (15 frames * 160 samples/frame)
+ return tree_to_pdf(p, 2400)
+
+def tree_to_pdf_infer(p):
+ return tree_to_pdf(p, 1)
+
+def quant_regularizer(x):
+ Q = 128
+ Q_1 = 1./Q
+ #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
+ return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
+
+class Sparsify(Callback):
+ def __init__(self, t_start, t_end, interval, density, quantize=False):
+ super(Sparsify, self).__init__()
+ self.batch = 0
+ self.t_start = t_start
+ self.t_end = t_end
+ self.interval = interval
+ self.final_density = density
+ self.quantize = quantize
+
+ def on_batch_end(self, batch, logs=None):
+ #print("batch number", self.batch)
+ self.batch += 1
+ if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
+ #print("constrain");
+ layer = self.model.get_layer('gru_a')
+ w = layer.get_weights()
+ p = w[1]
+ nb = p.shape[1]//p.shape[0]
+ N = p.shape[0]
+ #print("nb = ", nb, ", N = ", N);
+ #print(p.shape)
+ #print ("density = ", density)
+ for k in range(nb):
+ density = self.final_density[k]
+ if self.batch < self.t_end and not self.quantize:
+ r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
+ density = 1 - (1-self.final_density[k])*(1 - r*r*r)
+ A = p[:, k*N:(k+1)*N]
+ A = A - np.diag(np.diag(A))
+ #This is needed because of the CuDNNGRU strange weight ordering
+ A = np.transpose(A, (1, 0))
+ L=np.reshape(A, (N//4, 4, N//8, 8))
+ S=np.sum(L*L, axis=-1)
+ S=np.sum(S, axis=1)
+ SS=np.sort(np.reshape(S, (-1,)))
+ thresh = SS[round(N*N//32*(1-density))]
+ mask = (S>=thresh).astype('float32')
+ mask = np.repeat(mask, 4, axis=0)
+ mask = np.repeat(mask, 8, axis=1)
+ mask = np.minimum(1, mask + np.diag(np.ones((N,))))
+ #This is needed because of the CuDNNGRU strange weight ordering
+ mask = np.transpose(mask, (1, 0))
+ p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
+ #print(thresh, np.mean(mask))
+ if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
+ if self.batch < self.t_end:
+ threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
+ else:
+ threshold = .5
+ quant = np.round(p*128.)
+ res = p*128.-quant
+ mask = (np.abs(res) <= threshold).astype('float32')
+ p = mask/128.*quant + (1-mask)*p
+
+ w[1] = p
+ layer.set_weights(w)
+
+class SparsifyGRUB(Callback):
+ def __init__(self, t_start, t_end, interval, grua_units, density, quantize=False):
+ super(SparsifyGRUB, self).__init__()
+ self.batch = 0
+ self.t_start = t_start
+ self.t_end = t_end
+ self.interval = interval
+ self.final_density = density
+ self.grua_units = grua_units
+ self.quantize = quantize
+
+ def on_batch_end(self, batch, logs=None):
+ #print("batch number", self.batch)
+ self.batch += 1
+ if self.quantize or (self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end:
+ #print("constrain");
+ layer = self.model.get_layer('gru_b')
+ w = layer.get_weights()
+ p = w[0]
+ N = p.shape[0]
+ M = p.shape[1]//3
+ for k in range(3):
+ density = self.final_density[k]
+ if self.batch < self.t_end and not self.quantize:
+ r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
+ density = 1 - (1-self.final_density[k])*(1 - r*r*r)
+ A = p[:, k*M:(k+1)*M]
+ #This is needed because of the CuDNNGRU strange weight ordering
+ A = np.reshape(A, (M, N))
+ A = np.transpose(A, (1, 0))
+ N2 = self.grua_units
+ A2 = A[:N2, :]
+ L=np.reshape(A2, (N2//4, 4, M//8, 8))
+ S=np.sum(L*L, axis=-1)
+ S=np.sum(S, axis=1)
+ SS=np.sort(np.reshape(S, (-1,)))
+ thresh = SS[round(M*N2//32*(1-density))]
+ mask = (S>=thresh).astype('float32')
+ mask = np.repeat(mask, 4, axis=0)
+ mask = np.repeat(mask, 8, axis=1)
+ A = np.concatenate([A2*mask, A[N2:,:]], axis=0)
+ #This is needed because of the CuDNNGRU strange weight ordering
+ A = np.transpose(A, (1, 0))
+ A = np.reshape(A, (N, M))
+ p[:, k*M:(k+1)*M] = A
+ #print(thresh, np.mean(mask))
+ if self.quantize and ((self.batch > self.t_start and (self.batch-self.t_start) % self.interval == 0) or self.batch >= self.t_end):
+ if self.batch < self.t_end:
+ threshold = .5*(self.batch - self.t_start)/(self.t_end - self.t_start)
+ else:
+ threshold = .5
+ quant = np.round(p*128.)
+ res = p*128.-quant
+ mask = (np.abs(res) <= threshold).astype('float32')
+ p = mask/128.*quant + (1-mask)*p
+
+ w[0] = p
+ layer.set_weights(w)
+
+
+class PCMInit(Initializer):
+ def __init__(self, gain=.1, seed=None):
+ self.gain = gain
+ self.seed = seed
+
+ def __call__(self, shape, dtype=None):
+ num_rows = 1
+ for dim in shape[:-1]:
+ num_rows *= dim
+ num_cols = shape[-1]
+ flat_shape = (num_rows, num_cols)
+ if self.seed is not None:
+ np.random.seed(self.seed)
+ a = np.random.uniform(-1.7321, 1.7321, flat_shape)
+ #a[:,0] = math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows
+ #a[:,1] = .5*a[:,0]*a[:,0]*a[:,0]
+ a = a + np.reshape(math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows, (num_rows, 1))
+ return self.gain * a.astype("float32")
+
+ def get_config(self):
+ return {
+ 'gain': self.gain,
+ 'seed': self.seed
+ }
+
+class WeightClip(Constraint):
+ '''Clips the weights incident to each hidden unit to be inside a range
+ '''
+ def __init__(self, c=2):
+ self.c = c
+
+ def __call__(self, p):
+ # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
+ # saturation when implementing dot products with SSSE3 or AVX2.
+ return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
+ #return K.clip(p, -self.c, self.c)
+
+ def get_config(self):
+ return {'name': self.__class__.__name__,
+ 'c': self.c}
+
+constraint = WeightClip(0.992)
+
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128, lpc_order=16, lpc_gamma=1., lookahead=2):
+ pcm = Input(shape=(None, 1), batch_size=batch_size)
+ dpcm = Input(shape=(None, 3), batch_size=batch_size)
+ feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+ pitch = Input(shape=(None, 1), batch_size=batch_size)
+ dec_feat = Input(shape=(None, cond_size))
+ dec_state1 = Input(shape=(rnn_units1,))
+ dec_state2 = Input(shape=(rnn_units2,))
+
+ padding = 'valid' if training else 'same'
+ fconv1 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv1')
+ fconv2 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv2')
+ pembed = Embedding(256, 64, name='embed_pitch')
+ cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
+
+ cfeat = fconv2(fconv1(cat_feat))
+
+ fdense1 = Dense(cond_size, activation='tanh', name='feature_dense1')
+ fdense2 = Dense(cond_size, activation='tanh', name='feature_dense2')
+
+ if flag_e2e and quantize:
+ fconv1.trainable = False
+ fconv2.trainable = False
+ fdense1.trainable = False
+ fdense2.trainable = False
+
+ cfeat = fdense2(fdense1(cfeat))
+
+ error_calc = Lambda(lambda x: tf_l2u(x[0] - tf.roll(x[1],1,axis = 1)))
+ if flag_e2e:
+ lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
+ else:
+ lpcoeffs = Input(shape=(None, lpc_order), batch_size=batch_size)
+
+ real_preds = diff_pred(name = "real_lpc2preds")([pcm,lpcoeffs])
+ weighting = lpc_gamma ** np.arange(1, 17).astype('float32')
+ weighted_lpcoeffs = Lambda(lambda x: x[0]*x[1])([lpcoeffs, weighting])
+ tensor_preds = diff_pred(name = "lpc2preds")([pcm,weighted_lpcoeffs])
+ past_errors = error_calc([pcm,tensor_preds])
+
+ embed = diff_Embed(name='embed_sig',initializer = PCMInit())
+ cpcm = Concatenate()([tf_l2u(pcm),tf_l2u(tensor_preds),past_errors])
+ cpcm = GaussianNoise(.3)(cpcm)
+ cpcm = Reshape((-1, embed_size*3))(embed(cpcm))
+ cpcm_decoder = Reshape((-1, embed_size*3))(embed(dpcm))
+
+
+ rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
+
+ quant = quant_regularizer if quantize else None
+
+ if training:
+ rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a', stateful=True,
+ recurrent_constraint = constraint, recurrent_regularizer=quant)
+ rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+ else:
+ rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_a', stateful=True,
+ recurrent_constraint = constraint, recurrent_regularizer=quant)
+ rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='gru_b', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+
+ rnn_in = Concatenate()([cpcm, rep(cfeat)])
+ md = MDense(pcm_levels, activation='sigmoid', name='dual_fc')
+ gru_out1, _ = rnn(rnn_in)
+ gru_out1 = GaussianNoise(.005)(gru_out1)
+ gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
+ ulaw_prob = Lambda(tree_to_pdf_train)(md(gru_out2))
+
+ if adaptation:
+ rnn.trainable=False
+ rnn2.trainable=False
+ md.trainable=False
+ embed.Trainable=False
+
+ m_out = Concatenate(name='pdf')([tensor_preds,real_preds,ulaw_prob])
+ if not flag_e2e:
+ model = Model([pcm, feat, pitch, lpcoeffs], m_out)
+ else:
+ model = Model([pcm, feat, pitch], [m_out, cfeat])
+ model.rnn_units1 = rnn_units1
+ model.rnn_units2 = rnn_units2
+ model.nb_used_features = nb_used_features
+ model.frame_size = frame_size
+
+ if not flag_e2e:
+ encoder = Model([feat, pitch], cfeat)
+ dec_rnn_in = Concatenate()([cpcm_decoder, dec_feat])
+ else:
+ encoder = Model([feat, pitch], [cfeat,lpcoeffs])
+ dec_rnn_in = Concatenate()([cpcm_decoder, dec_feat])
+ dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
+ dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
+ dec_ulaw_prob = Lambda(tree_to_pdf_infer)(md(dec_gru_out2))
+
+ if flag_e2e:
+ decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
+ else:
+ decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
+
+ # add parameters to model
+ set_parameter(model, 'lpc_gamma', lpc_gamma, dtype='float64')
+ set_parameter(model, 'flag_e2e', flag_e2e, dtype='bool')
+ set_parameter(model, 'lookahead', lookahead, dtype='int32')
+
+ return model, encoder, decoder
diff --git a/dnn/training_tf2/lpcnet_plc.py b/dnn/training_tf2/lpcnet_plc.py
new file mode 100644
index 00000000..618e0084
--- /dev/null
+++ b/dnn/training_tf2/lpcnet_plc.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+import tensorflow as tf
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
+from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import Constraint
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+import numpy as np
+
+def quant_regularizer(x):
+ Q = 128
+ Q_1 = 1./Q
+ #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
+ return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))
+
+
+class WeightClip(Constraint):
+ '''Clips the weights incident to each hidden unit to be inside a range
+ '''
+ def __init__(self, c=2):
+ self.c = c
+
+ def __call__(self, p):
+ # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
+ # saturation when implementing dot products with SSSE3 or AVX2.
+ return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
+ #return K.clip(p, -self.c, self.c)
+
+ def get_config(self):
+ return {'name': self.__class__.__name__,
+ 'c': self.c}
+
+# Shared WeightClip instance handed to both GRU layers built in new_lpcnet_plc_model().
+constraint = WeightClip(0.992)
+
+def new_lpcnet_plc_model(rnn_units=256, nb_used_features=20, nb_burg_features=36, batch_size=128, training=False, adaptation=False, quantize=False, cond_size=128):
+ feat = Input(shape=(None, nb_used_features+nb_burg_features), batch_size=batch_size)
+ lost = Input(shape=(None, 1), batch_size=batch_size)
+
+ fdense1 = Dense(cond_size, activation='tanh', name='plc_dense1')
+
+ cfeat = Concatenate()([feat, lost])
+ cfeat = fdense1(cfeat)
+ #cfeat = Conv1D(cond_size, 3, padding='causal', activation='tanh', name='plc_conv1')(cfeat)
+
+ quant = quant_regularizer if quantize else None
+
+ if training:
+ rnn = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru1', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+ rnn2 = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru2', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+ else:
+ rnn = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru1', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+ rnn2 = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru2', stateful=True,
+ kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
+
+ gru_out1, _ = rnn(cfeat)
+ gru_out1 = GaussianNoise(.005)(gru_out1)
+ gru_out2, _ = rnn2(gru_out1)
+
+ out_dense = Dense(nb_used_features, activation='linear', name='plc_out')
+ plc_out = out_dense(gru_out2)
+
+ model = Model([feat, lost], plc_out)
+ model.rnn_units = rnn_units
+ model.cond_size = cond_size
+ model.nb_used_features = nb_used_features
+ model.nb_burg_features = nb_burg_features
+
+ return model
diff --git a/dnn/training_tf2/mdense.py b/dnn/training_tf2/mdense.py
new file mode 100644
index 00000000..844ae23e
--- /dev/null
+++ b/dnn/training_tf2/mdense.py
@@ -0,0 +1,95 @@
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import Layer, InputSpec
+from tensorflow.keras import activations
+from tensorflow.keras import initializers, regularizers, constraints
+import numpy as np
+import math
+
+class MDense(Layer):
+
+ def __init__(self, outputs,
+ channels=2,
+ activation=None,
+ use_bias=True,
+ kernel_initializer='glorot_uniform',
+ bias_initializer='zeros',
+ kernel_regularizer=None,
+ bias_regularizer=None,
+ activity_regularizer=None,
+ kernel_constraint=None,
+ bias_constraint=None,
+ **kwargs):
+ if 'input_shape' not in kwargs and 'input_dim' in kwargs:
+ kwargs['input_shape'] = (kwargs.pop('input_dim'),)
+ super(MDense, self).__init__(**kwargs)
+ self.units = outputs
+ self.channels = channels
+ self.activation = activations.get(activation)
+ self.use_bias = use_bias
+ self.kernel_initializer = initializers.get(kernel_initializer)
+ self.bias_initializer = initializers.get(bias_initializer)
+ self.kernel_regularizer = regularizers.get(kernel_regularizer)
+ self.bias_regularizer = regularizers.get(bias_regularizer)
+ self.activity_regularizer = regularizers.get(activity_regularizer)
+ self.kernel_constraint = constraints.get(kernel_constraint)
+ self.bias_constraint = constraints.get(bias_constraint)
+ self.input_spec = InputSpec(min_ndim=2)
+ self.supports_masking = True
+
+ def build(self, input_shape):
+ assert len(input_shape) >= 2
+ input_dim = input_shape[-1]
+
+ self.kernel = self.add_weight(shape=(self.units, input_dim, self.channels),
+ initializer=self.kernel_initializer,
+ name='kernel',
+ regularizer=self.kernel_regularizer,
+ constraint=self.kernel_constraint)
+ if self.use_bias:
+ self.bias = self.add_weight(shape=(self.units, self.channels),
+ initializer=self.bias_initializer,
+ name='bias',
+ regularizer=self.bias_regularizer,
+ constraint=self.bias_constraint)
+ else:
+ self.bias = None
+ self.factor = self.add_weight(shape=(self.units, self.channels),
+ initializer='ones',
+ name='factor',
+ regularizer=self.bias_regularizer,
+ constraint=self.bias_constraint)
+ self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
+ self.built = True
+
+ def call(self, inputs):
+ output = K.dot(inputs, self.kernel)
+ if self.use_bias:
+ output = output + self.bias
+ output = K.tanh(output) * self.factor
+ output = K.sum(output, axis=-1)
+ if self.activation is not None:
+ output = self.activation(output)
+ return output
+
+ def compute_output_shape(self, input_shape):
+ assert input_shape and len(input_shape) >= 2
+ assert input_shape[-1]
+ output_shape = list(input_shape)
+ output_shape[-1] = self.units
+ return tuple(output_shape)
+
+ def get_config(self):
+ config = {
+ 'units': self.units,
+ 'activation': activations.serialize(self.activation),
+ 'use_bias': self.use_bias,
+ 'kernel_initializer': initializers.serialize(self.kernel_initializer),
+ 'bias_initializer': initializers.serialize(self.bias_initializer),
+ 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
+ 'bias_regularizer': regularizers.serialize(self.bias_regularizer),
+ 'activity_regularizer': regularizers.serialize(self.activity_regularizer),
+ 'kernel_constraint': constraints.serialize(self.kernel_constraint),
+ 'bias_constraint': constraints.serialize(self.bias_constraint)
+ }
+ base_config = super(MDense, self).get_config()
+ return dict(list(base_config.items()) + list(config.items()))
diff --git a/dnn/training_tf2/pade.py b/dnn/training_tf2/pade.py
new file mode 100644
index 00000000..f88f425c
--- /dev/null
+++ b/dnn/training_tf2/pade.py
@@ -0,0 +1,70 @@
+# Fitting the coefficients of a rational function so that it approximates tanh()
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+import tensorflow.keras.backend as K
+from tensorflow.keras.optimizers import Adam, SGD
+
+def my_loss1(y_true, y_pred):
+ return 1*K.mean(K.square(y_true-y_pred)) + 1*K.max(K.square(y_true-y_pred), axis=1)
+
+def my_loss2(y_true, y_pred):
+ return .1*K.mean(K.square(y_true-y_pred)) + 1*K.max(K.square(y_true-y_pred), axis=1)
+
+def my_loss3(y_true, y_pred):
+ return .01*K.mean(K.square(y_true-y_pred)) + 1*K.max(K.square(y_true-y_pred), axis=1)
+
+# Using these initializers to seed the approximation
+# with a reasonable starting point
+def num_init(shape, dtype=None):
+ rr = tf.constant([[945], [105], [1]], dtype=dtype)
+ #rr = tf.constant([[946.56757], [98.01368], [0.66841]], dtype=dtype)
+ print(rr)
+ return rr
+
+def den_init(shape, dtype=None):
+ rr = tf.constant([[945], [420], [15]], dtype=dtype)
+ #rr = tf.constant([[946.604], [413.342], [12.465]], dtype=dtype)
+ print(rr)
+ return rr
+
+
+# Training data: x sampled from -10 to 10 in steps of .01, as a single
+# sequence of shape (1, N, 1).
+x = np.arange(-10, 10, .01)
+N = len(x)
+x = np.reshape(x, (1, -1, 1))
+x2 = x*x
+
+# Second input: the powers [1, x^2, x^4] stacked on the last axis; target is tanh(x).
+x2in = np.concatenate([x2*0 + 1, x2, x2*x2], axis=2)
+yout = np.tanh(x)
+
+
+model_x = Input(shape=(None, 1,))
+model_x2 = Input(shape=(None, 3,))
+
+# Bias-free dense layers: each is a linear combination of 1, x^2 and x^4,
+# i.e. the numerator and denominator polynomials of the approximation.
+num = Dense(1, name='num', use_bias=False, kernel_initializer=num_init)
+den = Dense(1, name='den', use_bias=False, kernel_initializer=den_init)
+
+# x * numerator / denominator, clamped to [-1, 1].
+def ratio(x):
+ return tf.minimum(1., tf.maximum(-1., x[0]*x[1]/x[2]))
+
+out_layer = Lambda(ratio)
+output = out_layer([model_x, num(model_x2), den(model_x2)])
+
+model = Model([model_x, model_x2], output)
+model.summary()
+
+# Stage 1: plain mean-squared-error fit with the largest learning rate.
+model.compile(Adam(0.05, beta_1=0.9, beta_2=0.9, decay=2e-5), loss='mean_squared_error')
+model.fit([x, x2in], yout, batch_size=1, epochs=500000, validation_split=0.0)
+
+# Stages 2-4: losses that add the maximum squared error, with the weight of
+# the mean term (1, .1, .01) and the learning rate both decreasing.
+model.compile(Adam(0.001, beta_2=0.9, decay=1e-4), loss=my_loss1)
+model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)
+
+model.compile(Adam(0.0001, beta_2=0.9, decay=1e-4), loss=my_loss2)
+model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)
+
+model.compile(Adam(0.00001, beta_2=0.9, decay=1e-4), loss=my_loss3)
+model.fit([x, x2in], yout, batch_size=1, epochs=50000, validation_split=0.0)
+
+# Store the fitted numerator/denominator coefficients.
+model.save_weights('tanh.h5')
diff --git a/dnn/training_tf2/parameters.py b/dnn/training_tf2/parameters.py
new file mode 100644
index 00000000..3621a4e4
--- /dev/null
+++ b/dnn/training_tf2/parameters.py
@@ -0,0 +1,29 @@
+""" module for handling extra model parameters for tf.keras models """
+
+import tensorflow as tf
+
+
+def set_parameter(model, parameter_name, parameter_value, dtype='float32'):
+ """ stores parameter_value as non-trainable weight with name parameter_name:0 """
+
+ weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]
+
+ if len(weights) == 0:
+ model.add_weight(parameter_name, trainable=False, initializer=tf.keras.initializers.Constant(parameter_value), dtype=dtype)
+ elif len(weights) == 1:
+ weights[0].assign(parameter_value)
+ else:
+ raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
+
+
+def get_parameter(model, parameter_name, default=None):
+ """ returns parameter value if parameter is present in model and otherwise default """
+
+ weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]
+
+ if len(weights) == 0:
+ return default
+ elif len(weights) > 1:
+ raise ValueError(f"more than one weight starting with {parameter_name}:0 in model")
+ else:
+ return weights[0].numpy().item()
diff --git a/dnn/training_tf2/plc_loader.py b/dnn/training_tf2/plc_loader.py
new file mode 100644
index 00000000..a9bd41d8
--- /dev/null
+++ b/dnn/training_tf2/plc_loader.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import numpy as np
+from tensorflow.keras.utils import Sequence
+
+class PLCLoader(Sequence):
+ def __init__(self, features, lost, nb_burg_features, batch_size):
+ self.batch_size = batch_size
+ self.nb_batches = features.shape[0]//self.batch_size
+ self.features = features[:self.nb_batches*self.batch_size, :, :]
+ self.lost = lost.astype('float')
+ self.lost = self.lost[:(len(self.lost)//features.shape[1]-1)*features.shape[1]]
+ self.nb_burg_features = nb_burg_features
+ self.on_epoch_end()
+
+ def on_epoch_end(self):
+ self.indices = np.arange(self.nb_batches*self.batch_size)
+ np.random.shuffle(self.indices)
+ offset = np.random.randint(0, high=self.features.shape[1])
+ self.lost_offset = np.reshape(self.lost[offset:-self.features.shape[1]+offset], (-1, self.features.shape[1]))
+ self.lost_indices = np.random.randint(0, high=self.lost_offset.shape[0], size=self.nb_batches*self.batch_size)
+
+ def __getitem__(self, index):
+ features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
+ burg_lost = (np.random.rand(features.shape[0], features.shape[1]) > .1).astype('float')
+ burg_lost = np.reshape(burg_lost, (features.shape[0], features.shape[1], 1))
+ burg_mask = np.tile(burg_lost, (1,1,self.nb_burg_features))
+
+ lost = self.lost_offset[self.lost_indices[index*self.batch_size:(index+1)*self.batch_size], :]
+ lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
+ lost_mask = np.tile(lost, (1,1,features.shape[2]))
+ in_features = features*lost_mask
+ in_features[:,:,:self.nb_burg_features] = in_features[:,:,:self.nb_burg_features]*burg_mask
+
+ #For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
+ #in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
+ out_lost = np.copy(lost)
+ #out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]
+
+ out_features = np.concatenate([features[:,:,self.nb_burg_features:], 1.-out_lost], axis=-1)
+ burg_sign = 2*burg_lost - 1
+ # last dim is 1 for received packet, 0 for lost packet, and -1 when just the Burg info is missing
+ inputs = [in_features*lost_mask, lost*burg_sign]
+ outputs = [out_features]
+ return (inputs, outputs)
+
+ def __len__(self):
+ return self.nb_batches
diff --git a/dnn/training_tf2/rdovae.py b/dnn/training_tf2/rdovae.py
new file mode 100644
index 00000000..6240120d
--- /dev/null
+++ b/dnn/training_tf2/rdovae.py
@@ -0,0 +1,372 @@
+#!/usr/bin/python3
+'''Copyright (c) 2022 Amazon
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+import tensorflow as tf
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise, AveragePooling1D, RepeatVector
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
+from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import Constraint
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+from tensorflow.keras.regularizers import l1
+import numpy as np
+import h5py
+from uniform_noise import UniformNoise
+
+class WeightClip(Constraint):
+ '''Clips the weights incident to each hidden unit to be inside a range
+ '''
+ def __init__(self, c=2):
+ self.c = c
+
+ def __call__(self, p):
+ # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
+ # saturation when implementing dot products with SSSE3 or AVX2.
+ return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
+ #return K.clip(p, -self.c, self.c)
+
+ def get_config(self):
+ return {'name': self.__class__.__name__,
+ 'c': self.c}
+
+# Shared WeightClip instance handed to the constrained Dense and GRU layers of the encoder and decoder below.
+constraint = WeightClip(0.496)
+
+def soft_quantize(x):
+ #x = 4*x
+ #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+ #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+ #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+ return x
+
+def noise_quantize(x):
+ return soft_quantize(x + (K.random_uniform((128, 16, 80))-.5) )
+
+def hard_quantize(x):
+ x = soft_quantize(x)
+ quantized = tf.round(x)
+ return x + tf.stop_gradient(quantized - x)
+
+def apply_dead_zone(x):
+ d = x[1]*.05
+ x = x[0]
+ y = x - d*tf.math.tanh(x/(.1+d))
+ return y
+
+def rate_loss(y_true,y_pred):
+ log2_e = 1.4427
+ n = y_pred.shape[-1]
+ C = n - log2_e*np.math.log(np.math.gamma(n))
+ k = K.sum(K.abs(y_pred), axis=-1)
+ p = 1.5
+ #rate = C + (n-1)*log2_e*tf.math.log((k**p + (n/5)**p)**(1/p))
+ rate = C + (n-1)*log2_e*tf.math.log(k + .112*n**2/(n/1.8+k) )
+ return K.mean(rate)
+
+eps=1e-6
+def safelog2(x):
+ log2_e = 1.4427
+ return log2_e*tf.math.log(eps+x)
+
+def feat_dist_loss(y_true,y_pred):
+ lambda_1 = 1./K.sqrt(y_pred[:,:,:,-1])
+ y_pred = y_pred[:,:,:,:-1]
+ ceps = y_pred[:,:,:,:18] - y_true[:,:,:18]
+ pitch = 2*(y_pred[:,:,:,18:19] - y_true[:,:,18:19])/(y_true[:,:,18:19] + 2)
+ corr = y_pred[:,:,:,19:] - y_true[:,:,19:]
+ pitch_weight = K.square(K.maximum(0., y_true[:,:,19:]+.5))
+ return K.mean(lambda_1*K.mean(K.square(ceps) + 10*(1/18.)*K.abs(pitch)*pitch_weight + (1/18.)*K.square(corr), axis=-1))
+
+def sq1_rate_loss(y_true,y_pred):
+ lambda_val = K.sqrt(y_pred[:,:,-1])
+ y_pred = y_pred[:,:,:-1]
+ log2_e = 1.4427
+ n = y_pred.shape[-1]//3
+ r = (y_pred[:,:,2*n:])
+ p0 = (y_pred[:,:,n:2*n])
+ p0 = 1-r**(.5+.5*p0)
+ y_pred = y_pred[:,:,:n]
+ y_pred = soft_quantize(y_pred)
+
+ y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+ rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+ rate = -safelog2(-.5*tf.math.log(r)*r**K.abs(y_pred))
+ rate = -safelog2((1-r)/(1+r)*r**K.abs(y_pred))
+ #rate = -safelog2(- tf.math.sinh(.5*tf.math.log(r))* r**K.abs(y_pred) - tf.math.cosh(K.maximum(0., .5 - K.abs(y_pred))*tf.math.log(r)) + 1)
+ rate = lambda_val*K.sum(rate, axis=-1)
+ return K.mean(rate)
+
+def sq2_rate_loss(y_true,y_pred):
+ lambda_val = K.sqrt(y_pred[:,:,-1])
+ y_pred = y_pred[:,:,:-1]
+ log2_e = 1.4427
+ n = y_pred.shape[-1]//3
+ r = y_pred[:,:,2*n:]
+ p0 = y_pred[:,:,n:2*n]
+ p0 = 1-r**(.5+.5*p0)
+ #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
+ #p0 = 1-r**theta
+ y_pred = tf.round(y_pred[:,:,:n])
+ y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+ rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+ rate = lambda_val*K.sum(rate, axis=-1)
+ return K.mean(rate)
+
+def sq_rate_metric(y_true,y_pred, reduce=True):
+ y_pred = y_pred[:,:,:-1]
+ log2_e = 1.4427
+ n = y_pred.shape[-1]//3
+ r = y_pred[:,:,2*n:]
+ p0 = y_pred[:,:,n:2*n]
+ p0 = 1-r**(.5+.5*p0)
+ #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
+ #p0 = 1-r**theta
+ y_pred = tf.round(y_pred[:,:,:n])
+ y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+ rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+ rate = K.sum(rate, axis=-1)
+ if reduce:
+ rate = K.mean(rate)
+ return rate
+
+def pvq_quant_search(x, k):
+ x = x/tf.reduce_sum(tf.abs(x), axis=-1, keepdims=True)
+ kx = k*x
+ y = tf.round(kx)
+ newk = k
+
+ for j in range(10):
+ #print("y = ", y)
+ #print("iteration ", j)
+ abs_y = tf.abs(y)
+ abs_kx = tf.abs(kx)
+ kk=tf.reduce_sum(abs_y, axis=-1)
+ #print("sums = ", kk)
+ plus = 1.000001*tf.reduce_min((abs_y+.5)/(abs_kx+1e-15), axis=-1)
+ minus = .999999*tf.reduce_max((abs_y-.5)/(abs_kx+1e-15), axis=-1)
+ #print("plus = ", plus)
+ #print("minus = ", minus)
+ factor = tf.where(kk>k, minus, plus)
+ factor = tf.where(kk==k, tf.ones_like(factor), factor)
+ #print("scale = ", factor)
+ factor = tf.expand_dims(factor, axis=-1)
+ #newk = newk * (k/kk)**.2
+ newk = newk*factor
+ kx = newk*x
+ #print("newk = ", newk)
+ #print("unquantized = ", newk*x)
+ y = tf.round(kx)
+
+ #print(y)
+ #print(K.mean(K.sum(K.abs(y), axis=-1)))
+ return y
+
+def pvq_quantize(x, k):
+ x = x/(1e-15+tf.norm(x, axis=-1,keepdims=True))
+ quantized = pvq_quant_search(x, k)
+ quantized = quantized/(1e-15+tf.norm(quantized, axis=-1,keepdims=True))
+ return x + tf.stop_gradient(quantized - x)
+
+
+def var_repeat(x):
+ return tf.repeat(tf.expand_dims(x[0], 1), K.shape(x[1])[1], axis=1)
+
+# Size of the state vector: output width of the encoder's 'gdense2' layer and
+# last dimension of the decoder's state inputs.
+nb_state_dim = 24
+
+def new_rdovae_encoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+ feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+
+ gru = CuDNNGRU if training else GRU
+ enc_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense1')
+ enc_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense2')
+ enc_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense3')
+ enc_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense4')
+ enc_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense5')
+ enc_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense6')
+ enc_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense7')
+ enc_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense8')
+
+ #bits_dense = Dense(nb_bits, activation='linear', name='bits_dense')
+ bits_dense = Conv1D(nb_bits, 4, padding='causal', activation='linear', name='bits_dense')
+
+ zero_out = Lambda(lambda x: 0*x)
+ inputs = Reshape((-1, 2*nb_used_features))(feat)
+ d1 = enc_dense1(inputs)
+ d2 = enc_dense2(d1)
+ d3 = enc_dense3(d2)
+ d4 = enc_dense4(d3)
+ d5 = enc_dense5(d4)
+ d6 = enc_dense6(d5)
+ d7 = enc_dense7(d6)
+ d8 = enc_dense8(d7)
+ pre_out = Concatenate()([d1, d2, d3, d4, d5, d6, d7, d8])
+ enc_out = bits_dense(pre_out)
+ global_dense1 = Dense(128, activation='tanh', name='gdense1')
+ global_dense2 = Dense(nb_state_dim, activation='tanh', name='gdense2')
+ global_bits = global_dense2(global_dense1(pre_out))
+
+ encoder = Model([feat], [enc_out, global_bits], name='encoder')
+ return encoder
+
+def new_rdovae_decoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+ bits_input = Input(shape=(None, nb_bits), batch_size=batch_size, name="dec_bits")
+ gru_state_input = Input(shape=(nb_state_dim,), batch_size=batch_size, name="dec_state")
+
+
+ gru = CuDNNGRU if training else GRU
+ dec_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense1')
+ dec_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense2')
+ dec_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense3')
+ dec_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense4')
+ dec_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense5')
+ dec_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense6')
+ dec_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense7')
+ dec_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense8')
+
+ dec_final = Dense(bunch*nb_used_features, activation='linear', name='dec_final')
+
+ time_reverse = Lambda(lambda x: K.reverse(x, 1))
+ #time_reverse = Lambda(lambda x: x)
+ #gru_state_rep = RepeatVector(64//bunch)(gru_state_input)
+
+ #gru_state_rep = Lambda(var_repeat, output_shape=(None, nb_state_dim)) ([gru_state_input, bits_input])
+ gru_state1 = Dense(cond_size, name="state1", activation='tanh')(gru_state_input)
+ gru_state2 = Dense(cond_size, name="state2", activation='tanh')(gru_state_input)
+ gru_state3 = Dense(cond_size, name="state3", activation='tanh')(gru_state_input)
+
+ dec1 = dec_dense1(time_reverse(bits_input))
+ dec2 = dec_dense2(dec1, initial_state=gru_state1)
+ dec3 = dec_dense3(dec2)
+ dec4 = dec_dense4(dec3, initial_state=gru_state2)
+ dec5 = dec_dense5(dec4)
+ dec6 = dec_dense6(dec5, initial_state=gru_state3)
+ dec7 = dec_dense7(dec6)
+ dec8 = dec_dense8(dec7)
+ output = Reshape((-1, nb_used_features))(dec_final(Concatenate()([dec1, dec2, dec3, dec4, dec5, dec6, dec7, dec8])))
+ decoder = Model([bits_input, gru_state_input], time_reverse(output), name='decoder')
+ decoder.nb_bits = nb_bits
+ decoder.bunch = bunch
+ return decoder
+
+def new_split_decoder(decoder):
+ nb_bits = decoder.nb_bits
+ bunch = decoder.bunch
+ bits_input = Input(shape=(None, nb_bits), name="split_bits")
+ gru_state_input = Input(shape=(None,nb_state_dim), name="split_state")
+
+ range_select = Lambda(lambda x: x[0][:,x[1]:x[2],:])
+ elem_select = Lambda(lambda x: x[0][:,x[1],:])
+ points = [0, 100, 200, 300, 400]
+ outputs = []
+ for i in range(len(points)-1):
+ begin = points[i]//bunch
+ end = points[i+1]//bunch
+ state = elem_select([gru_state_input, end-1])
+ bits = range_select([bits_input, begin, end])
+ outputs.append(decoder([bits, state]))
+ output = Concatenate(axis=1)(outputs)
+ split = Model([bits_input, gru_state_input], output, name="split")
+ return split
+
+def tensor_concat(x):
+ #n = x[1]//2
+ #x = x[0]
+ n=2
+ y = []
+ for i in range(n-1):
+ offset = 2 * (n-1-i)
+ tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2)
+ y.append(tf.expand_dims(tmp, axis=0))
+ y.append(tf.expand_dims(x[-1], axis=0))
+ return Concatenate(axis=0)(y)
+
+
+# Build the end-to-end RDO-VAE training model.
+# Returns the tuple (model, encoder, decoder, qembedding). `model` takes
+# [feat, quant_id, lambda_val] and outputs
+# [combined_output, unquantized_output, e, e2].
+def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+
+ feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+ quant_id = Input(shape=(None,), batch_size=batch_size)
+ lambda_val = Input(shape=(None, 1), batch_size=batch_size)
+ # NOTE(review): lambda_bunched is not referenced again in this function.
+ lambda_bunched = AveragePooling1D(pool_size=bunch//2, strides=bunch//2, padding="valid")(lambda_val)
+ # Repeat every lambda value twice along the time axis (axis -2).
+ lambda_up = Lambda(lambda x: K.repeat_elements(x, 2, axis=-2))(lambda_val)
+
+ # One embedding row of width 6*nb_bits per quantizer index. It is sliced below into
+ # quant_scale [0:nb_bits], dead_zone [nb_bits:2*nb_bits],
+ # soft_distr_embed [2*nb_bits:4*nb_bits] and hard_distr_embed [4*nb_bits:].
+ qembedding = Embedding(nb_quant, 6*nb_bits, name='quant_embed', embeddings_initializer='zeros')
+ quant_embed_dec = qembedding(quant_id)
+ quant_scale = Activation('softplus')(Lambda(lambda x: x[:,:,:nb_bits], name='quant_scale_embed')(quant_embed_dec))
+
+ encoder = new_rdovae_encoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
+ ze, gru_state_dec = encoder([feat])
+ # Scale the latent vector by the per-quantizer scale before quantization.
+ ze = Multiply()([ze, quant_scale])
+
+ decoder = new_rdovae_decoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
+ split_decoder = new_split_decoder(decoder)
+
+ dead_zone = Activation('softplus')(Lambda(lambda x: x[:,:,nb_bits:2*nb_bits], name='dead_zone_embed')(quant_embed_dec))
+ soft_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,2*nb_bits:4*nb_bits], name='soft_distr_embed')(quant_embed_dec))
+ hard_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,4*nb_bits:], name='hard_distr_embed')(quant_embed_dec))
+
+ # UniformNoise, hard_quantize and apply_dead_zone are defined outside this chunk.
+ # Two versions of the dead-zoned latent are produced: ndze (through the noise
+ # layer) and dze_quant (through hard_quantize).
+ noisequant = UniformNoise()
+ hardquant = Lambda(hard_quantize)
+ dzone = Lambda(apply_dead_zone)
+ dze = dzone([ze,dead_zone])
+ ndze = noisequant(dze)
+ dze_quant = hardquant(dze)
+
+ # Undo the quant_scale multiplication applied to ze above.
+ div = Lambda(lambda x: x[0]/x[1])
+ dze_quant = div([dze_quant,quant_scale])
+ ndze_unquant = div([ndze,quant_scale])
+
+ # mod_select([t, i]) keeps every (bunch//2)-th time step of t, starting at offset i.
+ mod_select = Lambda(lambda x: x[0][:,x[1]::bunch//2,:])
+ # pvq_quantize is defined outside this chunk; presumably a PVQ-style quantizer
+ # of the decoder initial state with parameter 82 -- TODO confirm.
+ gru_state_dec = Lambda(lambda x: pvq_quantize(x, 82))(gru_state_dec)
+ combined_output = []
+ unquantized_output = []
+ cat = Concatenate(name="out_cat")
+ # Decode each time offset separately, once from the hard-quantized latents and
+ # once from the noisy latents, appending lambda_up to each decoder output.
+ for i in range(bunch//2):
+ dze_select = mod_select([dze_quant, i])
+ ndze_select = mod_select([ndze_unquant, i])
+ state_select = mod_select([gru_state_dec, i])
+
+ tmp = split_decoder([dze_select, state_select])
+ tmp = cat([tmp, lambda_up])
+ combined_output.append(tmp)
+
+ tmp = split_decoder([ndze_select, state_select])
+ tmp = cat([tmp, lambda_up])
+ unquantized_output.append(tmp)
+
+ # Stack the per-offset outputs along a new leading axis (see tensor_concat).
+ concat = Lambda(tensor_concat, name="output")
+ combined_output = concat(combined_output)
+ unquantized_output = concat(unquantized_output)
+
+ # Rate-related outputs: dead-zoned latents concatenated with the hard/soft
+ # distribution embeddings and lambda_val.
+ e2 = Concatenate(name="hard_bits")([dze, hard_distr_embed, lambda_val])
+ e = Concatenate(name="soft_bits")([dze, soft_distr_embed, lambda_val])
+
+
+ model = Model([feat, quant_id, lambda_val], [combined_output, unquantized_output, e, e2], name="end2end")
+ model.nb_used_features = nb_used_features
+
+ return model, encoder, decoder, qembedding
diff --git a/dnn/training_tf2/rdovae_exchange.py b/dnn/training_tf2/rdovae_exchange.py
new file mode 100644
index 00000000..3249677d
--- /dev/null
+++ b/dnn/training_tf2/rdovae_exchange.py
@@ -0,0 +1,138 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+import os
+import sys
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('output', metavar="<output folder>", type=str, help='output exchange folder')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+from rdovae import new_rdovae_model
+from wexchange.tf import dump_tf_weights, load_tf_weights
+
+
+exchange_name = {
+ 'enc_dense1' : 'encoder_stack_layer1_dense',
+ 'enc_dense3' : 'encoder_stack_layer3_dense',
+ 'enc_dense5' : 'encoder_stack_layer5_dense',
+ 'enc_dense7' : 'encoder_stack_layer7_dense',
+ 'enc_dense8' : 'encoder_stack_layer8_dense',
+ 'gdense1' : 'encoder_state_layer1_dense',
+ 'gdense2' : 'encoder_state_layer2_dense',
+ 'enc_dense2' : 'encoder_stack_layer2_gru',
+ 'enc_dense4' : 'encoder_stack_layer4_gru',
+ 'enc_dense6' : 'encoder_stack_layer6_gru',
+ 'bits_dense' : 'encoder_stack_layer9_conv',
+ 'qembedding' : 'statistical_model_embedding',
+ 'state1' : 'decoder_state1_dense',
+ 'state2' : 'decoder_state2_dense',
+ 'state3' : 'decoder_state3_dense',
+ 'dec_dense1' : 'decoder_stack_layer1_dense',
+ 'dec_dense3' : 'decoder_stack_layer3_dense',
+ 'dec_dense5' : 'decoder_stack_layer5_dense',
+ 'dec_dense7' : 'decoder_stack_layer7_dense',
+ 'dec_dense8' : 'decoder_stack_layer8_dense',
+ 'dec_final' : 'decoder_stack_layer9_dense',
+ 'dec_dense2' : 'decoder_stack_layer2_gru',
+ 'dec_dense4' : 'decoder_stack_layer4_gru',
+ 'dec_dense6' : 'decoder_stack_layer6_gru'
+}
+
+
+if __name__ == "__main__":
+
+ model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+ model.load_weights(args.weights)
+
+ os.makedirs(args.output, exist_ok=True)
+
+ # encoder
+ encoder_dense_names = [
+ 'enc_dense1',
+ 'enc_dense3',
+ 'enc_dense5',
+ 'enc_dense7',
+ 'enc_dense8',
+ 'gdense1',
+ 'gdense2'
+ ]
+
+ encoder_gru_names = [
+ 'enc_dense2',
+ 'enc_dense4',
+ 'enc_dense6'
+ ]
+
+ encoder_conv1d_names = [
+ 'bits_dense'
+ ]
+
+
+ for name in encoder_dense_names + encoder_gru_names + encoder_conv1d_names:
+ print(f"writing layer {exchange_name[name]}...")
+ dump_tf_weights(os.path.join(args.output, exchange_name[name]), encoder.get_layer(name))
+
+ # qembedding
+ print(f"writing layer {exchange_name['qembedding']}...")
+ dump_tf_weights(os.path.join(args.output, exchange_name['qembedding']), qembedding)
+
+ # decoder
+ decoder_dense_names = [
+ 'state1',
+ 'state2',
+ 'state3',
+ 'dec_dense1',
+ 'dec_dense3',
+ 'dec_dense5',
+ 'dec_dense7',
+ 'dec_dense8',
+ 'dec_final'
+ ]
+
+ decoder_gru_names = [
+ 'dec_dense2',
+ 'dec_dense4',
+ 'dec_dense6'
+ ]
+
+ for name in decoder_dense_names + decoder_gru_names:
+ print(f"writing layer {exchange_name[name]}...")
+ dump_tf_weights(os.path.join(args.output, exchange_name[name]), decoder.get_layer(name))
diff --git a/dnn/training_tf2/rdovae_import.py b/dnn/training_tf2/rdovae_import.py
new file mode 100644
index 00000000..bc8b460d
--- /dev/null
+++ b/dnn/training_tf2/rdovae_import.py
@@ -0,0 +1,123 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+import os
+import sys
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('input', metavar="<input folder>", type=str, help='input exchange folder')
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+from rdovae import new_rdovae_model
+from wexchange.tf import load_tf_weights
+
+
+exchange_name = {
+ 'enc_dense1' : 'encoder_stack_layer1_dense',
+ 'enc_dense3' : 'encoder_stack_layer3_dense',
+ 'enc_dense5' : 'encoder_stack_layer5_dense',
+ 'enc_dense7' : 'encoder_stack_layer7_dense',
+ 'enc_dense8' : 'encoder_stack_layer8_dense',
+ 'gdense1' : 'encoder_state_layer1_dense',
+ 'gdense2' : 'encoder_state_layer2_dense',
+ 'enc_dense2' : 'encoder_stack_layer2_gru',
+ 'enc_dense4' : 'encoder_stack_layer4_gru',
+ 'enc_dense6' : 'encoder_stack_layer6_gru',
+ 'bits_dense' : 'encoder_stack_layer9_conv',
+ 'qembedding' : 'statistical_model_embedding',
+ 'state1' : 'decoder_state1_dense',
+ 'state2' : 'decoder_state2_dense',
+ 'state3' : 'decoder_state3_dense',
+ 'dec_dense1' : 'decoder_stack_layer1_dense',
+ 'dec_dense3' : 'decoder_stack_layer3_dense',
+ 'dec_dense5' : 'decoder_stack_layer5_dense',
+ 'dec_dense7' : 'decoder_stack_layer7_dense',
+ 'dec_dense8' : 'decoder_stack_layer8_dense',
+ 'dec_final' : 'decoder_stack_layer9_dense',
+ 'dec_dense2' : 'decoder_stack_layer2_gru',
+ 'dec_dense4' : 'decoder_stack_layer4_gru',
+ 'dec_dense6' : 'decoder_stack_layer6_gru'
+}
+
+if __name__ == "__main__":
+
+ model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+
+ encoder_layers = [
+ 'enc_dense1',
+ 'enc_dense3',
+ 'enc_dense5',
+ 'enc_dense7',
+ 'enc_dense8',
+ 'gdense1',
+ 'gdense2',
+ 'enc_dense2',
+ 'enc_dense4',
+ 'enc_dense6',
+ 'bits_dense'
+ ]
+
+ decoder_layers = [
+ 'state1',
+ 'state2',
+ 'state3',
+ 'dec_dense1',
+ 'dec_dense3',
+ 'dec_dense5',
+ 'dec_dense7',
+ 'dec_dense8',
+ 'dec_final',
+ 'dec_dense2',
+ 'dec_dense4',
+ 'dec_dense6'
+ ]
+
+ for name in encoder_layers:
+ print(f"loading weight for layer {name}...")
+ load_tf_weights(os.path.join(args.input, exchange_name[name]), encoder.get_layer(name))
+
+ print(f"loading weight for layer qembedding...")
+ load_tf_weights(os.path.join(args.input, exchange_name['qembedding']), qembedding)
+
+ for name in decoder_layers:
+ print(f"loading weight for layer {name}...")
+ load_tf_weights(os.path.join(args.input, exchange_name[name]), decoder.get_layer(name))
+
+ model.save(args.weights)
diff --git a/dnn/training_tf2/test_lpcnet.py b/dnn/training_tf2/test_lpcnet.py
new file mode 100755
index 00000000..ca551e63
--- /dev/null
+++ b/dnn/training_tf2/test_lpcnet.py
@@ -0,0 +1,120 @@
+#!/usr/bin/python3
+'''Copyright (c) 2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+import argparse
+import sys
+
+import h5py
+import numpy as np
+
+import lpcnet
+from ulaw import ulaw2lin, lin2ulaw
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('model-file', type=str, help='model weight h5 file')
+parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. WARNING: giving an inconsistent value here will severely degrade performance', default=1)
+
+args = parser.parse_args()
+
+filename = args.model_file
+with h5py.File(filename, "r") as f:
+ units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
+ units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
+ cond_size = min(f['model_weights']['feature_dense1']['feature_dense1']['kernel:0'].shape)
+ e2e = 'rc2lpc' in f['model_weights']
+
+
+model, enc, dec = lpcnet.new_lpcnet_model(training = False, rnn_units1=units, rnn_units2=units2, flag_e2e = e2e, cond_size=cond_size, batch_size=1)
+
+model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+#model.summary()
+
+
+feature_file = sys.argv[2]
+out_file = sys.argv[3]
+frame_size = model.frame_size
+nb_features = 36
+nb_used_features = model.nb_used_features
+
+features = np.fromfile(feature_file, dtype='float32')
+features = np.resize(features, (-1, nb_features))
+nb_frames = 1
+feature_chunk_size = features.shape[0]
+pcm_chunk_size = frame_size*feature_chunk_size
+
+features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
+periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')
+
+
+
+model.load_weights(filename);
+
+order = 16
+
+pcm = np.zeros((nb_frames*pcm_chunk_size, ))
+fexc = np.zeros((1, 1, 3), dtype='int16')+128
+state1 = np.zeros((1, model.rnn_units1), dtype='float32')
+state2 = np.zeros((1, model.rnn_units2), dtype='float32')
+
+mem = 0
+coef = 0.85
+
+lpc_weights = np.array([args.lpc_gamma ** (i + 1) for i in range(16)])
+
+fout = open(out_file, 'wb')
+
+skip = order + 1
+for c in range(0, nb_frames):
+ if not e2e:
+ cfeat = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
+ else:
+ cfeat,lpcs = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
+ for fr in range(0, feature_chunk_size):
+ f = c*feature_chunk_size + fr
+ if not e2e:
+ a = features[c, fr, nb_features-order:] * lpc_weights
+ else:
+ a = lpcs[c,fr]
+ for i in range(skip, frame_size):
+ pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1])
+ fexc[0, 0, 1] = lin2ulaw(pred)
+
+ p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2])
+ #Lower the temperature for voiced frames to reduce noisiness
+ p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 19] - .5))
+ p = p/(1e-18 + np.sum(p))
+ #Cut off the tail of the remaining distribution
+ p = np.maximum(p-0.002, 0).astype('float64')
+ p = p/(1e-8 + np.sum(p))
+
+ fexc[0, 0, 2] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))
+ pcm[f*frame_size + i] = pred + ulaw2lin(fexc[0, 0, 2])
+ fexc[0, 0, 0] = lin2ulaw(pcm[f*frame_size + i])
+ mem = coef*mem + pcm[f*frame_size + i]
+ #print(mem)
+ np.array([np.round(mem)], dtype='int16').tofile(fout)
+ skip = 0
diff --git a/dnn/training_tf2/test_plc.py b/dnn/training_tf2/test_plc.py
new file mode 100644
index 00000000..0c0ac25f
--- /dev/null
+++ b/dnn/training_tf2/test_plc.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Test a PLC model
+
+import argparse
+from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Test a PLC model')
+
+parser.add_argument('weights', metavar='<weights file>', help='weights file (.h5)')
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('output', metavar='<output>', help='reconstructed file (float32)')
+parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+
+parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
+parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
+
+
+args = parser.parse_args()
+
+import importlib
+lpcnet = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+#gpus = tf.config.experimental.list_physical_devices('GPU')
+#if gpus:
+# try:
+# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+# except RuntimeError as e:
+# print(e)
+
+model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=1, training=False, quantize=False, cond_size=args.cond_size)
+model.compile()
+
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+
+# u for unquantised, load 16 bit PCM samples and convert to mu-law
+
+features = np.loadtxt(feature_file)
+print(features.shape)
+sequence_size = features.shape[0]
+lost = np.reshape(features[:,-1:], (1, sequence_size, 1))
+features = features[:,:nb_used_features]
+features = np.reshape(features, (1, sequence_size, nb_used_features))
+
+
+model.load_weights(args.weights)
+
+features = features*lost
+out = model.predict([features, lost])
+
+out = features + (1-lost)*out
+
+np.savetxt(args.output, out[0,:,:])
diff --git a/dnn/training_tf2/tf_funcs.py b/dnn/training_tf2/tf_funcs.py
new file mode 100644
index 00000000..b86f075c
--- /dev/null
+++ b/dnn/training_tf2/tf_funcs.py
@@ -0,0 +1,70 @@
+"""
+Tensorflow/Keras helper functions to do the following:
+ 1. mu-law <-> linear domain conversion
+ 2. Differentiable prediction from the input signal and LP coefficients
+ 3. Differentiable transformations Reflection Coefficients (RCs) <-> LP Coefficients
+"""
+from tensorflow.keras.layers import Lambda, Multiply, Layer, Concatenate
+from tensorflow.keras import backend as K
+import tensorflow as tf
+
+# \mu law <-> Linear conversion functions
+scale = 255.0/32768.0
+scale_1 = 32768.0/255.0
+def tf_l2u(x):
+ s = K.sign(x)
+ x = K.abs(x)
+ u = (s*(128*K.log(1+scale*x)/K.log(256.0)))
+ u = K.clip(128 + u, 0, 255)
+ return u
+
+def tf_u2l(u):
+ u = tf.cast(u,"float32")
+ u = u - 128.0
+ s = K.sign(u)
+ u = K.abs(u)
+ return s*scale_1*(K.exp(u/128.*K.log(256.0))-1)
+
+# Differentiable Prediction Layer
+# Computes the LP prediction from the input lag signal and the LP coefficients
+# The inputs xt and lpc conform with the shapes in lpcnet.py (the '2400' is coded keeping this in mind)
+# Layer computing the LP prediction as minus the sum over the lpcoeffs_N most recent
+# past samples, each multiplied by its LP coefficient.
+# inputs[0] is the signal, inputs[1] the LP coefficients.
+class diff_pred(Layer):
+ def call(self, inputs, lpcoeffs_N = 16, frame_size = 160):
+ xt = inputs[0]
+ lpc = inputs[1]
+
+ # rept: repeat each coefficient vector frame_size times along axis 1.
+ rept = Lambda(lambda x: K.repeat_elements(x , frame_size, 1))
+ # zpX: prepend lpcoeffs_N zero steps along axis 1 (0 * the first lpcoeffs_N steps).
+ zpX = Lambda(lambda x: K.concatenate([0*x[:,0:lpcoeffs_N,:], x],axis = 1))
+ # cX: concatenate, along axis 2, lpcoeffs_N windows of 2400 steps, window i starting
+ # at index lpcoeffs_N - i of the zero-padded signal. The window length 2400 is
+ # hard-coded to match the shapes used in lpcnet.py.
+ cX = Lambda(lambda x: K.concatenate([x[:,(lpcoeffs_N - i):(lpcoeffs_N - i + 2400),:] for i in range(lpcoeffs_N)],axis = 2))
+
+ pred = -Multiply()([rept(lpc),cX(zpX(xt))])
+
+ # Sum the per-coefficient products over axis 2, keeping that axis with size 1.
+ return K.sum(pred,axis = 2,keepdims = True)
+
+# Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion
+# Layer converting reflection coefficients to LP coefficients.
+# Only the first lpcoeffs_N entries along the last axis of `inputs` are used.
+class diff_rc2lpc(Layer):
+ def call(self, inputs, lpcoeffs_N = 16):
+ # One recursion step: input[0] holds the current coefficients, input[1] the next
+ # reflection coefficient k (size 1 on axis 2). Returns
+ # [input[0] + k * reverse(input[0]), k] concatenated along axis 2.
+ def pred_lpc_recursive(input):
+ temp = (input[0] + K.repeat_elements(input[1],input[0].shape[2],2)*K.reverse(input[0],axes = 2))
+ temp = Concatenate(axis = 2)([temp,input[1]])
+ return temp
+ Llpc = Lambda(pred_lpc_recursive)
+ inputs = inputs[:,:,:lpcoeffs_N]
+ lpc_init = inputs
+ # Step i combines the first i coefficients with reflection coefficient i,
+ # growing the coefficient vector by one entry per iteration.
+ for i in range(1,lpcoeffs_N):
+ lpc_init = Llpc([lpc_init[:,:,:i],K.expand_dims(inputs[:,:,i],axis = -1)])
+ return lpc_init
+
+# Layer converting LP coefficients to reflection coefficients by running the
+# recursion backwards, one order at a time.
+class diff_lpc2rc(Layer):
+ def call(self, inputs, lpcoeffs_N = 16):
+ # One backward step: input[0] is the head being reduced, input[1] the tail.
+ # ki is the first element of the tail; the head becomes
+ # (head - ki * reverse(head)) / (1 - ki^2), and the tail is re-attached unchanged.
+ # NOTE(review): no guard against |ki| == 1, which would divide by zero.
+ def pred_rc_recursive(input):
+ ki = K.repeat_elements(K.expand_dims(input[1][:,:,0],axis = -1),input[0].shape[2],2)
+ temp = (input[0] - ki*K.reverse(input[0],axes = 2))/(1 - ki*ki)
+ temp = Concatenate(axis = 2)([temp,input[1]])
+ return temp
+ Lrc = Lambda(pred_rc_recursive)
+ rc_init = inputs
+ # The split point j - 1 moves from lpcoeffs_N - 1 down to 1, so the head
+ # shrinks by one element per iteration.
+ for i in range(1,lpcoeffs_N):
+ j = (lpcoeffs_N - i + 1)
+ rc_init = Lrc([rc_init[:,:,:(j - 1)],rc_init[:,:,(j - 1):]])
+ return rc_init
diff --git a/dnn/training_tf2/train_lpcnet.py b/dnn/training_tf2/train_lpcnet.py
new file mode 100755
index 00000000..60e2b56f
--- /dev/null
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -0,0 +1,214 @@
+#!/usr/bin/python3
+'''Copyright (c) 2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train an LPCNet model
+
+import argparse
+import os
+
+from dataloader import LPCNetLoader
+
+parser = argparse.ArgumentParser(description='Train an LPCNet model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('data', metavar='<audio data file>', help='binary audio data file (uint8)')
+parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='<model>', default='lpcnet', help='LPCNet model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
+group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
+parser.add_argument('--density', metavar='<global density>', type=float, help='average density of the recurrent weights (default 0.1)')
+parser.add_argument('--density-split', nargs=3, metavar=('<update>', '<reset>', '<state>'), type=float, help='density of each recurrent gate (default 0.05, 0.05, 0.2)')
+parser.add_argument('--grub-density', metavar='<global GRU B density>', type=float, help='average density of the recurrent weights (default 1.0)')
+parser.add_argument('--grub-density-split', nargs=3, metavar=('<update>', '<reset>', '<state>'), type=float, help='density of each GRU B input gate (default 1.0, 1.0, 1.0)')
+parser.add_argument('--grua-size', metavar='<units>', default=384, type=int, help='number of units in GRU A (default 384)')
+parser.add_argument('--grub-size', metavar='<units>', default=16, type=int, help='number of units in GRU B (default 16)')
+parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network, aka frame rate network (default 128)')
+parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
+parser.add_argument('--end2end', dest='flag_e2e', action='store_true', help='Enable end-to-end training (with differentiable LPC computation')
+parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
+parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
+parser.add_argument('--gamma', metavar='<gamma>', type=float, help='adjust u-law compensation (default 2.0, should not be less than 1.0)')
+parser.add_argument('--lookahead', metavar='<nb frames>', default=2, type=int, help='Number of look-ahead frames (default 2)')
+parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
+parser.add_argument('--lpc-gamma', type=float, default=1, help='gamma for LPC weighting')
+parser.add_argument('--cuda-devices', metavar='<cuda devices>', type=str, default=None, help='string with comma separated cuda device ids')
+
+args = parser.parse_args()
+
+# set visible cuda devices
+if args.cuda_devices != None:
+ os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_devices
+
+density = (0.05, 0.05, 0.2)
+if args.density_split is not None:
+ density = args.density_split
+elif args.density is not None:
+ density = [0.5*args.density, 0.5*args.density, 2.0*args.density];
+
+grub_density = (1., 1., 1.)
+if args.grub_density_split is not None:
+ grub_density = args.grub_density_split
+elif args.grub_density is not None:
+ grub_density = [0.5*args.grub_density, 0.5*args.grub_density, 2.0*args.grub_density];
+
+gamma = 2.0 if args.gamma is None else args.gamma
+
+import importlib
+lpcnet = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+from ulaw import ulaw2lin, lin2ulaw
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+from tf_funcs import *
+from lossfuncs import *
+#gpus = tf.config.experimental.list_physical_devices('GPU')
+#if gpus:
+# try:
+# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+# except RuntimeError as e:
+# print(e)
+
+nb_epochs = args.epochs
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+quantize = args.quantize is not None
+retrain = args.retrain is not None
+
+lpc_order = 16
+
+if quantize:
+ lr = 0.00003
+ decay = 0
+ input_model = args.quantize
+else:
+ lr = 0.001
+ decay = 5e-5
+
+if args.lr is not None:
+ lr = args.lr
+
+if args.decay is not None:
+ decay = args.decay
+
+if retrain:
+ input_model = args.retrain
+
+flag_e2e = args.flag_e2e
+
+opt = Adam(lr, decay=decay, beta_1=0.5, beta_2=0.8)
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+ model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size,
+ rnn_units2=args.grub_size,
+ batch_size=batch_size, training=True,
+ quantize=quantize,
+ flag_e2e=flag_e2e,
+ cond_size=args.cond_size,
+ lpc_gamma=args.lpc_gamma,
+ lookahead=args.lookahead
+ )
+ if not flag_e2e:
+ model.compile(optimizer=opt, loss=metric_cel, metrics=metric_cel)
+ else:
+ model.compile(optimizer=opt, loss = [interp_mulaw(gamma=gamma), loss_matchlar()], loss_weights = [1.0, 2.0], metrics={'pdf':[metric_cel,metric_icel,metric_exc_sd,metric_oginterploss]})
+ model.summary()
+
+# Map the training data from disk and arrange it into per-sequence chunks.
+feature_file = args.features
+pcm_file = args.data # 16 bit unsigned short PCM samples
+# NOTE(review): the file is read below with dtype int16 (signed), two values per sample.
+frame_size = model.frame_size
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+feature_chunk_size = 15
+pcm_chunk_size = frame_size*feature_chunk_size
+
+# u for unquantised, load 16 bit PCM samples and convert to mu-law
+
+data = np.memmap(pcm_file, dtype='int16', mode='r')
+# Number of whole chunks (2 int16 values per sample), minus one, rounded down to a
+# multiple of the batch size.
+nb_frames = (len(data)//(2*pcm_chunk_size)-1)//batch_size*batch_size
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+
+# limit to discrete number of frames
+# Drop the first (4 - lookahead) frames of audio, then keep exactly nb_frames chunks.
+data = data[(4-args.lookahead)*2*frame_size:]
+data = data[:nb_frames*2*pcm_chunk_size]
+
+
+data = np.reshape(data, (nb_frames, pcm_chunk_size, 2))
+
+#print("ulaw std = ", np.std(out_exc))
+
+# Build overlapping feature windows without copying: each window holds
+# feature_chunk_size+4 frames and consecutive windows start feature_chunk_size
+# frames apart, so neighbouring windows share 4 frames.
+sizeof = features.strides[-1]
+features = np.lib.stride_tricks.as_strided(features, shape=(nb_frames, feature_chunk_size+4, nb_features),
+ strides=(feature_chunk_size*nb_features*sizeof, nb_features*sizeof, sizeof))
+#features = features[:, :, :nb_used_features]
+
+
+# Integer periods derived from feature column nb_used_features-2 as 50*value + 100 (+0.1 before truncation).
+periods = (.1 + 50*features[:,:,nb_used_features-2:nb_used_features-1]+100).astype('int16')
+#periods = np.minimum(periods, 255)
+
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.grua_size, '{epoch:02d}'))
+
+if args.retrain is not None:
+ model.load_weights(args.retrain)
+
+if quantize or retrain:
+ #Adapting from an existing model
+ model.load_weights(input_model)
+ if quantize:
+ sparsify = lpcnet.Sparsify(10000, 30000, 100, density, quantize=True)
+ grub_sparsify = lpcnet.SparsifyGRUB(10000, 30000, 100, args.grua_size, grub_density, quantize=True)
+ else:
+ sparsify = lpcnet.Sparsify(0, 0, 1, density)
+ grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
+else:
+ #Training from scratch
+ sparsify = lpcnet.Sparsify(2000, 20000, 400, density)
+ grub_sparsify = lpcnet.SparsifyGRUB(2000, 40000, 400, args.grua_size, grub_density)
+
+model.save_weights('{}_{}_initial.h5'.format(args.output, args.grua_size))
+
+loader = LPCNetLoader(data, features, periods, batch_size, e2e=flag_e2e, lookahead=args.lookahead)
+
+callbacks = [checkpoint, sparsify, grub_sparsify]
+if args.logdir is not None:
+ logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.grua_size)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
+ callbacks.append(tensorboard_callback)
+
+model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
diff --git a/dnn/training_tf2/train_plc.py b/dnn/training_tf2/train_plc.py
new file mode 100644
index 00000000..ca30c457
--- /dev/null
+++ b/dnn/training_tf2/train_plc.py
@@ -0,0 +1,197 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train a PLC model
+
+import argparse
+from plc_loader import PLCLoader
+
+# Command-line interface: three positional arguments (features file, packet
+# loss trace file, output name) followed by optional training settings.
+parser = argparse.ArgumentParser(description='Train a PLC model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('lost_file', metavar='<packet loss file>', help='packet loss traces (int8)')
+parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
+# --quantize and --retrain each take a weights file and cannot be combined.
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
+group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
+parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
+parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
+parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+# --lr and --decay declare no default, so they are None when omitted.
+parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
+parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
+parser.add_argument('--band-loss', metavar='<weight>', default=1.0, type=float, help='weight of band loss (default 1.0)')
+parser.add_argument('--loss-bias', metavar='<bias>', default=0.0, type=float, help='loss bias towards low energy (default 0.0)')
+parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
+
+
+args = parser.parse_args()
+
+import importlib
+lpcnet = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+#gpus = tf.config.experimental.list_physical_devices('GPU')
+#if gpus:
+# try:
+# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+# except RuntimeError as e:
+# print(e)
+
+nb_epochs = args.epochs
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+quantize = args.quantize is not None
+retrain = args.retrain is not None
+
+if quantize:
+ lr = 0.00003
+ decay = 0
+ input_model = args.quantize
+else:
+ lr = 0.001
+ decay = 2.5e-5
+
+if args.lr is not None:
+ lr = args.lr
+
+if args.decay is not None:
+ decay = args.decay
+
+if retrain:
+ input_model = args.retrain
+
+def plc_loss(alpha=1.0, bias=0.):
+ def loss(y_true,y_pred):
+ mask = y_true[:,:,-1:]
+ y_true = y_true[:,:,:-1]
+ e = (y_pred - y_true)*mask
+ e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
+ bias_mask = K.minimum(1., K.maximum(0., 4*y_true[:,:,-1:]))
+ l1_loss = K.mean(K.abs(e)) + 0.1*K.mean(K.maximum(0., -e[:,:,-1:])) + alpha*K.mean(K.abs(e_bands) + bias*bias_mask*K.maximum(0., e_bands)) + K.mean(K.minimum(K.abs(e[:,:,18:19]),1.)) + 8*K.mean(K.minimum(K.abs(e[:,:,18:19]),.4))
+ return l1_loss
+ return loss
+
+def plc_l1_loss():
+ def L1_loss(y_true,y_pred):
+ mask = y_true[:,:,-1:]
+ y_true = y_true[:,:,:-1]
+ e = (y_pred - y_true)*mask
+ l1_loss = K.mean(K.abs(e))
+ return l1_loss
+ return L1_loss
+
+def plc_ceps_loss():
+ def ceps_loss(y_true,y_pred):
+ mask = y_true[:,:,-1:]
+ y_true = y_true[:,:,:-1]
+ e = (y_pred - y_true)*mask
+ l1_loss = K.mean(K.abs(e[:,:,:-2]))
+ return l1_loss
+ return ceps_loss
+
+def plc_band_loss():
+ def L1_band_loss(y_true,y_pred):
+ mask = y_true[:,:,-1:]
+ y_true = y_true[:,:,:-1]
+ e = (y_pred - y_true)*mask
+ e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
+ l1_loss = K.mean(K.abs(e_bands))
+ return l1_loss
+ return L1_band_loss
+
+def plc_pitch_loss():
+ def pitch_loss(y_true,y_pred):
+ mask = y_true[:,:,-1:]
+ y_true = y_true[:,:,:-1]
+ e = (y_pred - y_true)*mask
+ l1_loss = K.mean(K.minimum(K.abs(e[:,:,18:19]),.4))
+ return l1_loss
+ return pitch_loss
+
+opt = Adam(lr, decay=decay, beta_2=0.99)
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+ model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=batch_size, training=True, quantize=quantize, cond_size=args.cond_size)
+ model.compile(optimizer=opt, loss=plc_loss(alpha=args.band_loss, bias=args.loss_bias), metrics=[plc_l1_loss(), plc_ceps_loss(), plc_band_loss(), plc_pitch_loss()])
+ model.summary()
+
+# Each frame stored in the features file holds the model's used features,
+# lpc_order further values, and the Burg features.
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order + model.nb_burg_features
+nb_used_features = model.nb_used_features
+nb_burg_features = model.nb_burg_features
+sequence_size = args.seq_length
+
+# Memory-map the float32 feature file; this script reads no PCM data.
+
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+# Keep only whole sequences, and a whole number of batches of them.
+nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
+features = features[:nb_sequences*sequence_size*nb_features]
+
+features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+
+# Keep the first nb_used_features + nb_burg_features values of every frame.
+features = features[:, :, :nb_used_features+model.nb_burg_features]
+
+# Packet loss trace, memory-mapped as int8 and passed to the loader unchanged.
+lost = np.memmap(args.lost_file, dtype='int8', mode='r')
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.gru_size, '{epoch:02d}'))
+
+if args.retrain is not None:
+ model.load_weights(args.retrain)
+
+if quantize or retrain:
+ #Adapting from an existing model
+ model.load_weights(input_model)
+
+model.save_weights('{}_{}_initial.h5'.format(args.output, args.gru_size))
+
+loader = PLCLoader(features, lost, nb_burg_features, batch_size)
+
+callbacks = [checkpoint]
+if args.logdir is not None:
+ logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.gru_size)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
+ callbacks.append(tensorboard_callback)
+
+model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
diff --git a/dnn/training_tf2/train_rdovae.py b/dnn/training_tf2/train_rdovae.py
new file mode 100644
index 00000000..b474b5f6
--- /dev/null
+++ b/dnn/training_tf2/train_rdovae.py
@@ -0,0 +1,151 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train an RDOVAE quantization model
+import tensorflow as tf
+strategy = tf.distribute.MultiWorkerMirroredStrategy()
+
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Train a quantization model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
+group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
+parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
+parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+#gpus = tf.config.experimental.list_physical_devices('GPU')
+#if gpus:
+# try:
+# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+# except RuntimeError as e:
+# print(e)
+
+nb_epochs = args.epochs
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+quantize = args.quantize is not None
+retrain = args.retrain is not None
+
+if quantize:
+ lr = 0.00003
+ decay = 0
+ input_model = args.quantize
+else:
+ lr = 0.001
+ decay = 2.5e-5
+
+if args.lr is not None:
+ lr = args.lr
+
+if args.decay is not None:
+ decay = args.decay
+
+if retrain:
+ input_model = args.retrain
+
+
+opt = Adam(lr, decay=decay, beta_2=0.99)
+
+# The model is created and compiled inside the distribution strategy scope.
+with strategy.scope():
+ # Only the combined model is trained below; the fourth returned value is discarded.
+ model, encoder, decoder, _ = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size, nb_quant=16)
+ # One loss per model output, combined with weights .5, .5, 1. and .1.
+ model.compile(optimizer=opt, loss=[rdovae.feat_dist_loss, rdovae.feat_dist_loss, rdovae.sq1_rate_loss, rdovae.sq2_rate_loss], loss_weights=[.5, .5, 1., .1], metrics={'hard_bits':rdovae.sq_rate_metric})
+ model.summary()
+
+# Each frame stored in the features file holds the model's used features
+# followed by lpc_order further values.
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+sequence_size = args.seq_length
+
+# Memory-map the float32 feature file; this script reads no PCM data.
+
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+# Keep only whole sequences, and a whole number of batches of them.
+nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
+features = features[:nb_sequences*sequence_size*nb_features]
+
+features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+print(features.shape)
+# Keep only the first nb_used_features values of every frame.
+features = features[:, :, :nb_used_features]
+
+#lambda_val = np.repeat(np.random.uniform(.0007, .002, (features.shape[0], 1, 1)), features.shape[1]//2, axis=1)
+#quant_id = np.round(10*np.log(lambda_val/.0007)).astype('int16')
+#quant_id = quant_id[:,:,0]
+# One random id in [0, 16) per sequence, repeated along half the sequence length.
+quant_id = np.repeat(np.random.randint(16, size=(features.shape[0], 1, 1), dtype='int16'), features.shape[1]//2, axis=1)
+# lambda grows exponentially with the id; it keeps the trailing axis of size 1.
+lambda_val = .0002*np.exp(quant_id/3.8)
+# Drop the trailing axis of quant_id only, after lambda_val was derived from it.
+quant_id = quant_id[:,:,0]
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.cond_size, '{epoch:02d}'))
+
+if args.retrain is not None:
+ model.load_weights(args.retrain)
+
+if quantize or retrain:
+ #Adapting from an existing model
+ model.load_weights(input_model)
+
+model.save_weights('{}_{}_initial.h5'.format(args.output, args.cond_size))
+
+callbacks = [checkpoint]
+#callbacks = []
+
+if args.logdir is not None:
+ logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.cond_size)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
+ callbacks.append(tensorboard_callback)
+
+model.fit([features, quant_id, lambda_val], [features, features, features, features], batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
diff --git a/dnn/training_tf2/ulaw.py b/dnn/training_tf2/ulaw.py
new file mode 100644
index 00000000..b79d4315
--- /dev/null
+++ b/dnn/training_tf2/ulaw.py
@@ -0,0 +1,19 @@
+
+import numpy as np
+import math
+
+scale = 255.0/32768.0
+scale_1 = 32768.0/255.0
+def ulaw2lin(u):
+ u = u - 128
+ s = np.sign(u)
+ u = np.abs(u)
+ return s*scale_1*(np.exp(u/128.*math.log(256))-1)
+
+
+def lin2ulaw(x):
+ s = np.sign(x)
+ x = np.abs(x)
+ u = (s*(128*np.log(1+scale*x)/math.log(256)))
+ u = np.clip(128 + np.round(u), 0, 255)
+ return u.astype('int16')
diff --git a/dnn/training_tf2/uniform_noise.py b/dnn/training_tf2/uniform_noise.py
new file mode 100644
index 00000000..6197dd5f
--- /dev/null
+++ b/dnn/training_tf2/uniform_noise.py
@@ -0,0 +1,78 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Contains the UniformNoise layer."""
+
+
+import tensorflow.compat.v2 as tf
+
+from tensorflow.keras import backend
+
+from tensorflow.keras.layers import Layer
+
+class UniformNoise(Layer):
+ """Apply additive zero-centered uniform noise.
+
+ This is useful to mitigate overfitting
+ (you could see it as a form of random data augmentation).
+ Gaussian Noise (GS) is a natural choice as corruption process
+ for real valued inputs.
+
+ As it is a regularization layer, it is only active at training time.
+
+ Args:
+ stddev: Float, standard deviation of the noise distribution.
+ seed: Integer, optional random seed to enable deterministic behavior.
+
+ Call arguments:
+ inputs: Input tensor (of any rank).
+ training: Python boolean indicating whether the layer should behave in
+ training mode (adding noise) or in inference mode (doing nothing).
+
+ Input shape:
+ Arbitrary. Use the keyword argument `input_shape`
+ (tuple of integers, does not include the samples axis)
+ when using this layer as the first layer in a model.
+
+ Output shape:
+ Same shape as input.
+ """
+
+
+
+
+ def __init__(self, stddev=0.5, seed=None, **kwargs):
+ super().__init__(**kwargs)
+ self.supports_masking = True
+ self.stddev = stddev
+
+
+ def call(self, inputs, training=None):
+ def noised():
+ return inputs + backend.random_uniform(
+ shape=tf.shape(inputs),
+ minval=-self.stddev,
+ maxval=self.stddev,
+ dtype=inputs.dtype,
+ )
+
+ return backend.in_train_phase(noised, inputs, training=training)
+
+ def get_config(self):
+ config = {"stddev": self.stddev}
+ base_config = super().get_config()
+ return dict(list(base_config.items()) + list(config.items()))
+
+ def compute_output_shape(self, input_shape):
+ return input_shape
diff --git a/dnn/vec.h b/dnn/vec.h
new file mode 100644
index 00000000..8e96cbf3
--- /dev/null
+++ b/dnn/vec.h
@@ -0,0 +1,389 @@
+/* Copyright (c) 2018 Mozilla
+ 2008-2011 Octasic Inc.
+ 2012-2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VEC_H
+#define VEC_H
+
+#include "opus_types.h"
+#include <math.h>
+#include "arch.h"
+#include "x86/x86_arch_macros.h"
+
+
+#if defined(__AVX__) || defined(__SSE2__)
+#include "vec_avx.h"
+#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && !defined(DISABLE_NEON)
+#include "vec_neon.h"
+#else
+
+#include "os_support.h"
+
+#define MAX_INPUTS (2048)
+
+#define NO_OPTIMIZATIONS
+
+/* Dense matrix-vector product out = W*x, handled 16 output rows at a time.
+   Element (i, j) of W is read from weights[j*col_stride + i]. Each outer step
+   updates out[i..i+15], so rows is expected to be a multiple of 16. */
+static inline void sgemv16x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+   int row0, col, k;
+   OPUS_CLEAR(out, rows);
+   for (row0=0;row0<rows;row0+=16)
+   {
+      float * restrict acc = &out[row0];
+      for (col=0;col<cols;col++)
+      {
+         const float * restrict wcol = &weights[col*col_stride + row0];
+         const float xval = x[col];
+         /* Add this column's contribution to the 16 accumulators. */
+         for (k=0;k<16;k++) acc[k] += wcol[k]*xval;
+      }
+   }
+}
+
+/* Dense matrix-vector product out = W*x, handled 8 output rows at a time.
+   Element (i, j) of W is read from weights[j*col_stride + i]. Each outer step
+   updates out[i..i+7], so rows is expected to be a multiple of 8. */
+static inline void sgemv8x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+   int row0, col, k;
+   OPUS_CLEAR(out, rows);
+   for (row0=0;row0<rows;row0+=8)
+   {
+      float * restrict acc = &out[row0];
+      for (col=0;col<cols;col++)
+      {
+         const float * restrict wcol = &weights[col*col_stride + row0];
+         const float xval = x[col];
+         /* Add this column's contribution to the 8 accumulators. */
+         for (k=0;k<8;k++) acc[k] += wcol[k]*xval;
+      }
+   }
+}
+
+/* Dense matrix-vector product out = W*x with W(i, j) = weights[j*col_stride + i].
+   Dispatches to the 16-row or 8-row kernel when rows is a multiple of 16 or 8,
+   and otherwise uses a plain row-by-row loop. */
+static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+   int i, j;
+   if ((rows&0xf) == 0)
+   {
+      sgemv16x1(out, weights, rows, cols, col_stride, x);
+      return;
+   }
+   if ((rows&0x7) == 0)
+   {
+      sgemv8x1(out, weights, rows, cols, col_stride, x);
+      return;
+   }
+   /* Generic fallback for any other row count. */
+   for (i=0;i<rows;i++)
+   {
+      out[i] = 0;
+      for (j=0;j<cols;j++)
+         out[i] += weights[j*col_stride + i]*x[j];
+   }
+}
+
+/* Block-sparse float matrix-vector product using 8x4 blocks.
+   For every group of 8 output rows, idx holds a block count followed by one
+   input position per block. Each block consumes 32 weights, stored as four
+   runs of 8 (one run per input x[pos+k]). */
+static inline void sparse_sgemv8x4(float *out, const float *w, const int *idx, int rows, const float *x)
+{
+   int i, j, k, r;
+   OPUS_CLEAR(out, rows);
+   for (i=0;i<rows;i+=8)
+   {
+      float * restrict y = &out[i];
+      int nb_blocks = *idx++;
+      for (j=0;j<nb_blocks;j++)
+      {
+         const int pos = *idx++;
+         float xv[4];
+         /* Read the four inputs before any accumulator is updated. */
+         for (k=0;k<4;k++) xv[k] = x[pos+k];
+         for (k=0;k<4;k++)
+         {
+            for (r=0;r<8;r++) y[r] += w[8*k+r]*xv[k];
+         }
+         w += 32;
+      }
+   }
+}
+
+#ifdef USE_SU_BIAS
+/* Block-sparse int8 matrix-vector product, unsigned-input (USE_SU_BIAS) variant.
+   The float inputs are quantized to unsigned char as 127 + round(127*x).
+   For every group of 8 output rows, idx holds a block count followed by one
+   input position per block; each block consumes 32 weights (4 per output row).
+   Each output is finally multiplied by scale[i]. cols must not exceed MAX_INPUTS. */
+static inline void sparse_cgemv8x4(float *out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
+{
+   int i, j;
+   unsigned char x[MAX_INPUTS];
+   for (i=0;i<rows;i++) out[i] = 0;
+   /* Go through int, as cgemv8x4() does: converting an out-of-range double
+      straight to unsigned char is undefined, while int to unsigned char wraps. */
+   for (i=0;i<cols;i++) x[i] = 127+(int)floor(.5+127*_x[i]);
+   for (i=0;i<rows;i+=8)
+   {
+      int colblocks;
+      colblocks = *idx++;
+      for (j=0;j<colblocks;j++)
+      {
+         int pos;
+         float * restrict y;
+         int xj0, xj1, xj2, xj3;
+         pos = (*idx++);
+         xj0 = x[pos+0];
+         xj1 = x[pos+1];
+         xj2 = x[pos+2];
+         xj3 = x[pos+3];
+         y = &out[i];
+         /* Integer dot product of 4 weights and 4 inputs per output row. */
+         y[0] += (w[0]*xj0+w[1]*xj1+w[2]*xj2+w[3]*xj3);
+         y[1] += (w[4]*xj0+w[5]*xj1+w[6]*xj2+w[7]*xj3);
+         y[2] += (w[8]*xj0+w[9]*xj1+w[10]*xj2+w[11]*xj3);
+         y[3] += (w[12]*xj0+w[13]*xj1+w[14]*xj2+w[15]*xj3);
+         y[4] += (w[16]*xj0+w[17]*xj1+w[18]*xj2+w[19]*xj3);
+         y[5] += (w[20]*xj0+w[21]*xj1+w[22]*xj2+w[23]*xj3);
+         y[6] += (w[24]*xj0+w[25]*xj1+w[26]*xj2+w[27]*xj3);
+         y[7] += (w[28]*xj0+w[29]*xj1+w[30]*xj2+w[31]*xj3);
+         w += 32;
+      }
+   }
+   for (i=0;i<rows;i++) out[i] *= scale[i];
+}
+/* Dense int8 matrix-vector product, unsigned-input (USE_SU_BIAS) variant.
+   The float inputs are quantized to unsigned char as 127 + round(127*x).
+   Weights are consumed in 8x4 blocks (4 per output row, 32 per block) and
+   each output is finally multiplied by scale[i]. */
+static inline void cgemv8x4(float *out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
+{
+   int i, j, r;
+   unsigned char x[MAX_INPUTS];
+   for (i=0;i<rows;i++) out[i] = 0;
+   for (i=0;i<cols;i++) x[i] = 127+(int)floor(.5+127*_x[i]);
+   for (i=0;i<rows;i+=8)
+   {
+      float *y = &out[i];
+      for (j=0;j<cols;j+=4)
+      {
+         const float x0 = x[j+0];
+         const float x1 = x[j+1];
+         const float x2 = x[j+2];
+         const float x3 = x[j+3];
+         /* Row r of the block uses weights w[4*r] .. w[4*r+3]. */
+         for (r=0;r<8;r++)
+            y[r] += (w[4*r]*x0+w[4*r+1]*x1+w[4*r+2]*x2+w[4*r+3]*x3);
+         w += 32;
+      }
+   }
+   for (i=0;i<rows;i++) out[i] *= scale[i];
+}
+#else
+/* Block-sparse int8 matrix-vector product, signed-input variant.
+   The float inputs are quantized to opus_int8 as round(127*x).
+   For every group of 8 output rows, idx holds a block count followed by one
+   input position per block; each block consumes 32 weights (4 per output row).
+   Each output is finally multiplied by scale[i]. */
+static inline void sparse_cgemv8x4(float *out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
+{
+   int i, j, r;
+   opus_int8 x[MAX_INPUTS];
+   for (i=0;i<rows;i++) out[i] = 0;
+   for (i=0;i<cols;i++) x[i] = (int)floor(.5+127*_x[i]);
+   for (i=0;i<rows;i+=8)
+   {
+      float * restrict y = &out[i];
+      int colblocks = *idx++;
+      for (j=0;j<colblocks;j++)
+      {
+         const int pos = *idx++;
+         const int x0 = x[pos+0];
+         const int x1 = x[pos+1];
+         const int x2 = x[pos+2];
+         const int x3 = x[pos+3];
+         /* Integer dot product; row r uses weights w[4*r] .. w[4*r+3]. */
+         for (r=0;r<8;r++)
+            y[r] += (w[4*r]*x0+w[4*r+1]*x1+w[4*r+2]*x2+w[4*r+3]*x3);
+         w += 32;
+      }
+   }
+   for (i=0;i<rows;i++) out[i] *= scale[i];
+}
+/* Dense int8 matrix-vector product, signed-input variant.
+   The float inputs are quantized to opus_int8 as round(127*x).
+   Weights are consumed in 8x4 blocks (4 per output row, 32 per block) and
+   each output is finally multiplied by scale[i]. */
+static inline void cgemv8x4(float *out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
+{
+   int i, j, r;
+   opus_int8 x[MAX_INPUTS];
+   for (i=0;i<rows;i++) out[i] = 0;
+   for (i=0;i<cols;i++) x[i] = (int)floor(.5+127*_x[i]);
+   for (i=0;i<rows;i+=8)
+   {
+      float *y = &out[i];
+      for (j=0;j<cols;j+=4)
+      {
+         const float x0 = x[j+0];
+         const float x1 = x[j+1];
+         const float x2 = x[j+2];
+         const float x3 = x[j+3];
+         /* Row r of the block uses weights w[4*r] .. w[4*r+3]. */
+         for (r=0;r<8;r++)
+            y[r] += (w[4*r]*x0+w[4*r+1]*x1+w[4*r+2]*x2+w[4*r+3]*x3);
+         w += 32;
+      }
+   }
+   for (i=0;i<rows;i++) out[i] *= scale[i];
+}
+#endif
+
+/* No AVX2/FMA support */
+#ifndef LPCNET_TEST
+/* Fast approximation of 2^x.
+   The fractional part of x goes through a cubic polynomial and the integer
+   part is then added to the exponent field of the float result.
+   Returns 0 when floor(x) is below -50. */
+static inline float lpcnet_exp2(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      opus_uint32 i;
+   } res;
+   integer = (int)floor(x);
+   if (integer < -50)
+      return 0;
+   frac = x-integer;
+   /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
+   res.f = 0.99992522f + frac * (0.69583354f
+           + frac * (0.22606716f + 0.078024523f*frac));
+   /* Shift as unsigned: left-shifting a negative int is undefined behavior,
+      whereas the wrapped unsigned sum yields the intended bit pattern.
+      The mask clears the sign bit. */
+   res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff;
+   return res.f;
+}
+#define lpcnet_exp(x) lpcnet_exp2((x)*1.44269504f)
+
+#define fmadd(a, b, c) ((a)*(b)+(c))
+/* Rational approximation of tanh(x):
+   x*(N0 + N1*x^2 + N2*x^4) / (D0 + D1*x^2 + D2*x^4), clamped to [-1, 1]. */
+static OPUS_INLINE float tanh_approx(float x)
+{
+   const float N0 = 952.52801514f;
+   const float N1 = 96.39235687f;
+   const float N2 = 0.60863042f;
+   const float D0 = 952.72399902f;
+   const float D1 = 413.36801147f;
+   const float D2 = 11.88600922f;
+   float x_sq, numer, denom;
+   x_sq = x*x;
+   /* Both polynomials are evaluated in x^2 with Horner's scheme. */
+   numer = fmadd(fmadd(N2, x_sq, N1), x_sq, N0);
+   denom = fmadd(fmadd(D2, x_sq, D1), x_sq, D0);
+   numer = numer*x/denom;
+   return MAX32(-1.f, MIN32(1.f, numer));
+}
+
+/* Sigmoid approximation built on tanh_approx(): .5 + .5*tanh(x/2). */
+static inline float sigmoid_approx(float x)
+{
+   const float half_x = .5f*x;
+   return .5f + .5f*tanh_approx(half_x);
+}
+
+/* Element-wise exponential: y[i] = lpcnet_exp(x[i]) for i in [0, N).
+   NOTE(review): despite the name, nothing here divides by the sum of the
+   exponentials; presumably callers normalize afterwards -- confirm. */
+static inline void softmax(float *y, const float *x, int N)
+{
+ int i;
+ for (i=0;i<N;i++)
+ y[i] = lpcnet_exp(x[i]);
+}
+
+/* Apply tanh_approx() to each of the N elements of x, writing the results to y. */
+static inline void vec_tanh(float *y, const float *x, int N)
+{
+   int n;
+   for (n=0;n<N;n++) y[n] = tanh_approx(x[n]);
+}
+
+/* Apply sigmoid_approx() to each of the N elements of x, writing the results to y. */
+static inline void vec_sigmoid(float *y, const float *x, int N)
+{
+   int n;
+   for (n=0;n<N;n++) y[n] = sigmoid_approx(x[n]);
+}
+#endif
+
+#define SCALE (128.f*127.f)
+#define SCALE_1 (1.f/128.f/127.f)
+
+#endif /*no optimizations*/
+#endif /*VEC_H*/
diff --git a/dnn/vec_avx.h b/dnn/vec_avx.h
new file mode 100644
index 00000000..979e77fe
--- /dev/null
+++ b/dnn/vec_avx.h
@@ -0,0 +1,884 @@
+/* Copyright (c) 2018 Mozilla
+ 2012-2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/*
+ AVX implementation of vector operations, compile with -mavx
+ AVX2/FMA implementation of vector operations, compile with -mavx2 -mfma
+*/
+
+#ifndef VEC_AVX_H
+#define VEC_AVX_H
+
+#include <immintrin.h>
+#include <math.h>
+#include "celt/x86/x86cpu.h"
+
+#define MAX_INPUTS (2048)
+
+#define USE_SU_BIAS
+
+/* _mm_floor_ps() is an SSE4.1 intrinsic. When SSE4.1 is not enabled, replace
+   it with an SSE2 approximation that rounds (x - 0.5) to the nearest integer
+   (this relies on the default round-to-nearest conversion mode).
+   The guard must test __SSE4_1__ (the macro compilers actually define);
+   the previous spelling __SSE_4_1__ is never defined, which forced the
+   emulation even on SSE4.1/AVX builds. */
+#ifndef __SSE4_1__
+static inline __m128 mm_floor_ps(__m128 x) {
+  __m128 half = _mm_set1_ps(0.5f);
+  return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_sub_ps(x, half)));
+}
+#undef _mm_floor_ps
+#define _mm_floor_ps(x) mm_floor_ps(x)
+#endif
+
+
+/* If we don't have AVX available, emulate what we need with SSE up to 4.1. */
+#ifndef __AVX__
+
+/* Stand-in for a 256-bit float vector: two 128-bit halves.
+   `lo` holds elements 0..3 and `hi` elements 4..7 (see mm256_loadu_ps). */
+typedef struct {
+ __m128 lo;
+ __m128 hi;
+} mm256_emu;
+#define __m256 mm256_emu
+
+/* Unaligned load of 8 consecutive floats starting at src. */
+static inline mm256_emu mm256_loadu_ps(const float *src) {
+ mm256_emu ret;
+ ret.lo = _mm_loadu_ps(&src[0]);
+ ret.hi = _mm_loadu_ps(&src[4]);
+ return ret;
+}
+#define _mm256_loadu_ps(src) mm256_loadu_ps(src)
+
+
+/* Unaligned store of 8 floats to dst[0..7]. */
+static inline void mm256_storeu_ps(float *dst, mm256_emu src) {
+ _mm_storeu_ps(dst, src.lo);
+ _mm_storeu_ps(&dst[4], src.hi);
+}
+#define _mm256_storeu_ps(dst, src) mm256_storeu_ps(dst, src)
+
+
+/* All eight lanes set to 0. */
+static inline mm256_emu mm256_setzero_ps(void) {
+ mm256_emu ret;
+ ret.lo = _mm_setzero_ps();
+ ret.hi = ret.lo;
+ return ret;
+}
+#define _mm256_setzero_ps mm256_setzero_ps
+
+/* All eight lanes set to the float pointed to by x. */
+static inline mm256_emu mm256_broadcast_ss(const float *x) {
+ mm256_emu ret;
+ ret.lo = _mm_set1_ps(*x);
+ ret.hi = ret.lo;
+ return ret;
+}
+#define _mm256_broadcast_ss(x) mm256_broadcast_ss(x)
+
+/* All eight lanes set to x. */
+static inline mm256_emu mm256_set1_ps(float x) {
+ mm256_emu ret;
+ ret.lo = _mm_set1_ps(x);
+ ret.hi = ret.lo;
+ return ret;
+}
+#define _mm256_set1_ps(x) mm256_set1_ps(x)
+
+
+
+/* Lane-wise a*b. */
+static inline mm256_emu mm256_mul_ps(mm256_emu a, mm256_emu b) {
+ mm256_emu ret;
+ ret.lo = _mm_mul_ps(a.lo, b.lo);
+ ret.hi = _mm_mul_ps(a.hi, b.hi);
+ return ret;
+}
+#define _mm256_mul_ps(a,b) mm256_mul_ps(a,b)
+
+/* Lane-wise a+b. */
+static inline mm256_emu mm256_add_ps(mm256_emu a, mm256_emu b) {
+ mm256_emu ret;
+ ret.lo = _mm_add_ps(a.lo, b.lo);
+ ret.hi = _mm_add_ps(a.hi, b.hi);
+ return ret;
+}
+#define _mm256_add_ps(a,b) mm256_add_ps(a,b)
+
+
+/* Lane-wise maximum, applied to each half with _mm_max_ps. */
+static inline mm256_emu mm256_max_ps(mm256_emu a, mm256_emu b) {
+ mm256_emu ret;
+ ret.lo = _mm_max_ps(a.lo, b.lo);
+ ret.hi = _mm_max_ps(a.hi, b.hi);
+ return ret;
+}
+#define _mm256_max_ps(a,b) mm256_max_ps(a,b)
+
+/* Lane-wise minimum, applied to each half with _mm_min_ps. */
+static inline mm256_emu mm256_min_ps(mm256_emu a, mm256_emu b) {
+ mm256_emu ret;
+ ret.lo = _mm_min_ps(a.lo, b.lo);
+ ret.hi = _mm_min_ps(a.hi, b.hi);
+ return ret;
+}
+#define _mm256_min_ps(a,b) mm256_min_ps(a,b)
+
+/* Lane-wise reciprocal estimate, applied to each half with _mm_rcp_ps. */
+static inline mm256_emu mm256_rcp_ps(mm256_emu a) {
+ mm256_emu ret;
+ ret.lo = _mm_rcp_ps(a.lo);
+ ret.hi = _mm_rcp_ps(a.hi);
+ return ret;
+}
+#define _mm256_rcp_ps(a) mm256_rcp_ps(a)
+
+
+/* Returns the low half when i == 0, the high half for any other i. */
+static inline __m128 mm256_extractf128_ps(mm256_emu x, int i) {
+ return (i==0) ? x.lo : x.hi;
+}
+#undef _mm256_extractf128_ps
+#define _mm256_extractf128_ps(x,i) mm256_extractf128_ps(x,i)
+
+/* Returns dst with its low half (i == 0) or high half (any other i)
+   replaced by src. dst is passed by value, so the caller's copy is untouched. */
+static inline mm256_emu mm256_insertf128_ps(mm256_emu dst, __m128 src, int i) {
+ if (i==0) dst.lo = src;
+ else dst.hi = src;
+ return dst;
+}
+#undef _mm256_insertf128_ps
+#define _mm256_insertf128_ps(dst,src,i) mm256_insertf128_ps(dst,src,i)
+
+#endif /* __AVX__ */
+
+
+
+/* If we don't have AVX2 available, emulate what we need with SSE up to 4.1. */
+#ifndef __AVX2__
+
+/* Stand-in for a 256-bit integer vector: two 128-bit halves. */
+typedef struct {
+ __m128i lo;
+ __m128i hi;
+} mm256i_emu;
+/* Keep a name for the compiler's own __m256i before the macro below hides it. */
+typedef __m256i real_m256i;
+#define __m256i mm256i_emu
+
+/* All bits cleared. */
+static inline mm256i_emu mm256_setzero_si256(void) {
+ mm256i_emu ret;
+ ret.lo = _mm_setzero_si128();
+ ret.hi = ret.lo;
+ return ret;
+}
+#define _mm256_setzero_si256 mm256_setzero_si256
+
+
+/* Unaligned load of 32 bytes from src (two consecutive 16-byte loads). */
+static inline mm256i_emu mm256_loadu_si256(const mm256i_emu *src) {
+ mm256i_emu ret;
+ ret.lo = _mm_loadu_si128((const __m128i*)src);
+ ret.hi = _mm_loadu_si128(&((const __m128i*)src)[1]);
+ return ret;
+}
+#define _mm256_loadu_si256(src) mm256_loadu_si256(src)
+
+
+/* Unaligned store of 32 bytes to dst (two consecutive 16-byte stores). */
+static inline void mm256_storeu_si256(mm256i_emu *dst, mm256i_emu src) {
+ _mm_storeu_si128((__m128i*)dst, src.lo);
+ _mm_storeu_si128(&((__m128i*)dst)[1], src.hi);
+}
+#define _mm256_storeu_si256(dst, src) mm256_storeu_si256(dst, src)
+
+
+/* Replicates the lowest 32-bit element of x into all eight 32-bit lanes. */
+static inline mm256i_emu mm256_broadcastd_epi32(__m128i x) {
+ mm256i_emu ret;
+ ret.hi = ret.lo = _mm_shuffle_epi32(x, 0);
+ return ret;
+}
+#define _mm256_broadcastd_epi32(x) mm256_broadcastd_epi32(x)
+
+
+/* All eight 32-bit lanes set to x. */
+static inline mm256i_emu mm256_set1_epi32(int x) {
+ mm256i_emu ret;
+ ret.lo = _mm_set1_epi32(x);
+ ret.hi = ret.lo;
+ return ret;
+}
+#define _mm256_set1_epi32(x) mm256_set1_epi32(x)
+
+/* All sixteen 16-bit lanes set to x. */
+static inline mm256i_emu mm256_set1_epi16(int x) {
+ mm256i_emu ret;
+ ret.lo = _mm_set1_epi16(x);
+ ret.hi = ret.lo;
+ return ret;
+}
+#define _mm256_set1_epi16(x) mm256_set1_epi16(x)
+
+
+/* 32-bit lane-wise a+b. */
+static inline mm256i_emu mm256_add_epi32(mm256i_emu a, mm256i_emu b) {
+ mm256i_emu ret;
+ ret.lo = _mm_add_epi32(a.lo, b.lo);
+ ret.hi = _mm_add_epi32(a.hi, b.hi);
+ return ret;
+}
+#define _mm256_add_epi32(a,b) mm256_add_epi32(a,b)
+
+/* _mm_madd_epi16 applied to each half. */
+static inline mm256i_emu mm256_madd_epi16(mm256i_emu a, mm256i_emu b) {
+ mm256i_emu ret;
+ ret.lo = _mm_madd_epi16(a.lo, b.lo);
+ ret.hi = _mm_madd_epi16(a.hi, b.hi);
+ return ret;
+}
+#define _mm256_madd_epi16(a,b) mm256_madd_epi16(a,b)
+
+/* _mm_maddubs_epi16 applied to each half. */
+static inline mm256i_emu mm256_maddubs_epi16(mm256i_emu a, mm256i_emu b) {
+ mm256i_emu ret;
+ ret.lo = _mm_maddubs_epi16(a.lo, b.lo);
+ ret.hi = _mm_maddubs_epi16(a.hi, b.hi);
+ return ret;
+}
+#define _mm256_maddubs_epi16(a,b) mm256_maddubs_epi16(a,b)
+
+
+
+/* Emulating the conversion functions is tricky because they use __m256i but are defined in AVX.
+ So we need a special case for when AVX is available but AVX2 is not. */
+#ifdef __AVX__
+
+/* Reinterprets between the emulated integer vector and the compiler's real
+   __m256i. NOTE(review): assumes both types have the same 32-byte layout. */
+typedef union {
+ mm256i_emu fake;
+ real_m256i real;
+} mm256_union;
+
+/* int32 -> float conversion using the real AVX intrinsic. The shadowing macro
+   is defined after this function, so the call below still reaches the intrinsic. */
+static inline __m256 mm256_cvtepi32_ps(mm256i_emu a) {
+ mm256_union src;
+ src.fake = a;
+ return _mm256_cvtepi32_ps(src.real);
+}
+#define _mm256_cvtepi32_ps(a) mm256_cvtepi32_ps(a)
+
+/* float -> int32 conversion using the real AVX intrinsic (same ordering note as above). */
+static inline mm256i_emu mm256_cvtps_epi32(__m256 a) {
+ mm256_union ret;
+ ret.real = _mm256_cvtps_epi32(a);
+ return ret.fake;
+}
+#define _mm256_cvtps_epi32(a) mm256_cvtps_epi32(a)
+
+
+#else
+
+/* int32 -> float conversion, done per 128-bit half. */
+static inline mm256_emu mm256_cvtepi32_ps(mm256i_emu a) {
+ mm256_emu ret;
+ ret.lo = _mm_cvtepi32_ps(a.lo);
+ ret.hi = _mm_cvtepi32_ps(a.hi);
+ return ret;
+}
+#define _mm256_cvtepi32_ps(a) mm256_cvtepi32_ps(a)
+
+/* float -> int32 conversion, done per 128-bit half. */
+static inline mm256i_emu mm256_cvtps_epi32(mm256_emu a) {
+ mm256i_emu ret;
+ ret.lo = _mm_cvtps_epi32(a.lo);
+ ret.hi = _mm_cvtps_epi32(a.hi);
+ return ret;
+}
+#define _mm256_cvtps_epi32(a) mm256_cvtps_epi32(a)
+
+#endif /* __AVX__ */
+
+
+#endif /* __AVX2__ */
+
+/* In case we don't have FMA, make it a mul and an add. */
+#if !(defined(__FMA__) && defined(__AVX__))
+#define _mm256_fmadd_ps(a,b,c) _mm256_add_ps(_mm256_mul_ps(a, b), c)
+#define _mm_fmadd_ps(a,b,c) _mm_add_ps(_mm_mul_ps(a, b), c)
+#endif
+
+#ifdef __AVX2__
+/* Approximates exp(X) on eight floats (AVX2 path).
+   X is rescaled to a base-2 exponent, clamped to [-50, 50] and split into
+   floor and fractional parts; a cubic polynomial approximates 2^frac and the
+   integer part is added into the float exponent field. */
+static inline __m256 exp8_approx(__m256 X)
+{
+ const __m256 K0 = _mm256_set1_ps(0.99992522f);
+ const __m256 K1 = _mm256_set1_ps(0.69583354f);
+ const __m256 K2 = _mm256_set1_ps(0.22606716f);
+ const __m256 K3 = _mm256_set1_ps(0.078024523f);
+ const __m256 log2_E = _mm256_set1_ps(1.44269504f);
+ const __m256 max_in = _mm256_set1_ps(50.f);
+ const __m256 min_in = _mm256_set1_ps(-50.f);
+ __m256 XF, Y;
+ __m256i I;
+ /* exp(x) = 2^(x*log2(e)) */
+ X = _mm256_mul_ps(X, log2_E);
+ X = _mm256_max_ps(min_in, _mm256_min_ps(max_in, X));
+ XF = _mm256_floor_ps(X);
+ I = _mm256_cvtps_epi32(XF);
+ /* X now holds the fractional part */
+ X = _mm256_sub_ps(X, XF);
+ /* Horner form of K0 + K1*X + K2*X^2 + K3*X^3 */
+ Y = _mm256_fmadd_ps(_mm256_fmadd_ps(_mm256_fmadd_ps(K3, X, K2), X, K1), X, K0);
+ /* Move the integer part to the exponent bit position and add it to Y's bits */
+ I = _mm256_slli_epi32(I, 23);
+ Y = _mm256_castsi256_ps(_mm256_add_epi32(I, _mm256_castps_si256(Y)));
+ return Y;
+}
+
+/* Converts exactly 8 floats at src to 8 unsigned bytes at dst:
+   dst[k] = 127*src[k] + 127, rounded by the float->int32 conversion, then
+   narrowed with unsigned-saturating packs. Reads 32 bytes, writes 8 bytes. */
+static inline void vector_ps8_to_epi8(unsigned char *dst, const float *src) {
+  const __m256 const127 = _mm256_set1_ps(127.f);
+  __m256 xf;
+  __m256i xi;
+  xf = _mm256_loadu_ps(src);
+  xf = _mm256_fmadd_ps(xf, const127, const127);
+  xi = _mm256_cvtps_epi32(xf);
+  xi = _mm256_packus_epi32(xi, _mm256_setzero_si256());
+  xi = _mm256_permute4x64_epi64(xi, 0xD8);
+  xi = _mm256_packus_epi16(xi, _mm256_setzero_si256());
+  xi = _mm256_permutevar8x32_epi32(xi, _mm256_setr_epi32(0,1, 0,0, 0,0, 0,0));
+  /* Only the low 64 bits carry results. Storing just those avoids writing
+     24 bytes of filler past dst[7], as the previous 256-bit store did. */
+  _mm_storel_epi64((__m128i *)(void*)dst, _mm256_castsi256_si128(xi));
+}
+
+/* Quantizes len floats from _x into unsigned bytes in x (AVX2 path).
+   Exactly len bytes are written and exactly len floats are read, so len
+   need not be a multiple of 8 and x only needs room for len bytes. */
+static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len) {
+  int i;
+  for (i=0;i<len-7;i+=8) {
+    vector_ps8_to_epi8(&x[i], &_x[i]);
+  }
+  if (i<len) {
+    /* Partial last group: run the same conversion on a zero-padded copy
+       so nothing is read past _x[len-1] or written past x[len-1]. */
+    float in[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+    unsigned char out[8];
+    int k;
+    for (k=0;k<len-i;k++) in[k] = _x[i+k];
+    vector_ps8_to_epi8(out, in);
+    for (k=0;k<len-i;k++) x[i+k] = out[k];
+  }
+}
+
+#else
+/* Approximates exp(X) on four floats (SSE path).
+   Same scheme as the 8-wide version: rescale to base 2, clamp to [-50, 50],
+   split into floor and fraction, cubic polynomial for 2^frac, then add the
+   integer part into the exponent bits. The sign bit is masked off at the end. */
+static inline __m128 exp4_approx(__m128 X)
+{
+ const __m128 K0 = _mm_set1_ps(0.99992522f);
+ const __m128 K1 = _mm_set1_ps(0.69583354f);
+ const __m128 K2 = _mm_set1_ps(0.22606716f);
+ const __m128 K3 = _mm_set1_ps(0.078024523f);
+ const __m128 log2_E = _mm_set1_ps(1.44269504);
+ const __m128 max_in = _mm_set1_ps(50.f);
+ const __m128 min_in = _mm_set1_ps(-50.f);
+ const __m128i mask = _mm_set1_epi32(0x7fffffff);
+ __m128 XF, Y;
+ __m128i I;
+ /* exp(x) = 2^(x*log2(e)) */
+ X = _mm_mul_ps(X, log2_E);
+ X = _mm_max_ps(min_in, _mm_min_ps(max_in, X));
+ /* _mm_floor_ps may be the emulation defined earlier in this file */
+ XF = _mm_floor_ps(X);
+ I = _mm_cvtps_epi32(XF);
+ /* X now holds the fractional part */
+ X = _mm_sub_ps(X, XF);
+ Y = _mm_fmadd_ps(_mm_fmadd_ps(_mm_fmadd_ps(K3, X, K2), X, K1), X, K0);
+ I = _mm_slli_epi32(I, 23);
+ Y = _mm_castsi128_ps(_mm_and_si128(mask, _mm_add_epi32(I, _mm_castps_si128(Y))));
+ return Y;
+}
+/* Eight-wide exp() built from two exp4_approx() calls, one per 128-bit half. */
+static inline __m256 exp8_approx(__m256 X)
+{
+  __m128 upper, lower;
+  __m256 result;
+  upper = exp4_approx(_mm256_extractf128_ps(X, 1));
+  lower = exp4_approx(_mm256_extractf128_ps(X, 0));
+  result = _mm256_setzero_ps();
+  result = _mm256_insertf128_ps(result, upper, 1);
+  result = _mm256_insertf128_ps(result, lower, 0);
+  return result;
+}
+
+/* Portable fallback: x[k] = 127 + floor(0.5 + 127*_x[k]) for k in [0, len). */
+static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len) {
+  int k = 0;
+  while (k < len) {
+    const int rounded = (int)floor(.5+127*_x[k]);
+    x[k] = 127+rounded;
+    k++;
+  }
+}
+
+#endif
+
+
+#ifdef __AVX__
+
+/* Approximating tanh() using a Padé-like rational function:
+ tanh(x) ~= x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
+ subject to the +/- 1 bounds.
+ The coefficients were determined by gradient descent trying to minimize
+ the maximum deviation over the whole range (this is only possible because
+ of the bounds). The max error is around 3e-4 and is dominated by the
+ reciprocal approximation (the max error of the rational function is
+ around 6e-5).
+ */
+static inline __m256 tanh8_approx(__m256 X)
+{
+ const __m256 N0 = _mm256_set1_ps(952.52801514f);
+ const __m256 N1 = _mm256_set1_ps(96.39235687f);
+ const __m256 N2 = _mm256_set1_ps(0.60863042f);
+ const __m256 D0 = _mm256_set1_ps(952.72399902f);
+ const __m256 D1 = _mm256_set1_ps(413.36801147f);
+ const __m256 D2 = _mm256_set1_ps(11.88600922f);
+ const __m256 max_out = _mm256_set1_ps(1.f);
+ const __m256 min_out = _mm256_set1_ps(-1.f);
+ __m256 X2, num, den;
+ X2 = _mm256_mul_ps(X, X);
+ /* Horner evaluation of numerator and denominator polynomials in x^2 */
+ num = _mm256_fmadd_ps(_mm256_fmadd_ps(N2, X2, N1), X2, N0);
+ den = _mm256_fmadd_ps(_mm256_fmadd_ps(D2, X2, D1), X2, D0);
+ num = _mm256_mul_ps(num, X);
+ /* Division is replaced by a multiply with the reciprocal estimate */
+ den = _mm256_rcp_ps(den);
+ num = _mm256_mul_ps(num, den);
+ /* Clamp to [-1, 1] */
+ return _mm256_max_ps(min_out, _mm256_min_ps(max_out, num));
+}
+
+/* Sigmoid approximation using a Padé-like rational function:
+ 1/(1+exp(-x)) ~= 0.5 + x * (N0 + N1*x^2 + N2*x^4)/(D0 + D1*x^2 + D2*x^4)
+ subject to the [0, 1] bounds.
+ The coefficients are directly derived by dividing the tanh() coefficients
+ by powers of two to get the correct scaling. The max error is around 1.5e-4
+ and is dominated by the reciprocal approximation (the max error of the
+ rational function is around 3e-5).
+ */
+static inline __m256 sigmoid8_approx(__m256 X)
+{
+ const __m256 N0 = _mm256_set1_ps(238.13200378f);
+ const __m256 N1 = _mm256_set1_ps(6.02452230f);
+ const __m256 N2 = _mm256_set1_ps(0.00950985f);
+ const __m256 D0 = _mm256_set1_ps(952.72399902f);
+ const __m256 D1 = _mm256_set1_ps(103.34200287f);
+ const __m256 D2 = _mm256_set1_ps(0.74287558f);
+ const __m256 half = _mm256_set1_ps(0.5);
+ const __m256 max_out = _mm256_set1_ps(1.f);
+ const __m256 min_out = _mm256_set1_ps(0.f);
+ __m256 X2, num, den;
+ X2 = _mm256_mul_ps(X, X);
+ /* Horner evaluation of numerator and denominator polynomials in x^2 */
+ num = _mm256_fmadd_ps(_mm256_fmadd_ps(N2, X2, N1), X2, N0);
+ den = _mm256_fmadd_ps(_mm256_fmadd_ps(D2, X2, D1), X2, D0);
+ num = _mm256_mul_ps(num, X);
+ /* Division is replaced by a multiply with the reciprocal estimate */
+ den = _mm256_rcp_ps(den);
+ /* 0.5 + num/den */
+ num = _mm256_fmadd_ps(num, den, half);
+ /* Clamp to [0, 1] */
+ return _mm256_max_ps(min_out, _mm256_min_ps(max_out, num));
+}
+
+/* Scalar front end for tanh8_approx(): broadcast x to all lanes, evaluate,
+   and return lane 0. */
+static inline float tanh_approx(float x)
+{
+  float lanes[8];
+  _mm256_storeu_ps(lanes, tanh8_approx(_mm256_set1_ps(x)));
+  return lanes[0];
+}
+
+/* Scalar front end for sigmoid8_approx(): broadcast x to all lanes, evaluate,
+   and return lane 0. */
+static inline float sigmoid_approx(float x)
+{
+  float lanes[8];
+  _mm256_storeu_ps(lanes, sigmoid8_approx(_mm256_set1_ps(x)));
+  return lanes[0];
+}
+
+#else
+
+/* Four-wide tanh() approximation: same rational function and coefficients
+   as tanh8_approx(), using SSE operations; the result is clamped to [-1, 1]. */
+static inline __m128 tanh4_approx(__m128 X)
+{
+ const __m128 N0 = _mm_set1_ps(952.52801514f);
+ const __m128 N1 = _mm_set1_ps(96.39235687f);
+ const __m128 N2 = _mm_set1_ps(0.60863042f);
+ const __m128 D0 = _mm_set1_ps(952.72399902f);
+ const __m128 D1 = _mm_set1_ps(413.36801147f);
+ const __m128 D2 = _mm_set1_ps(11.88600922f);
+ const __m128 max_out = _mm_set1_ps(1.f);
+ const __m128 min_out = _mm_set1_ps(-1.f);
+ __m128 X2, num, den;
+ X2 = _mm_mul_ps(X, X);
+ num = _mm_fmadd_ps(_mm_fmadd_ps(N2, X2, N1), X2, N0);
+ den = _mm_fmadd_ps(_mm_fmadd_ps(D2, X2, D1), X2, D0);
+ num = _mm_mul_ps(num, X);
+ /* Reciprocal estimate instead of a true division */
+ den = _mm_rcp_ps(den);
+ num = _mm_mul_ps(num, den);
+ return _mm_max_ps(min_out, _mm_min_ps(max_out, num));
+}
+
+/* Four-wide sigmoid approximation: same rational function and coefficients
+   as sigmoid8_approx(), using SSE operations; the result is clamped to [0, 1]. */
+static inline __m128 sigmoid4_approx(__m128 X)
+{
+ const __m128 N0 = _mm_set1_ps(238.13200378f);
+ const __m128 N1 = _mm_set1_ps(6.02452230f);
+ const __m128 N2 = _mm_set1_ps(0.00950985f);
+ const __m128 D0 = _mm_set1_ps(952.72399902f);
+ const __m128 D1 = _mm_set1_ps(103.34200287f);
+ const __m128 D2 = _mm_set1_ps(0.74287558f);
+ const __m128 half = _mm_set1_ps(0.5);
+ const __m128 max_out = _mm_set1_ps(1.f);
+ const __m128 min_out = _mm_set1_ps(0.f);
+ __m128 X2, num, den;
+ X2 = _mm_mul_ps(X, X);
+ num = _mm_fmadd_ps(_mm_fmadd_ps(N2, X2, N1), X2, N0);
+ den = _mm_fmadd_ps(_mm_fmadd_ps(D2, X2, D1), X2, D0);
+ num = _mm_mul_ps(num, X);
+ /* Reciprocal estimate instead of a true division */
+ den = _mm_rcp_ps(den);
+ /* 0.5 + num/den */
+ num = _mm_fmadd_ps(num, den, half);
+ return _mm_max_ps(min_out, _mm_min_ps(max_out, num));
+}
+
+/* Scalar front end for tanh4_approx(): broadcast x to all lanes, evaluate,
+   and return lane 0. */
+static inline float tanh_approx(float x)
+{
+  float lanes[4];
+  _mm_storeu_ps(lanes, tanh4_approx(_mm_set1_ps(x)));
+  return lanes[0];
+}
+
+/* Scalar front end for sigmoid4_approx(): broadcast x to all lanes, evaluate,
+   and return lane 0. */
+static inline float sigmoid_approx(float x)
+{
+  float lanes[4];
+  _mm_storeu_ps(lanes, sigmoid4_approx(_mm_set1_ps(x)));
+  return lanes[0];
+}
+
+#endif
+
+/* Scalar exp() approximation: broadcast x, run exp8_approx(), return lane 0. */
+static inline float lpcnet_exp(float x)
+{
+  float lanes[8];
+  _mm256_storeu_ps(lanes, exp8_approx(_mm256_set1_ps(x)));
+  return lanes[0];
+}
+
+/* Writes the exp() approximation of x[k] to y[k] for k in [0, N).
+   Despite the name, no normalisation by the sum is done in this function. */
+static inline void softmax(float *y, const float *x, int N)
+{
+  int pos = 0;
+  /* Full groups of eight go through the vector exponential. */
+  while (N-pos >= 8)
+  {
+    _mm256_storeu_ps(&y[pos], exp8_approx(_mm256_loadu_ps(&x[pos])));
+    pos += 8;
+  }
+  /* Remaining elements (fewer than eight), one at a time. */
+  while (pos < N)
+  {
+    y[pos] = lpcnet_exp(x[pos]);
+    pos++;
+  }
+}
+
+#ifdef __AVX__
+/* Element-wise tanh approximation over N floats, eight at a time with
+   tanh8_approx() and the remainder with tanh_approx(). */
+static inline void vec_tanh(float *y, const float *x, int N)
+{
+  int pos = 0;
+  while (N-pos >= 8)
+  {
+    _mm256_storeu_ps(&y[pos], tanh8_approx(_mm256_loadu_ps(&x[pos])));
+    pos += 8;
+  }
+  for (;pos<N;pos++)
+    y[pos] = tanh_approx(x[pos]);
+}
+
+/* Element-wise sigmoid approximation over N floats, eight at a time with
+   sigmoid8_approx() and the remainder with sigmoid_approx(). */
+static inline void vec_sigmoid(float *y, const float *x, int N)
+{
+  int pos = 0;
+  while (N-pos >= 8)
+  {
+    _mm256_storeu_ps(&y[pos], sigmoid8_approx(_mm256_loadu_ps(&x[pos])));
+    pos += 8;
+  }
+  for (;pos<N;pos++)
+    y[pos] = sigmoid_approx(x[pos]);
+}
+#else
+/* Element-wise tanh approximation over N floats, four at a time with
+   tanh4_approx() and the remainder with tanh_approx(). */
+static inline void vec_tanh(float *y, const float *x, int N)
+{
+  int pos = 0;
+  while (N-pos >= 4)
+  {
+    _mm_storeu_ps(&y[pos], tanh4_approx(_mm_loadu_ps(&x[pos])));
+    pos += 4;
+  }
+  for (;pos<N;pos++)
+    y[pos] = tanh_approx(x[pos]);
+}
+
+/* Element-wise sigmoid approximation over N floats, four at a time with
+   sigmoid4_approx() and the remainder with sigmoid_approx(). */
+static inline void vec_sigmoid(float *y, const float *x, int N)
+{
+  int pos = 0;
+  while (N-pos >= 4)
+  {
+    _mm_storeu_ps(&y[pos], sigmoid4_approx(_mm_loadu_ps(&x[pos])));
+    pos += 4;
+  }
+  for (;pos<N;pos++)
+    y[pos] = sigmoid_approx(x[pos]);
+}
+
+#endif
+
+#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
+
+#define opus_mm256_dpbusds_epi32(src, a, b) _mm256_dpbusds_epi32(src, a, b)
+
+#elif defined(__AVX2__)
+
+/* AVX2 substitute for the VNNI dot-product instruction: multiplies bytes of
+   a and b pairwise (maddubs), sums adjacent 16-bit results into 32-bit lanes
+   (madd with ones), and adds the result to src. */
+static inline __m256i opus_mm256_dpbusds_epi32(__m256i src, __m256i a, __m256i b) {
+  const __m256i unit16 = _mm256_set1_epi16(1);
+  const __m256i pairs16 = _mm256_maddubs_epi16(a, b);
+  const __m256i quads32 = _mm256_madd_epi16(pairs16, unit16);
+  return _mm256_add_epi32(src, quads32);
+}
+
+#elif defined(__SSSE3__)
+
+/* SSSE3 substitute for the VNNI dot-product instruction, operating on the
+   emulated 256-bit type: maddubs on byte pairs, madd with ones to widen to
+   32-bit sums, then add to src. */
+static inline mm256i_emu opus_mm256_dpbusds_epi32(mm256i_emu src, mm256i_emu a, mm256i_emu b) {
+  const mm256i_emu unit16 = _mm256_set1_epi16(1);
+  const mm256i_emu pairs16 = _mm256_maddubs_epi16(a, b);
+  const mm256i_emu quads32 = _mm256_madd_epi16(pairs16, unit16);
+  return _mm256_add_epi32(src, quads32);
+}
+
+#elif defined(__SSE2__)
+
+/* SSE2-only byte dot product: for each 32-bit lane, adds to src the sum of
+   the four products of a's bytes (taken as unsigned) with b's bytes (taken
+   as signed). */
+static inline __m128i mm_dpbusds_epi32(__m128i src, __m128i a, __m128i b) {
+  /* High byte of every 16-bit lane: a zero-extended, b sign-extended. */
+  const __m128i a_hi8 = _mm_srli_epi16(a, 8);
+  const __m128i b_hi8 = _mm_srai_epi16(b, 8);
+  /* Low byte of every 16-bit lane, extended the same way. */
+  const __m128i a_lo8 = _mm_srli_epi16(_mm_slli_epi16(a, 8), 8);
+  const __m128i b_lo8 = _mm_srai_epi16(_mm_slli_epi16(b, 8), 8);
+  const __m128i hi_sum = _mm_madd_epi16(a_hi8, b_hi8);
+  const __m128i lo_sum = _mm_madd_epi16(a_lo8, b_lo8);
+  return _mm_add_epi32(src, _mm_add_epi32(hi_sum, lo_sum));
+}
+
+/* SSE2 version on the emulated 256-bit type: applies mm_dpbusds_epi32() to
+   each 128-bit half independently. */
+static inline mm256i_emu opus_mm256_dpbusds_epi32(mm256i_emu src, mm256i_emu a, mm256i_emu b) {
+  mm256i_emu acc;
+  acc.hi = mm_dpbusds_epi32(src.hi, a.hi, b.hi);
+  acc.lo = mm_dpbusds_epi32(src.lo, a.lo, b.lo);
+  return acc;
+}
+
+
+#else
+
+#error "No optimizations in vec_avx.h. This should never happen. "
+#endif
+
+/* Float matrix-vector product: out[i] = sum over j of weights[j*col_stride + i]*x[j]
+   for i in [0, rows), j in [0, cols). Element (i, j) of the matrix is read
+   from weights[j*col_stride + i].
+   Rows are handled in groups of 16, then 8, then 4, then one at a time, so
+   any row count is accepted. */
+static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+ int i, j;
+ i=0;
+ /* 16 rows per iteration, using two 8-wide accumulators */
+ for (;i<rows-15;i+=16)
+ {
+ float *y;
+ __m256 vy0, vy8;
+ y = &out[i];
+ vy0 = _mm256_setzero_ps();
+ vy8 = _mm256_setzero_ps();
+ for (j=0;j<cols;j++)
+ {
+ __m256 vxj;
+ __m256 vw;
+ vxj = _mm256_broadcast_ss(&x[j]);
+
+ vw = _mm256_loadu_ps(&weights[j*col_stride + i]);
+ vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
+
+ vw = _mm256_loadu_ps(&weights[j*col_stride + i + 8]);
+ vy8 = _mm256_fmadd_ps(vw, vxj, vy8);
+ }
+ _mm256_storeu_ps (&y[0], vy0);
+ _mm256_storeu_ps (&y[8], vy8);
+ }
+ /* 8 rows per iteration */
+ for (;i<rows-7;i+=8)
+ {
+ float *y;
+ __m256 vy0;
+ y = &out[i];
+ vy0 = _mm256_setzero_ps();
+ for (j=0;j<cols;j++)
+ {
+ __m256 vxj;
+ __m256 vw;
+ vxj = _mm256_broadcast_ss(&x[j]);
+
+ vw = _mm256_loadu_ps(&weights[j*col_stride + i]);
+ vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
+ }
+ _mm256_storeu_ps (&y[0], vy0);
+ }
+ /* 4 rows per iteration, using 128-bit operations */
+ for (;i<rows-3;i+=4)
+ {
+ float *y;
+ __m128 vy0;
+ y = &out[i];
+ vy0 = _mm_setzero_ps();
+ for (j=0;j<cols;j++)
+ {
+ __m128 vxj;
+ __m128 vw;
+ vxj = _mm_set1_ps(x[j]);
+
+ vw = _mm_loadu_ps(&weights[j*col_stride + i]);
+ vy0 = _mm_fmadd_ps(vw, vxj, vy0);
+ }
+ _mm_storeu_ps (&y[0], vy0);
+ }
+ /* Scalar loop for the last 0..3 rows */
+ for (;i<rows;i++)
+ {
+ out[i] = 0;
+ for (j=0;j<cols;j++) out[i] += weights[j*col_stride + i]*x[j];
+ }
+}
+
+/* Block-sparse float matrix-vector product with 8x4 blocks.
+   For each group of 8 output rows, idx supplies a block count followed by
+   that many input positions. Each block consumes 32 consecutive weights:
+   4 input columns (x[id] .. x[id+3]) times 8 rows, 8 weights per column.
+   Each iteration stores 8 outputs, so rows is presumably a multiple of 8
+   (not checked here). */
+static inline void sparse_sgemv8x4(float *out, const float *weights, const int *idx, int rows, const float *x)
+{
+ int i, j;
+ for (i=0;i<rows;i+=8)
+ {
+ float *y;
+ int cols;
+ __m256 vy0;
+ y = &out[i];
+ vy0 = _mm256_setzero_ps();
+ /* number of non-zero blocks for this group of rows */
+ cols = *idx++;
+ for (j=0;j<cols;j++)
+ {
+ int id;
+ __m256 vxj;
+ __m256 vw;
+ /* position in x of the first of the block's 4 inputs */
+ id = *idx++;
+ vxj = _mm256_broadcast_ss(&x[id]);
+ vw = _mm256_loadu_ps(&weights[0]);
+ vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
+
+ vxj = _mm256_broadcast_ss(&x[id+1]);
+ vw = _mm256_loadu_ps(&weights[8]);
+ vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
+
+ vxj = _mm256_broadcast_ss(&x[id+2]);
+ vw = _mm256_loadu_ps(&weights[16]);
+ vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
+
+ vxj = _mm256_broadcast_ss(&x[id+3]);
+ vw = _mm256_loadu_ps(&weights[24]);
+ vy0 = _mm256_fmadd_ps(vw, vxj, vy0);
+
+ weights += 32;
+ }
+ _mm256_storeu_ps (&y[0], vy0);
+ }
+}
+
+/* Block-sparse 8-bit matrix-vector product with 8x4 blocks.
+   The float input _x is first quantized to unsigned bytes in a local buffer
+   of MAX_INPUTS bytes; cols is not checked against MAX_INPUTS here, so the
+   caller must keep it within that limit.
+   For each group of 8 output rows, idx supplies a block count followed by
+   that many input positions; each block uses 4 input bytes and 32 weight
+   bytes. The 32-bit integer sums are converted to float and multiplied by
+   scale[i..i+7] before being stored to _out.
+   Each iteration stores 8 outputs, so rows is presumably a multiple of 8. */
+static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
+{
+ int i, j;
+ unsigned char x[MAX_INPUTS];
+ /*for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);*/
+ vector_ps_to_epi8(x, _x, cols);
+ for (i=0;i<rows;i+=8)
+ {
+ int colblocks;
+ __m256i vy0;
+ __m256 vout;
+ /* number of non-zero blocks for this group of rows */
+ colblocks = *idx++;
+ vy0 = _mm256_setzero_si256();
+ j=0;
+#if 1 /* Unrolling by 4 gives some gain, comment out if it does not. */
+ for (;j<colblocks-3;j+=4)
+ {
+ __m256i vxj;
+ __m256i vw;
+ /* 4 input bytes replicated to every 32-bit lane, times 32 weight bytes */
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ }
+#endif
+ /* Remaining blocks, one at a time */
+ for (;j<colblocks;j++)
+ {
+ __m256i vxj;
+ __m256i vw;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[*idx++]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ }
+ /* Integer sums -> float, then apply the per-row scale */
+ vout = _mm256_cvtepi32_ps(vy0);
+ vout = _mm256_mul_ps(vout, _mm256_loadu_ps(&scale[i]));
+ _mm256_storeu_ps(&_out[i], vout);
+ }
+}
+/* Dense 8-bit matrix-vector product with 8x4 blocks.
+   The float input _x is first quantized to unsigned bytes in a local buffer
+   of MAX_INPUTS bytes; cols is not checked against MAX_INPUTS here, so the
+   caller must keep it within that limit.
+   For each group of 8 output rows the inputs are consumed 4 bytes at a time
+   against 32 weight bytes; w advances sequentially through the whole matrix.
+   The 32-bit integer sums are converted to float and multiplied by
+   scale[i..i+7]. rows is presumably a multiple of 8 and cols a multiple of 4
+   (neither is checked here). */
+static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
+{
+ int i, j;
+ unsigned char x[MAX_INPUTS];
+ /*for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);*/
+ vector_ps_to_epi8(x, _x, cols);
+ for (i=0;i<rows;i+=8)
+ {
+ __m256i vy0;
+ __m256 vout;
+ vy0 = _mm256_setzero_si256();
+ j=0;
+#if 1 /* Unrolling by 4 gives some gain, comment out if it does not. */
+ /* 16 inputs (four 4-byte groups) per iteration */
+ for (;j<cols-12;j+=16)
+ {
+ __m256i vxj;
+ __m256i vw;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j+4]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j+8]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j+12]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ }
+#endif
+ /* Remaining inputs, 4 at a time */
+ for (;j<cols;j+=4)
+ {
+ __m256i vxj;
+ __m256i vw;
+ vxj = _mm256_broadcastd_epi32(_mm_loadu_si32(&x[j]));
+ vw = _mm256_loadu_si256((const __m256i *)(void*)w);
+ vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
+ w += 32;
+ }
+ /* Integer sums -> float, then apply the per-row scale */
+ vout = _mm256_cvtepi32_ps(vy0);
+ vout = _mm256_mul_ps(vout, _mm256_loadu_ps(&scale[i]));
+ _mm256_storeu_ps(&_out[i], vout);
+ }
+}
+
+#define SCALE (128.f*127.f)
+#define SCALE_1 (1.f/128.f/127.f)
+#define USE_SU_BIAS
+
+
+#endif /*VEC_AVX_H*/
diff --git a/dnn/vec_neon.h b/dnn/vec_neon.h
new file mode 100644
index 00000000..acf49f47
--- /dev/null
+++ b/dnn/vec_neon.h
@@ -0,0 +1,473 @@
+/* Copyright (c) 2018 David Rowe
+ 2018 Mozilla
+ 2008-2011 Octasic Inc.
+ 2012-2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* NEON support for ARM machines */
+
+#ifndef VEC_NEON_H
+#define VEC_NEON_H
+
+#include <arm_neon.h>
+#include "os_support.h"
+
+#if defined(__arm__) && !defined(__aarch64__)
+/* Emulate vcvtnq_s32_f32() for ARMv7 Neon. */
+/* Converts to fixed point with 8 fractional bits, then applies a rounding
+   right shift by 8 to get back to an integer. */
+static OPUS_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t x) {
+ return vrshrq_n_s32(vcvtq_n_s32_f32(x, 8), 8);
+}
+
+/* 128-bit pairwise add built from two 64-bit vpadd_s16() calls: the low half
+   of the result holds the pairwise sums of a, the high half those of b. */
+static OPUS_INLINE int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
+ return vcombine_s16(vpadd_s16(vget_low_s16(a), vget_high_s16(a)), vpadd_s16(vget_low_s16(b), vget_high_s16(b)));
+}
+
+/* Widening multiply of the high 8 bytes of a and b, via vmull_s8(). */
+static OPUS_INLINE int16x8_t vmull_high_s8(int8x16_t a, int8x16_t b) {
+ return vmull_s8(vget_high_s8(a), vget_high_s8(b));
+}
+#endif
+
+#ifdef __ARM_FEATURE_FMA
+/* If we can, force the compiler to use an FMA instruction rather than break
+ vmlaq_f32() into fmul/fadd. */
+#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
+#endif
+
+#ifndef LPCNET_TEST
+/* Approximates exp(x) on four floats (NEON).
+   The input is clamped to [-88, 88], converted to a base-2 exponent biased
+   by 127, and split into integer and fractional parts. A cubic polynomial
+   approximates 2^frac; the integer part is shifted into the exponent field
+   to form a power of two, which then multiplies the polynomial result. */
+static inline float32x4_t exp4_approx(float32x4_t x) {
+ int32x4_t i;
+ float32x4_t xf;
+
+ x = vmaxq_f32(vminq_f32(x, vdupq_n_f32(88.f)), vdupq_n_f32(-88.f));
+
+ /* express exp(x) as exp2(x/log(2)), add 127 for the exponent later */
+ x = vmlaq_f32(vdupq_n_f32(127.f), x, vdupq_n_f32(1.44269504f));
+
+ /* split into integer and fractional parts */
+ i = vcvtq_s32_f32(x);
+ xf = vcvtq_f32_s32(i);
+ x = vsubq_f32(x, xf);
+
+ /* cubic approximation of 2^frac, same coefficients as the x86 versions */
+ float32x4_t K0 = vdupq_n_f32(0.99992522f);
+ float32x4_t K1 = vdupq_n_f32(0.69583354f);
+ float32x4_t K2 = vdupq_n_f32(0.22606716f);
+ float32x4_t K3 = vdupq_n_f32(0.078024523f);
+ float32x4_t Y = vmlaq_f32(K0, x, vmlaq_f32(K1, x, vmlaq_f32(K2, K3, x)));
+
+ /* compute 2^i */
+ float32x4_t exponent = vreinterpretq_f32_s32(vshlq_n_s32(i, 23));
+
+ Y = vmulq_f32(Y, exponent);
+ return Y;
+}
+
+/* Four-wide tanh() approximation (NEON): X * P(X^2) / Q(X^2) with quadratic
+   P and Q, using a reciprocal estimate for the division; the result is
+   clamped to [-1, 1]. */
+static inline float32x4_t tanh4_approx(float32x4_t X)
+{
+ const float32x4_t N0 = vdupq_n_f32(952.52801514f);
+ const float32x4_t N1 = vdupq_n_f32(96.39235687f);
+ const float32x4_t N2 = vdupq_n_f32(0.60863042f);
+ const float32x4_t D0 = vdupq_n_f32(952.72399902f);
+ const float32x4_t D1 = vdupq_n_f32(413.36801147f);
+ const float32x4_t D2 = vdupq_n_f32(11.88600922f);
+ const float32x4_t max_out = vdupq_n_f32(1.f);
+ const float32x4_t min_out = vdupq_n_f32(-1.f);
+ float32x4_t X2, num, den;
+ X2 = vmulq_f32(X, X);
+ /* vmlaq_f32(a, b, c) computes a + b*c: Horner form in X2 */
+ num = vmlaq_f32(N0, X2, vmlaq_f32(N1, N2, X2));
+ den = vmlaq_f32(D0, X2, vmlaq_f32(D1, D2, X2));
+ num = vmulq_f32(num, X);
+ /* reciprocal estimate instead of a true division */
+ den = vrecpeq_f32(den);
+ num = vmulq_f32(num, den);
+ return vmaxq_f32(min_out, vminq_f32(max_out, num));
+}
+
+/* Four-wide sigmoid approximation (NEON): 0.5 + X * P(X^2) / Q(X^2) with
+   quadratic P and Q, using a reciprocal estimate for the division; the
+   result is clamped to [0, 1]. */
+static inline float32x4_t sigmoid4_approx(float32x4_t X)
+{
+ const float32x4_t N0 = vdupq_n_f32(238.13200378f);
+ const float32x4_t N1 = vdupq_n_f32(6.02452230f);
+ const float32x4_t N2 = vdupq_n_f32(0.00950985f);
+ const float32x4_t D0 = vdupq_n_f32(952.72399902f);
+ const float32x4_t D1 = vdupq_n_f32(103.34200287f);
+ const float32x4_t D2 = vdupq_n_f32(0.74287558f);
+ const float32x4_t half = vdupq_n_f32(0.5f);
+ const float32x4_t max_out = vdupq_n_f32(1.f);
+ const float32x4_t min_out = vdupq_n_f32(0.f);
+ float32x4_t X2, num, den;
+ X2 = vmulq_f32(X, X);
+ /* vmlaq_f32(a, b, c) computes a + b*c: Horner form in X2 */
+ num = vmlaq_f32(N0, X2, vmlaq_f32(N1, N2, X2));
+ den = vmlaq_f32(D0, X2, vmlaq_f32(D1, D2, X2));
+ num = vmulq_f32(num, X);
+ /* reciprocal estimate instead of a true division */
+ den = vrecpeq_f32(den);
+ /* 0.5 + num/den */
+ num = vmlaq_f32(half, num, den);
+ return vmaxq_f32(min_out, vminq_f32(max_out, num));
+}
+
+/* Scalar exp() approximation: broadcast x, run exp4_approx(), return lane 0. */
+static inline float lpcnet_exp(float x)
+{
+  float lanes[4];
+  vst1q_f32(lanes, exp4_approx(vdupq_n_f32(x)));
+  return lanes[0];
+}
+
+/* Scalar tanh() approximation: broadcast x, run tanh4_approx(), return lane 0. */
+static inline float tanh_approx(float x)
+{
+  float lanes[4];
+  vst1q_f32(lanes, tanh4_approx(vdupq_n_f32(x)));
+  return lanes[0];
+}
+
+/* Scalar sigmoid approximation: broadcast x, run sigmoid4_approx(), return lane 0. */
+static inline float sigmoid_approx(float x)
+{
+  float lanes[4];
+  vst1q_f32(lanes, sigmoid4_approx(vdupq_n_f32(x)));
+  return lanes[0];
+}
+
+/* Writes the exp() approximation of x[k] to y[k] for k in [0, N).
+   Despite the name, no normalisation by the sum is done in this function. */
+static inline void softmax(float *y, const float *x, int N)
+{
+  int pos = 0;
+  /* Full groups of four go through the vector exponential. */
+  while (N-pos >= 4)
+  {
+    vst1q_f32(&y[pos], exp4_approx(vld1q_f32(&x[pos])));
+    pos += 4;
+  }
+  /* Remaining elements (fewer than four), one at a time. */
+  while (pos < N)
+  {
+    y[pos] = lpcnet_exp(x[pos]);
+    pos++;
+  }
+}
+
+/* Element-wise tanh approximation: y[i] ~= tanh(x[i]) for i in [0, N).
+   Groups of four use tanh4_approx(); the last 0..3 elements use
+   tanh_approx(), which evaluates the same rational function. Every element
+   therefore gets the same approximation regardless of its position. The
+   tail previously used an exp-based formula, which gave slightly different
+   results from the vector body and from the x86 and generic
+   implementations. */
+static inline void vec_tanh(float *y, const float *x, int N)
+{
+  int i;
+  for (i=0;i<N-3;i+=4)
+  {
+    float32x4_t X, Y;
+    X = vld1q_f32(&x[i]);
+    Y = tanh4_approx(X);
+    vst1q_f32(&y[i], Y);
+  }
+  for (;i<N;i++)
+  {
+    y[i] = tanh_approx(x[i]);
+  }
+}
+
+/* Element-wise sigmoid approximation: y[i] ~= 1/(1+exp(-x[i])) for i in [0, N).
+   Groups of four use sigmoid4_approx(); the last 0..3 elements use
+   sigmoid_approx(), which evaluates the same rational function. Every
+   element therefore gets the same approximation regardless of its position.
+   The tail previously used an exp-based formula, which gave slightly
+   different results from the vector body and from the x86 and generic
+   implementations. */
+static inline void vec_sigmoid(float *y, const float *x, int N)
+{
+  int i;
+  for (i=0;i<N-3;i+=4)
+  {
+    float32x4_t X, Y;
+    X = vld1q_f32(&x[i]);
+    Y = sigmoid4_approx(X);
+    vst1q_f32(&y[i], Y);
+  }
+  for (;i<N;i++)
+  {
+    y[i] = sigmoid_approx(x[i]);
+  }
+}
+#endif
+
+/* Float matrix-vector product, 16 output rows per outer iteration:
+   out[i] = sum over j of weights[j*col_stride + i]*x[j].
+   Each outer iteration stores 16 outputs, so rows must be a multiple of 16;
+   the sgemv() dispatcher in this file only calls it in that case. */
+static inline void sgemv16x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+ int i, j;
+ for (i=0;i<rows;i+=16)
+ {
+ float * restrict y = &out[i];
+
+ /* keep y[0..15] in registers for duration of inner loop */
+
+ float32x4_t y0_3 = vdupq_n_f32(0);
+ float32x4_t y4_7 = vdupq_n_f32(0);
+ float32x4_t y8_11 = vdupq_n_f32(0);
+ float32x4_t y12_15 = vdupq_n_f32(0);
+
+ for (j=0;j<cols;j++)
+ {
+ const float * restrict w;
+ float32x4_t wvec0_3, wvec4_7, wvec8_11, wvec12_15;
+ float32x4_t xj;
+
+ /* 16 consecutive weights for input j, rows i..i+15 */
+ w = &weights[j*col_stride + i];
+ wvec0_3 = vld1q_f32(&w[0]);
+ wvec4_7 = vld1q_f32(&w[4]);
+ wvec8_11 = vld1q_f32(&w[8]);
+ wvec12_15 = vld1q_f32(&w[12]);
+
+ /* x[j] replicated to all four lanes */
+ xj = vld1q_dup_f32(&x[j]);
+
+ y0_3 = vmlaq_f32(y0_3, wvec0_3, xj);
+ y4_7 = vmlaq_f32(y4_7, wvec4_7, xj);
+ y8_11 = vmlaq_f32(y8_11, wvec8_11, xj);
+ y12_15 = vmlaq_f32(y12_15, wvec12_15, xj);
+ }
+
+ /* save y[0..15] back to memory */
+
+ vst1q_f32(&y[0], y0_3);
+ vst1q_f32(&y[4], y4_7);
+ vst1q_f32(&y[8], y8_11);
+ vst1q_f32(&y[12], y12_15);
+
+ }
+}
+
+/* Float matrix-vector product, 8 output rows per outer iteration:
+   out[i] = sum over j of weights[j*col_stride + i]*x[j].
+   Each outer iteration stores 8 outputs, so rows must be a multiple of 8;
+   the sgemv() dispatcher in this file only calls it in that case. */
+static inline void sgemv8x1(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+ int i, j;
+ for (i=0;i<rows;i+=8)
+ {
+ float * restrict y = &out[i];
+
+ /* keep y[0..7] in registers for duration of inner loop */
+
+ float32x4_t y0_3 = vdupq_n_f32(0);
+ float32x4_t y4_7 = vdupq_n_f32(0);
+
+ for (j=0;j<cols;j++)
+ {
+ const float * restrict w;
+ float32x4_t wvec0_3, wvec4_7;
+ float32x4_t xj;
+
+ /* 8 consecutive weights for input j, rows i..i+7 */
+ w = &weights[j*col_stride + i];
+ wvec0_3 = vld1q_f32(&w[0]);
+ wvec4_7 = vld1q_f32(&w[4]);
+
+ /* x[j] replicated to all four lanes */
+ xj = vld1q_dup_f32(&x[j]);
+
+ y0_3 = vmlaq_f32(y0_3, wvec0_3, xj);
+ y4_7 = vmlaq_f32(y4_7, wvec4_7, xj);
+ }
+
+ /* save y[0..7] back to memory */
+
+ vst1q_f32(&y[0], y0_3);
+ vst1q_f32(&y[4], y4_7);
+ }
+}
+
+/* Dense float matrix-vector product:
+   out[r] = sum over c of weights[c*col_stride + r]*x[c], for r in [0, rows).
+   Picks the 16-row kernel when rows is a multiple of 16, the 8-row kernel when
+   it is a multiple of 8, and a plain scalar loop for any other row count. */
+static inline void sgemv(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+    if ((rows&0xf) == 0)
+    {
+        sgemv16x1(out, weights, rows, cols, col_stride, x);
+        return;
+    }
+    if ((rows&0x7) == 0)
+    {
+        sgemv8x1(out, weights, rows, cols, col_stride, x);
+        return;
+    }
+    /* Generic fallback for row counts that are not a multiple of 8. */
+    {
+        int r, c;
+        for (r=0;r<rows;r++)
+        {
+            out[r] = 0;
+            for (c=0;c<cols;c++) out[r] += weights[c*col_stride + r]*x[c];
+        }
+    }
+}
+
+/* Temporarily use unoptimized version */
+/* Block-sparse float matrix-vector product on 8x4 blocks.
+   out[0..rows-1] is cleared first. For each group of 8 output rows, idx holds
+   a block count followed by that many input positions. Each block uses 32
+   consecutive weights from w, laid out as 4 input columns of 8 rows, and adds
+   w[8*c + r]*x[pos + c] to row r of the group. */
+static inline void sparse_sgemv8x4(float *out, const float *w, const int *idx, int rows, const float *x)
+{
+    int row, blk;
+    OPUS_CLEAR(out, rows);
+    for (row=0;row<rows;row+=8)
+    {
+        int nblocks;
+        nblocks = *idx++;
+        for (blk=0;blk<nblocks;blk++)
+        {
+            int c, r;
+            int pos;
+            float xin[4];
+            float * restrict y;
+            pos = (*idx++);
+            /* Fetch the four inputs of this block before touching the outputs. */
+            for (c=0;c<4;c++) xin[c] = x[pos+c];
+            y = &out[row];
+            /* Column by column, so each y[r] accumulates in the order c = 0,1,2,3. */
+            for (c=0;c<4;c++)
+            {
+                for (r=0;r<8;r++) y[r] += w[8*c + r]*xin[c];
+            }
+            w += 32;
+        }
+    }
+}
+
+
+#define SCALE (128.f*127.f)
+#define SCALE_1 (1.f/128.f/127.f) /* reciprocal of SCALE */
+
+#define MAX_INPUTS 2048 /* byte size of the on-stack quantized-input buffer in cgemv8x4() and sparse_cgemv8x4() */
+#define MAX_OUTPUTS 8192
+
+/* Adds to each 32-bit lane of acc the dot product of the matching group of
+   four int8 elements of a and b.
+   With __ARM_FEATURE_DOTPROD this is a single vdotq_s32(). The fallback forms
+   the sixteen 16-bit products, adds adjacent pairs in 16-bit arithmetic and
+   then pairwise-accumulates into acc, so it only matches vdotq_s32() while
+   each sum of two adjacent products fits in 16 bits. */
+static inline int32x4_t vdotprod(int32x4_t acc, int8x16_t a, int8x16_t b)
+{
+#if __ARM_FEATURE_DOTPROD
+    return vdotq_s32(acc, a, b);
+#else
+    int16x8_t prod_lo = vmull_s8(vget_low_s8(a), vget_low_s8(b));
+    int16x8_t prod_hi = vmull_high_s8(a, b);
+    return vpadalq_s16(acc, vpaddq_s16(prod_lo, prod_hi));
+#endif
+}
+
+/* Dense int8 matrix-vector product with per-row float scaling.
+   _x is first quantized to int8 (multiplied by 127, rounded to nearest,
+   narrowed) into a stack buffer. For every group of 8 output rows the weights
+   are consumed sequentially from w, 32 bytes per block of 4 input columns
+   (4 bytes per row), and accumulated in 32-bit integers. The sums are then
+   converted to float and multiplied by scale[] before being stored to _out.
+   rows is stepped by 8, so it is expected to be a multiple of 8. */
+static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale, int rows, int cols, const float *_x)
+{
+ int i, j;
+ /* MAX_INPUTS bytes of storage, declared as 32-bit words and accessed as int8 through x.
+ NOTE(review): nothing here checks cols against MAX_INPUTS - confirm callers guarantee it. */
+ opus_int32 x_int[MAX_INPUTS/4];
+ opus_int8 *x = (opus_int8*) x_int;
+ const float32x4_t const127 = vdupq_n_f32(127.);
+ /* Quantize the input 8 values at a time. The narrowing (vmovn) does not saturate,
+ so inputs are presumably limited to [-1, 1] - TODO confirm.
+ NOTE(review): this loop touches _x[i..i+7]; the tail loop below accepts cols that is
+ only a multiple of 4, in which case 4 extra floats are read here - confirm that is safe. */
+ for (i=0;i<cols;i+=8) {
+ int32x4_t xi0, xi4;
+ int16x8_t x_short;
+ xi0 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i])));
+ xi4 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i+4])));
+ x_short = vcombine_s16(vmovn_s32(xi0), vmovn_s32(xi4));
+ vst1_s8(&x[i], vmovn_s16(x_short));
+ }
+ for (i=0;i<rows;i+=8)
+ {
+ /* acc0/acc2 collect rows i..i+3, acc1/acc3 rows i+4..i+7 */
+ int32x4_t acc0, acc1;
+ int32x4_t acc2, acc3;
+ acc0 = vdupq_n_s32(0);
+ acc1 = vdupq_n_s32(0);
+ acc2 = vdupq_n_s32(0);
+ acc3 = vdupq_n_s32(0);
+ j=0;
+ /* Main loop: two 4-column blocks (64 weight bytes) per iteration, using two
+ independent accumulator pairs */
+ for (;j<cols-4;j+=8)
+ {
+ int8x16_t vw0, vw1, vw2, vw3, vx0, vx1;
+ /* load x[j..j+3] as one 32-bit word and replicate it into every lane */
+ vx0 = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[j]);
+ vw0 = vld1q_s8(w);
+ vw1 = vld1q_s8(&w[16]);
+ acc0 = vdotprod(acc0, vw0, vx0);
+ acc1 = vdotprod(acc1, vw1, vx0);
+ vx1 = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[j+4]);
+ vw2 = vld1q_s8(&w[32]);
+ vw3 = vld1q_s8(&w[48]);
+ acc2 = vdotprod(acc2, vw2, vx1);
+ acc3 = vdotprod(acc3, vw3, vx1);
+ w += 64;
+ }
+ /* fold the second accumulator pair into the first */
+ acc0 = vaddq_s32(acc0, acc2);
+ acc1 = vaddq_s32(acc1, acc3);
+ /* Tail: one remaining 4-column block, if any */
+ for (;j<cols;j+=4)
+ {
+ int8x16_t vw0, vw1, vx;
+ vx = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[j]);
+ vw0 = vld1q_s8(w);
+ vw1 = vld1q_s8(&w[16]);
+ acc0 = vdotprod(acc0, vw0, vx);
+ acc1 = vdotprod(acc1, vw1, vx);
+ w += 32;
+ }
+ /* integer sums -> float, times the per-row scale */
+ vst1q_f32(&_out[i], vmulq_f32(vld1q_f32(&scale[i]), vcvtq_f32_s32(acc0)));
+ vst1q_f32(&_out[i+4], vmulq_f32(vld1q_f32(&scale[i+4]), vcvtq_f32_s32(acc1)));
+ }
+}
+
+/* Block-sparse int8 matrix-vector product with per-row float scaling.
+   _x is first quantized to int8 (multiplied by 127, rounded to nearest,
+   narrowed) into a stack buffer. For every group of 8 output rows, idx
+   supplies a block count followed by that many input positions. Each block
+   consumes 32 bytes of w (8 rows x the 4 consecutive inputs starting at that
+   position) and is accumulated in 32-bit integers. The sums are then converted
+   to float and multiplied by scale[] before being stored to _out.
+   rows is stepped by 8, so it is expected to be a multiple of 8. */
+static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *idx, const float *scale, int rows, int cols, const float *_x)
+{
+    int i, j;
+    /* MAX_INPUTS bytes of storage, declared as 32-bit words and accessed as
+       int8 through x.
+       NOTE(review): cols is not checked against MAX_INPUTS here - confirm
+       callers guarantee it. */
+    opus_int32 x_int[MAX_INPUTS/4];
+    opus_int8 *x = (opus_int8*) x_int;
+    const float32x4_t const127 = vdupq_n_f32(127.);
+    /* Quantize the input, 8 values per iteration (reads _x[i..i+7]). */
+    for (i=0;i<cols;i+=8) {
+        int32x4_t xi0, xi4;
+        int16x8_t x_short;
+        xi0 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i])));
+        xi4 = vcvtnq_s32_f32(vmulq_f32(const127, vld1q_f32(&_x[i+4])));
+        x_short = vcombine_s16(vmovn_s32(xi0), vmovn_s32(xi4));
+        vst1_s8(&x[i], vmovn_s16(x_short));
+    }
+    for (i=0;i<rows;i+=8)
+    {
+        int colblocks;
+        /* acc0 collects rows i..i+3, acc1 rows i+4..i+7. */
+        int32x4_t acc0, acc1;
+        acc0 = vdupq_n_s32(0);
+        acc1 = vdupq_n_s32(0);
+        colblocks = *idx++;
+        for (j=0;j<colblocks;j++)
+        {
+            /* All declarations come before the first statement, as in cgemv8x4()
+               (no declaration-after-statement). */
+            int pos;
+            int8x16_t vw0, vw1, vx;
+            pos = (*idx++);
+            /* Load x[pos..pos+3] as one 32-bit word and replicate it into every
+               lane; pos is presumably a multiple of 4 - TODO confirm. */
+            vx = (int8x16_t)vld1q_dup_s32((int*)(void*)&x[pos]);
+            vw0 = vld1q_s8(w);
+            vw1 = vld1q_s8(&w[16]);
+            acc0 = vdotprod(acc0, vw0, vx);
+            acc1 = vdotprod(acc1, vw1, vx);
+            w += 32;
+        }
+        /* Integer sums -> float, times the per-row scale. */
+        vst1q_f32(&_out[i], vmulq_f32(vld1q_f32(&scale[i]), vcvtq_f32_s32(acc0)));
+        vst1q_f32(&_out[i+4], vmulq_f32(vld1q_f32(&scale[i+4]), vcvtq_f32_s32(acc1)));
+    }
+}
+
+
+#endif
diff --git a/dnn/write_lpcnet_weights.c b/dnn/write_lpcnet_weights.c
new file mode 100644
index 00000000..2f80b962
--- /dev/null
+++ b/dnn/write_lpcnet_weights.c
@@ -0,0 +1,97 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include "nnet.h"
+#include "os_support.h"
+#include "arch.h"
+
+/* This is a bit of a hack because we need to build the model data files included below
+ (pitchdnn_data.c, plc_data.c, ...) without USE_WEIGHTS_FILE, but USE_WEIGHTS_FILE is defined in config.h. */
+#undef HAVE_CONFIG_H
+#ifdef USE_WEIGHTS_FILE
+#undef USE_WEIGHTS_FILE
+#endif
+#include "pitchdnn_data.c"
+#include "fargan_data.c"
+#include "plc_data.c"
+#include "dred_rdovae_enc_data.c"
+#include "dred_rdovae_dec_data.c"
+#ifdef ENABLE_OSCE
+#include "lace_data.c"
+#include "nolace_data.c"
+#endif
+
+/* Appends every array of list to fout. The list ends at the first entry whose
+   name is NULL. Each array is written as one WEIGHT_BLOCK_SIZE-byte header
+   ("DNNw" tag, blob version, type, size, padded size, name), followed by the
+   raw data and by zero bytes up to the next multiple of WEIGHT_BLOCK_SIZE.
+   Names that do not fit in the header are truncated after a warning on stdout.
+   Write errors are not reported here; they remain visible through the error
+   indicator of fout. */
+void write_weights(const WeightArray *list, FILE *fout)
+{
+  int i=0;
+  unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
+  while (list[i].name != NULL) {
+    WeightHead h;
+    /* h.name stores the name plus its terminating NUL, so a name is only
+       truncated once it reaches sizeof(h.name) characters. */
+    if (strlen(list[i].name) >= sizeof(h.name)) {
+      printf("[write_weights] warning: name %s too long\n", list[i].name);
+    }
+    memcpy(h.head, "DNNw", 4);
+    h.version = WEIGHT_BLOB_VERSION;
+    h.type = list[i].type;
+    h.size = list[i].size;
+    /* Round the payload size up to a whole number of blocks. */
+    h.block_size = (h.size+WEIGHT_BLOCK_SIZE-1)/WEIGHT_BLOCK_SIZE*WEIGHT_BLOCK_SIZE;
+    OPUS_CLEAR(h.name, sizeof(h.name));
+    strncpy(h.name, list[i].name, sizeof(h.name));
+    /* strncpy() leaves the field unterminated when the name is too long. */
+    h.name[sizeof(h.name)-1] = 0;
+    /* The header is written as exactly one block. */
+    celt_assert(sizeof(h) == WEIGHT_BLOCK_SIZE);
+    fwrite(&h, 1, WEIGHT_BLOCK_SIZE, fout);
+    fwrite(list[i].data, 1, h.size, fout);
+    /* The padding is always shorter than one block, so zeros[] is large enough. */
+    fwrite(zeros, 1, h.block_size-h.size, fout);
+    i++;
+  }
+}
+
+/* Writes the weight arrays of every built-in model to weights_blob.bin in the
+   current directory. Returns 0 on success and 1 when the file cannot be
+   opened or when writing or closing it fails. */
+int main(void)
+{
+  int failed;
+  /* The blob is raw binary data: open it in binary mode so that no newline
+     translation is applied on platforms that distinguish text streams. */
+  FILE *fout = fopen("weights_blob.bin", "wb");
+  if (fout == NULL) {
+    perror("weights_blob.bin");
+    return 1;
+  }
+  write_weights(pitchdnn_arrays, fout);
+  write_weights(fargan_arrays, fout);
+  write_weights(plcmodel_arrays, fout);
+  write_weights(rdovaeenc_arrays, fout);
+  write_weights(rdovaedec_arrays, fout);
+#ifdef ENABLE_OSCE
+#ifndef DISABLE_LACE
+  write_weights(lacelayers_arrays, fout);
+#endif
+#ifndef DISABLE_NOLACE
+  write_weights(nolacelayers_arrays, fout);
+#endif
+#endif
+  /* write_weights() returns nothing, so pick up any write error from the
+     stream itself; fclose() can also fail while flushing buffered data. */
+  failed = ferror(fout);
+  if (fclose(fout) != 0) failed = 1;
+  if (failed) {
+    fprintf(stderr, "error writing weights_blob.bin\n");
+    return 1;
+  }
+  return 0;
+}
diff --git a/dnn/x86/dnn_x86.h b/dnn/x86/dnn_x86.h
new file mode 100644
index 00000000..f2183327
--- /dev/null
+++ b/dnn/x86/dnn_x86.h
@@ -0,0 +1,121 @@
+/* Copyright (c) 2011-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DNN_X86_H
+#define DNN_X86_H
+
+#include "cpu_support.h"
+#include "opus_types.h"
+
+/* Per-instruction-set entry points. Each group is declared only when the
+   build may use that instruction set (OPUS_X86_MAY_HAVE_*). */
+#if defined(OPUS_X86_MAY_HAVE_SSE2)
+void compute_linear_sse2(const LinearLayer *linear, float *out, const float *in);
+void compute_activation_sse2(float *output, const float *input, int N, int activation);
+void compute_conv2d_sse2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+void compute_linear_sse4_1(const LinearLayer *linear, float *out, const float *in);
+void compute_activation_sse4_1(float *output, const float *input, int N, int activation);
+void compute_conv2d_sse4_1(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_AVX2)
+void compute_linear_avx2(const LinearLayer *linear, float *out, const float *in);
+void compute_activation_avx2(float *output, const float *input, int N, int activation);
+void compute_conv2d_avx2(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
+#endif
+
+
+/* Selection of the compute_linear/compute_activation/compute_conv2d macros.
+   The first three branches bind the macros directly to one instruction set:
+   the arch argument is evaluated and discarded. The last branch dispatches at
+   run time through function-pointer tables indexed by arch. If no branch
+   applies, none of the OVERRIDE_* macros is defined here. */
+#if defined(OPUS_X86_PRESUME_AVX2)
+
+/* AVX2 is presumed: always call the AVX2 versions. */
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_avx2(linear, out, in))
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_avx2(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_avx2(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
+
+/* SSE4.1 is presumed and AVX2 is not a possible target: always call the SSE4.1 versions. */
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse4_1(linear, out, in))
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse4_1(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse4_1(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_X86_PRESUME_SSE2) && !defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+
+/* SSE2 is presumed and neither AVX2 nor SSE4.1 is a possible target: always call the SSE2 versions. */
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse2(linear, out, in))
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_sse2(output, input, N, activation))
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_sse2(conv, out, mem, in, height, hstride, activation))
+
+#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_X86_MAY_HAVE_AVX2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2))
+
+/* Run-time CPU detection: each table has OPUS_ARCHMASK + 1 slots and is
+   indexed with (arch) & OPUS_ARCHMASK. */
+extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+ const LinearLayer *linear,
+ float *out,
+ const float *in
+ );
+#define OVERRIDE_COMPUTE_LINEAR
+#define compute_linear(linear, out, in, arch) \
+ ((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
+
+
+extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+ float *output,
+ const float *input,
+ int N,
+ int activation
+ );
+#define OVERRIDE_COMPUTE_ACTIVATION
+#define compute_activation(output, input, N, activation, arch) \
+ ((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
+
+
+extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+ const Conv2dLayer *conv,
+ float *out,
+ float *mem,
+ const float *in,
+ int height,
+ int hstride,
+ int activation
+ );
+#define OVERRIDE_COMPUTE_CONV2D
+#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
+ ((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
+
+
+#endif
+
+
+
+#endif /* DNN_X86_H */
diff --git a/dnn/x86/nnet_avx2.c b/dnn/x86/nnet_avx2.c
new file mode 100644
index 00000000..41037fcc
--- /dev/null
+++ b/dnn/x86/nnet_avx2.c
@@ -0,0 +1,40 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "x86/x86_arch_macros.h"
+
+#ifndef __AVX2__
+#error nnet_avx2.c is being compiled without AVX2 enabled
+#endif
+
+#define RTCD_ARCH avx2
+
+#include "nnet_arch.h"
diff --git a/dnn/x86/nnet_sse2.c b/dnn/x86/nnet_sse2.c
new file mode 100644
index 00000000..447b947c
--- /dev/null
+++ b/dnn/x86/nnet_sse2.c
@@ -0,0 +1,40 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "x86/x86_arch_macros.h"
+
+#ifndef __SSE2__
+#error nnet_sse2.c is being compiled without SSE2 enabled
+#endif
+
+#define RTCD_ARCH sse2
+
+#include "nnet_arch.h"
diff --git a/dnn/x86/nnet_sse4_1.c b/dnn/x86/nnet_sse4_1.c
new file mode 100644
index 00000000..224926e5
--- /dev/null
+++ b/dnn/x86/nnet_sse4_1.c
@@ -0,0 +1,40 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "x86/x86_arch_macros.h"
+
+#ifndef __SSE4_1__
+#error nnet_sse4_1.c is being compiled without SSE4.1 enabled
+#endif
+
+#define RTCD_ARCH sse4_1
+
+#include "nnet_arch.h"
diff --git a/dnn/x86/x86_dnn_map.c b/dnn/x86/x86_dnn_map.c
new file mode 100644
index 00000000..d673e134
--- /dev/null
+++ b/dnn/x86/x86_dnn_map.c
@@ -0,0 +1,83 @@
+/* Copyright (c) 2018-2019 Mozilla
+ 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "x86/x86cpu.h"
+#include "nnet.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_AVX2))
+
+/* Run-time dispatch table for compute_linear. The first two slots use the
+   plain C version; the following slots are chosen through the
+   MAY_HAVE_SSE2/SSE4_1/AVX2 macros. */
+void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
+ const LinearLayer *linear,
+ float *out,
+ const float *in
+) = {
+ compute_linear_c, /* non-sse */
+ compute_linear_c,
+ MAY_HAVE_SSE2(compute_linear), /* sse2 */
+ MAY_HAVE_SSE4_1(compute_linear), /* sse4.1 */
+ MAY_HAVE_AVX2(compute_linear) /* avx2 */
+};
+
+/* Run-time dispatch table for compute_activation. The first two slots use the
+   plain C version; the following slots are chosen through the
+   MAY_HAVE_SSE2/SSE4_1/AVX2 macros. */
+void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
+ float *output,
+ const float *input,
+ int N,
+ int activation
+) = {
+ compute_activation_c, /* non-sse */
+ compute_activation_c,
+ MAY_HAVE_SSE2(compute_activation), /* sse2 */
+ MAY_HAVE_SSE4_1(compute_activation), /* sse4.1 */
+ MAY_HAVE_AVX2(compute_activation) /* avx2 */
+};
+
+/* Run-time dispatch table for compute_conv2d. The first two slots use the
+   plain C version; the following slots are chosen through the
+   MAY_HAVE_SSE2/SSE4_1/AVX2 macros. */
+void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
+ const Conv2dLayer *conv,
+ float *out,
+ float *mem,
+ const float *in,
+ int height,
+ int hstride,
+ int activation
+) = {
+ compute_conv2d_c, /* non-sse */
+ compute_conv2d_c,
+ MAY_HAVE_SSE2(compute_conv2d), /* sse2 */
+ MAY_HAVE_SSE4_1(compute_conv2d), /* sse4.1 */
+ MAY_HAVE_AVX2(compute_conv2d) /* avx2 */
+};
+
+#endif
+
+
+#endif