Java jni调用nnom rnn-denoise 降噪

介绍:https://github.com/majianjia/nnom/blob/master/examples/rnn-denoise/README_CN.md

默认提供了一个wav的例子


#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>

#include "nnom.h"
#include "denoise_weights.h"

#include "mfcc.h"
#include "wav.h"

 // the bandpass filter coefficiences
#include "equalizer_coeff.h" 

// Number of features is tied to the number of filter bands.
// NOTE(review): NUM_FILTER is not defined in this file; presumably it comes from
// equalizer_coeff.h -- confirm.
#define NUM_FEATURES NUM_FILTER

// Generic max/min helpers. Each argument is evaluated more than once, so do not
// pass expressions with side effects (e.g. i++).
#define _MAX(x, y) (((x) > (y)) ? (x) : (y))
#define _MIN(x, y) (((x) < (y)) ? (x) : (y))

// Audio format constants. NUM_CHANNELS and SAMPLE_RATE are not referenced by the
// code in this file. AUDIO_FRAME_LEN is the number of samples in one analysis
// frame; main() advances by half a frame per iteration.
#define NUM_CHANNELS 	1
#define SAMPLE_RATE 	16000
#define AUDIO_FRAME_LEN 512

// Float work buffer for the equalizer: main() stores the newest half frame
// (scaled by 1/32768) in the first half and passes the second half as the
// equalizer output.
float audio_buffer[AUDIO_FRAME_LEN] = {0};
// Sliding window of raw 16-bit samples handed to mfcc_compute(): main() moves the
// later half to the front and reads a new half frame into the back.
int16_t audio_buffer_16bit[AUDIO_FRAME_LEN] = {0};

// Filtered half frame converted back to 16-bit, written to the output file.
int16_t audio_buffer_filtered[AUDIO_FRAME_LEN/2] = { 0 };


// MFCC features of the current frame, the previous frame, the first difference,
// the previous first difference, and the second difference (difference of the
// first differences).
float mfcc_feature[NUM_FEATURES] = { 0 };
float mfcc_feature_prev[NUM_FEATURES] = { 0 };
float mfcc_feature_diff[NUM_FEATURES] = { 0 };
float mfcc_feature_diff_prev[NUM_FEATURES] = { 0 };
float mfcc_feature_diff1[NUM_FEATURES] = { 0 };
// Input features for the network, as float and as quantized int8. main() fills
// the first NUM_FEATURES + 20 entries (all MFCCs, then the first 10 first
// differences, then the first 10 second differences).
// NOTE(review): assumes NUM_FEATURES + 20 <= 64 -- confirm against NUM_FILTER.
float nn_features[64] = {0};
int8_t nn_features_q7[64] = {0};

// Network output converted to float: one gain per frequency band, plus the gains
// of the previous frame (used by main() to limit how fast a gain may fall).
float band_gains[NUM_FILTER] = {0};
float band_gains_prev[NUM_FILTER] = {0};

// Fixed band-pass filter coefficients, initialised from the FILTER_COEFF_B /
// FILTER_COEFF_A macros (the original comment calls these the 0 dB gains).
float coeff_b[NUM_FILTER][NUM_COEFF_PAIR] = FILTER_COEFF_B;
float coeff_a[NUM_FILTER][NUM_COEFF_PAIR] = FILTER_COEFF_A;
// Per-frame numerator coefficients: coeff_b scaled by band_gains via set_gains().
float b_[NUM_FILTER][NUM_COEFF_PAIR] = {0};

/**
 * Shift a history buffer one slot towards its end, making room at index 0.
 *
 * After the call y_h[i] holds the former y_h[i-1] for every i in [1, len-1];
 * y_h[0] is left unchanged and is expected to be overwritten by the caller.
 * The former last element is discarded.
 *
 * @param y_h  history buffer with at least `len` elements
 * @param len  number of elements in the buffer; 0 and 1 are no-ops
 */
void y_h_update(float *y_h, uint32_t len)
{
	// Nothing to shift. This guard also matters for len == 0: the unsigned
	// expression len - 1 would otherwise wrap to UINT32_MAX.
	if (len < 2)
		return;

	// Source and destination overlap, so memmove (not memcpy) is required.
	memmove(&y_h[1], &y_h[0], (size_t)(len - 1) * sizeof(float));
}

// Equalizer built from several n-th order IIR band-pass filters whose outputs are summed.
// For each band:
// y[i] = b[0] * x[i] + b[1] * x[i - 1] + b[2] * x[i - 2] - a[1] * y[i - 1] - a[2] * y[i - 2]...
//
// x          : input samples (signal_len of them)
// y          : output samples (signal_len of them); each y[i] is the sum over all bands
// b, a       : flattened coefficient tables, indexed as [band * num_coeff + c]; a[... + 0] is never read
// num_band   : number of bands to sum
// num_order  : filter order parameter; each band uses num_order * 2 + 1 coefficients
//
// The input and per-band output histories are function-local statics, so they persist
// between calls and are shared by every caller of this function.
//
// NOTE(review): nothing here checks the arguments. From the buffer sizes below, the code
// relies on num_order * 2 + 1 <= NUM_COEFF_PAIR, num_band <= NUM_FILTER and
// signal_len >= num_order * 2 + 1 (otherwise signal_len - num_coeff wraps around).
void equalizer(float* x, float* y, uint32_t signal_len, float *b, float *a, uint32_t num_band, uint32_t num_order)
{
	// per-band output history: y_h[n][0] is the newest output of band n, y_h[n][c] the output c samples ago
	static float y_h[NUM_FILTER][NUM_COEFF_PAIR] = { 0 };
	// input history: the first num_coeff slots hold the tail of the previous call's input,
	// the next num_coeff slots receive the head of the current input
	static float x_h[NUM_COEFF_PAIR * 2] = { 0 };
	uint32_t num_coeff = num_order * 2 + 1;

	// i < num_coeff (the first few points still need x samples from the previous call)
	// append the head of the new data after the saved state to get a continuous x input
	memcpy(x_h + num_coeff, x, num_coeff * sizeof(float));
	for (uint32_t i = 0; i < num_coeff; i++)
	{
		y[i] = 0;
		for (uint32_t n = 0; n < num_band; n++)
		{
			// shift the band's history so slot 0 is free for the new output
			y_h_update(y_h[n], num_coeff);
			y_h[n][0] = b[n * num_coeff] * x_h[i+ num_coeff];
			for (uint32_t c = 1; c < num_coeff; c++)
				y_h[n][0] += b[n * num_coeff + c] * x_h[num_coeff + i - c] - a[n * num_coeff + c] * y_h[n][c];
			y[i] += y_h[n][0];
		}
	}
	// save the last num_coeff input samples as the state for the next call
	memcpy(x_h, &x[signal_len - num_coeff], num_coeff * sizeof(float));
	
	// i >= num_coeff: the remaining samples do not involve the saved x history
	for (uint32_t i = num_coeff; i < signal_len; i++)
	{
		y[i] = 0;
		for (uint32_t n = 0; n < num_band; n++)
		{
			// same recurrence as above, reading past inputs directly from x
			y_h_update(y_h[n], num_coeff);
			y_h[n][0] = b[n * num_coeff] * x[i];
			for (uint32_t c = 1; c < num_coeff; c++)
				y_h[n][0] += b[n * num_coeff + c] * x[i - c] - a[n * num_coeff + c] * y_h[n][c];
			y[i] += y_h[n][0];
		}	
	}
}

// Apply the per-band gains: every numerator coefficient of band k in b_in is
// multiplied by gains[k] and stored at the same position in b_out.
// Both tables are flattened with (num_order * 2 + 1) coefficients per band.
// Only the b coefficients need scaling; the a coefficients are left alone.
void set_gains(float *b_in, float *b_out,  float* gains, uint32_t num_band, uint32_t num_order)
{
	uint32_t taps_per_band = 2 * num_order + 1;
	uint32_t flat = 0;	// running index into the flattened tables

	for (uint32_t band = 0; band < num_band; band++)
	{
		for (uint32_t tap = 0; tap < taps_per_band; tap++)
		{
			b_out[flat] = b_in[flat] * gains[band];
			flat++;
		}
	}
}

/**
 * Quantize floats to signed 8-bit values using a power-of-two scale.
 *
 * Each input is clamped to [-2^int_bit, +2^int_bit], divided by 2^int_bit and
 * multiplied by 127; the result is truncated towards zero (not rounded).
 * A NaN input ends up as +127, because every comparison against NaN is false.
 *
 * @param din      input values (`size` of them)
 * @param dout     output buffer (`size` elements)
 * @param size     number of values to convert
 * @param int_bit  number of integer bits; the full-scale value is 2^int_bit
 */
void quantize_data(float*din, int8_t *dout, uint32_t size, uint32_t int_bit)
{
	// 2^int_bit computed in floating point. The former `1 << int_bit` is undefined
	// behaviour once int_bit reaches the width of int.
	const float limit = ldexpf(1.0f, (int)int_bit);

	for (uint32_t i = 0; i < size; i++)
	{
		float v = din[i];
		// Written as negated comparisons so that NaN takes the same path as in the
		// macro-based clamp this replaces.
		if (!(v < limit))
			v = limit;
		if (!(v > -limit))
			v = -limit;
		dout[i] = (int8_t)(v / limit * 127);
	}
}

/**
 * Append one CSV row to a log file: every value printed with "%f" followed by
 * a comma, then a single newline.
 *
 * @param value  values to log (`size` of them)
 * @param size   number of values
 * @param f      open output stream; NULL makes the call a no-op so that logging
 *               stays optional for the caller
 */
void log_values(float* value, uint32_t size, FILE* f)
{
	if (f == NULL)
		return;

	// Print straight to the stream: a fixed 16-byte scratch buffer would truncate
	// large magnitudes and lose the separating comma.
	for (uint32_t i = 0; i < size; i++)
		fprintf(f, "%f,", value[i]);

	// Exactly one byte; writing 2 bytes of "\n" would also emit the string's
	// terminating NUL into the file.
	fputc('\n', f);
}

/**
 * Command-line entry point: de-noise a WAV file with the NNoM RNN model.
 *
 * Usage: xxx.exe [input.wav] [output.wav]
 * Defaults are "sample.wav" and "filtered_sample.wav". The per-frame band gains
 * are also appended to "log.csv" when that file can be opened.
 *
 * Processing per half frame (AUDIO_FRAME_LEN / 2 samples, 50% overlap):
 *   MFCC -> first/second differences -> quantize -> run model -> band gains
 *   -> scale the equalizer's b coefficients -> filter -> write 16-bit samples.
 * A trailing partial half frame in the input is not processed.
 *
 * @return 0 on success, -1 on any file, format or allocation error
 */
int main(int argc, char* argv[])
{
	wav_header_t wav_header;
	const char* input_file = "sample.wav";
	const char* output_file = "filtered_sample.wav";
	const char* log_file = "log.csv";
	FILE* src_file = NULL;
	FILE* des_file = NULL;
	FILE* flog = NULL;
	nnom_model_t* model = NULL;
	mfcc_t* mfcc = NULL;
	int ret = -1;

	// if user has specified input and output files
	if (argc > 1)
		input_file = argv[1];
	if (argc > 2)
		output_file = argv[2];

	// Open the input first, so a bad input path does not create or truncate the output.
	src_file = fopen(input_file, "rb");
	if (src_file == NULL)
	{
		printf("Cannot open wav files, default input:'%s'\n", input_file);
		printf("Or use command to specify input file: xxx.exe [input.wav] [output.wav]\n");
		goto cleanup;
	}
	des_file = fopen(output_file, "wb");
	if (des_file == NULL)
	{
		printf("Cannot create output file '%s'\n", output_file);
		goto cleanup;
	}
	// The gain log is optional: carry on without it if it cannot be opened.
	flog = fopen(log_file, "wb");
	if (flog == NULL)
		printf("Warning: cannot open '%s', band gains will not be logged\n", log_file);

	// read wav file header, copy it to the output file
	if (fread(&wav_header, sizeof(wav_header), 1, src_file) != 1)
	{
		printf("'%s' is too short to contain a WAV header\n", input_file);
		goto cleanup;
	}
	if (fwrite(&wav_header, sizeof(wav_header), 1, des_file) != 1)
	{
		printf("Failed to write to '%s'\n", output_file);
		goto cleanup;
	}

	// Jump to the "data" chunk of the WAV file, copying any other chunks through unchanged.
	if (strncmp(wav_header.datachunk_id, "data", 4)){
		wav_chunk_t chunk = { .size= wav_header.datachunk_size};
		do {
			if (chunk.size > 0)
			{
				char* buf = malloc(chunk.size);
				if (buf == NULL)
				{
					printf("Out of memory while skipping a WAV chunk\n");
					goto cleanup;
				}
				// A short read means the file ended before a 'data' chunk was found.
				// Without this check the loop would never terminate.
				if (fread(buf, chunk.size, 1, src_file) != 1)
				{
					free(buf);
					printf("No 'data' chunk found in '%s'\n", input_file);
					goto cleanup;
				}
				if (fwrite(buf, chunk.size, 1, des_file) != 1)
				{
					free(buf);
					printf("Failed to write to '%s'\n", output_file);
					goto cleanup;
				}
				free(buf);
			}
			if (fread(&chunk, sizeof(wav_chunk_t), 1, src_file) != 1)
			{
				printf("No 'data' chunk found in '%s'\n", input_file);
				goto cleanup;
			}
			if (fwrite(&chunk, sizeof(wav_chunk_t), 1, des_file) != 1)
			{
				printf("Failed to write to '%s'\n", output_file);
				goto cleanup;
			}
		} while (strncmp(chunk.id, "data", 4));
	}

	// NNoM model
	model = nnom_model_create();
	if (model == NULL)
	{
		printf("Failed to create the NNoM model\n");
		goto cleanup;
	}

	// Per the original comment: 26 features, 0 offset, 26 bands, 512 fft, 0 preempha,
	// attached_energy_to_band0.
	// NOTE(review): mfcc is never released; if mfcc.h offers a matching destroy
	// function it should be called in the cleanup section -- confirm.
	mfcc = mfcc_create(NUM_FEATURES, 0, NUM_FEATURES, AUDIO_FRAME_LEN, 0, true);
	if (mfcc == NULL)
	{
		printf("Failed to create the MFCC instance\n");
		goto cleanup;
	}

	printf("\nProcessing file: %s\n", input_file);
	while(1) {
		// 50% overlap: move the later half of the window to the front...
		memcpy(audio_buffer_16bit, &audio_buffer_16bit[AUDIO_FRAME_LEN/2], AUDIO_FRAME_LEN/2*sizeof(int16_t));

		// ...then read a new half frame into the back; stop at end of input (a partial half frame is dropped)
		if (fread(&audio_buffer_16bit[AUDIO_FRAME_LEN / 2], AUDIO_FRAME_LEN / 2 * sizeof(int16_t), 1, src_file) != 1)
			break;

		// get mfcc
		mfcc_compute(mfcc, audio_buffer_16bit, mfcc_feature);

		// get the first and second derivative of mfcc
		for(uint32_t i=0; i< NUM_FEATURES; i++)
		{
			mfcc_feature_diff[i] = mfcc_feature[i] - mfcc_feature_prev[i];
			mfcc_feature_diff1[i] = mfcc_feature_diff[i] - mfcc_feature_diff_prev[i];
		}
		memcpy(mfcc_feature_prev, mfcc_feature, NUM_FEATURES * sizeof(float));
		memcpy(mfcc_feature_diff_prev, mfcc_feature_diff, NUM_FEATURES * sizeof(float));

		// combine MFCC with the first 10 first- and second-order derivatives
		memcpy(nn_features, mfcc_feature, NUM_FEATURES*sizeof(float));
		memcpy(&nn_features[NUM_FEATURES], mfcc_feature_diff, 10*sizeof(float));
		memcpy(&nn_features[NUM_FEATURES+10], mfcc_feature_diff1, 10*sizeof(float));

		// quantise them using the same scale as training data (in keras), by 2^n
		quantize_data(nn_features, nn_features_q7, NUM_FEATURES+20, 3);

		// run the model with the new input
		// NOTE(review): copies sizeof(nnom_input_data) bytes; assumes that is not larger
		// than nn_features_q7 -- confirm against the generated weights header.
		memcpy(nnom_input_data, nn_features_q7, sizeof(nnom_input_data));
		model_run(model);

		// read the result, convert it back to float (q0.7 to float)
		for(int i=0; i< NUM_FEATURES; i++)
			band_gains[i] = (float)(nnom_output_data[i]) / 127.f;

		if (flog != NULL)
			log_values(band_gains, NUM_FILTER, flog);

		// one more step: limit how fast a gain may drop, to smooth the speech, per RNNoise paper
		for(int i=0; i< NUM_FEATURES; i++)
			band_gains[i] = _MAX(band_gains_prev[i]*0.8f, band_gains[i]);
		memcpy(band_gains_prev, band_gains, NUM_FEATURES *sizeof(float));

		// apply the dynamic gains to each frequency band
		set_gains((float*)coeff_b, (float*)b_, band_gains, NUM_FILTER, NUM_ORDER);

		// convert 16bit to float for equalizer
		for (int i = 0; i < AUDIO_FRAME_LEN/2; i++)
			audio_buffer[i] = audio_buffer_16bit[i + AUDIO_FRAME_LEN / 2] / 32768.f;

		// finally, apply the equalizer to this audio frame to denoise
		equalizer(audio_buffer, &audio_buffer[AUDIO_FRAME_LEN / 2], AUDIO_FRAME_LEN/2, (float*)b_,(float*)coeff_a, NUM_FILTER, NUM_ORDER);

		// convert the filtered signal back to int16, saturating instead of relying on an
		// out-of-range float-to-integer conversion (which is undefined behaviour)
		for (int i = 0; i < AUDIO_FRAME_LEN / 2; i++)
		{
			float sample = audio_buffer[i + AUDIO_FRAME_LEN / 2] * 32768.f *0.6f;
			audio_buffer_filtered[i] = (int16_t)_MAX(_MIN(sample, 32767.f), -32768.f);
		}

		// write the filtered frame to WAV file
		if (fwrite(audio_buffer_filtered, sizeof(audio_buffer_filtered), 1, des_file) != 1)
		{
			printf("Failed to write to '%s'\n", output_file);
			goto cleanup;
		}
	}

	// print some model info
	model_io_format(model);
	model_stat(model);
	ret = 0;

cleanup:
	if (model != NULL)
		model_delete(model);
	if (flog != NULL)
		fclose(flog);
	if (src_file != NULL)
		fclose(src_file);
	if (des_file != NULL)
	{
		// fclose flushes buffered output, so a failure here means lost audio data
		if (fclose(des_file) != 0 && ret == 0)
		{
			printf("Failed to flush '%s'\n", output_file);
			ret = -1;
		}
	}

	if (ret == 0)
		printf("\nNoisy signal '%s' has been de-noised by NNoM.\nThe output is saved to '%s'.\n", input_file, output_file);
	return ret;
}

去掉上面代码中读取/写入 WAV 头(wav_header 以及跳转到 data chunk)的部分,就可以直接处理 PCM 裸数据了。

创建cmake 文件 编译dll

cmake_minimum_required(VERSION 3.10)

project(RnnDenoise)

# All sources of this target are C files, so the C standard (not the C++ one)
# is what has to be set. The code uses C99 features such as loop-scoped
# declarations and designated initializers.
set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED True)

# NNoM headers and the rnn-denoise example headers (mfcc.h, wav.h, ...).
include_directories(nnom-master/port/)
include_directories(nnom-master/inc/)
include_directories(nnom-master/examples/rnn-denoise/)

# JNI headers: prefer the JDK that CMake can locate (JAVA_HOME, registry, ...),
# and fall back to the originally hard-coded location if none is found.
find_package(JNI)
if(JNI_FOUND)
    include_directories(${JNI_INCLUDE_DIRS})
else()
    include_directories(D:/java/jdk1.8x64/include/)
    include_directories(D:/java/jdk1.8x64/include/win32/)
endif()

set(EXPLICIT_SOURCES
   RnnDenoise.c
   nnom-master/examples/rnn-denoise/mfcc.c
)
file(GLOB_RECURSE SRC_FILES "nnom-master/src/*/*.c")
set(SOURCES ${EXPLICIT_SOURCES} ${SRC_FILES})


add_library(RnnDenoise SHARED ${SOURCES})

# The sources include <math.h>; on Unix-like systems the math library has to be
# linked explicitly.
if(UNIX)
    target_link_libraries(RnnDenoise PRIVATE m)
endif()


java 库封装示例

package com.lilin.demoasr.nnom;

/**
 * JNI wrapper around a native rnn-denoise instance.
 *
 * <p>The native library "RnnDenoise" is loaded through {@code RnnLoad} when an
 * instance is constructed, and a native handle is created for the instance.
 * The handle is released by {@link #close()}, so instances are best used in a
 * try-with-resources statement.
 *
 * <p>{@link #denoise(short[])} is deliberately not synchronized; do not call
 * it concurrently with {@link #close()} on the same instance.
 *
 * @see <a href="https://github.com/majianjia/nnom/blob/master/examples/rnn-denoise/">NNoM rnn-denoise example</a>
 */
public class RnnDenoise implements AutoCloseable{

    /** Native handle returned by {@code createRnnDenoise0()}; 0 means closed. */
    private long rnndenoise;

    /** Returns the raw native handle (0 once the instance is closed). */
    public long getRnndenoise() {
        return rnndenoise;
    }

    /**
     * Replaces the raw native handle. The previous handle is NOT released by
     * this call, so the caller takes over responsibility for it.
     */
    public void setRnndenoise(long rnndenoise) {
        this.rnndenoise = rnndenoise;
    }

    /** Serializes native library loading across all instances. */
    private static final Object globalLock = new Object();

    /**
     * Loads the native library and creates a native denoiser instance.
     *
     * @throws Exception if loading the library or creating the instance fails
     */
    public RnnDenoise() throws Exception {
        synchronized (globalLock) {
            RnnLoad.load("RnnDenoise");
        }
        this.rnndenoise = createRnnDenoise0();
    }

    private static native long createRnnDenoise0();

    private native short[] denoise0(long rnndenoise,short[] var1);

    /**
     * De-noises one block of 16-bit PCM samples.
     *
     * <p>The block size is fixed at 320 samples per call; it can be enlarged
     * by changing the native C code.
     *
     * @param input one block of samples, must not be null
     * @return the de-noised samples
     * @throws NullPointerException  if {@code input} is null
     * @throws IllegalStateException if this instance has been closed
     */
    public   short[] denoise(short[] input) {
        if (input == null) {
            throw new NullPointerException("input");
        }
        // Never hand a released (zero) handle to native code.
        long handle = this.rnndenoise;
        if (handle == 0L) {
            throw new IllegalStateException("RnnDenoise is closed");
        }
        return this.denoise0(handle, input);
    }

    private native long destroyRnnDenoise0();

    /**
     * Releases the native instance. Calling it again is a no-op, so the native
     * destroy function runs at most once per handle.
     */
    public void close() {
        synchronized (this) {
            if (this.rnndenoise != 0L) {
                this.destroyRnnDenoise0();
                this.rnndenoise = 0L;
            }
        }
    }

    /** Returns true once {@link #close()} has released the native handle. */
    public boolean isClosed() {
        synchronized (this) {
            return this.rnndenoise == 0L;
        }
    }
}

test:

 /**
  * Manual test: de-noises every listed PCM file on its own thread, each thread
  * using its own RnnDenoise instance, and writes the result next to the
  * desktop as "&lt;index&gt;.pcm".
  *
  * Input is consumed in frames of 320 little-endian 16-bit samples (640 bytes).
  * A final short frame is zero-padded before processing and only the bytes
  * that correspond to real input are written out.
  */
 public static void main (String[] args) {

        String sList []= new String[]{"G:\\work\\ai\\ZipEnhancer\\r1.pcm","C:\\Users\\\\lilin\\Desktop\\16k.pcm"};
        List< Thread> lts= new ArrayList<>();

        for (int i = 0; i < sList.length; i++) {
            final String file =sList[i];
            final int finalI = i;
            lts.add(new Thread(new Runnable() {
                @Override
                public void run() {
                    // try-with-resources closes the denoiser and both streams on every path,
                    // including when an exception is thrown mid-file.
                    try (RnnDenoise rnnDenoise = new RnnDenoise();
                         FileInputStream f = new FileInputStream(file);
                         FileOutputStream f1 = new FileOutputStream("C:\\Users\\\\lilin\\Desktop\\"+ finalI +".pcm")) {
                        System.out.println(rnnDenoise.getRnndenoise());

                        byte[] z = new byte[640];
                        byte[] z1 = new byte[640];
                        short[] sa = new short[320];

                        while (true) {
                            // read() may return fewer bytes than requested, so keep
                            // reading until the frame is full or the stream ends.
                            int filled = 0;
                            while (filled < z.length) {
                                int n = f.read(z, filled, z.length - filled);
                                if (n == -1) {
                                    break;
                                }
                                filled += n;
                            }
                            if (filled == 0) {
                                break;
                            }
                            // Zero the unread tail so no bytes from the previous frame leak in.
                            for (int k = filled; k < z.length; k++) {
                                z[k] = 0;
                            }

                            ByteBuffer.wrap(z).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(sa);

                            short[] denoisedAudio = rnnDenoise.denoise(sa);

                            ByteBuffer.wrap(z1).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(denoisedAudio);

                            f1.write(z1, 0, filled);

                            if (filled < z.length) {
                                break; // short frame means end of input
                            }
                        }
                        System.out.println(finalI+"end.");
                    }catch (Exception e){e.printStackTrace();}
                }
            }));
        }


        for (Thread  y:  lts  ) {
            y.start();
        }

        for (Thread  y:  lts  ) {
           try{ y.join();}catch (Exception e){e.printStackTrace();}
        }
        System.out.println("end...");

    }
}

nnom 默认生成的 denoise_weights.h 是单例实现,无法同时创建多个模型实例,因此 Java 侧无法在多线程中并发使用。可以自行修改:主要涉及其中的 static 变量,以及 nnom_tensor_t 需要改为用 malloc 的方式动态创建。

测试下来速度挺快,几十分钟的音频很快就能降噪完成;也可以和 FreeSWITCH 对接,做多路实时降噪后再识别。

如果模块或流程觉得麻烦可以到 

https://item.taobao.com/item.htm?id=653611115230

 视频教程yuexiajiayan的个人空间-yuexiajiayan个人主页-哔哩哔哩视频

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:/a/946048.html

如若内容造成侵权/违法违规/事实不符,请联系我们进行投诉反馈qq邮箱809451989@qq.com,一经查实,立即删除!

相关文章

Windows系统 系统盘瘦身策略之文件迁移

1 Android Studio 1.1 .android 该文件夹路径一般在 C:\Users\<user_name>\.android 迁移步骤: ①关闭 Android Studio ②打开环境变量设置,添加以下环境变量 变量名:ANDROID_SDK_HOME 变量值:你自己的路径【不用单独创建.…

SQLiteDataBase数据库

XML界面设计 <?xml version"1.0" encoding"utf-8"?> <LinearLayout xmlns:android"http://schemas.android.com/apk/res/android"xmlns:tools"http://schemas.android.com/tools"android:layout_width"match_paren…

Midjourney技术浅析(七):图像风格化

Midjourney 通过风格迁移(Style Transfer)和图像滤镜(Image Filters)技术,使用户能够将生成的图像转换为不同的艺术风格或视觉效果。 一、风格迁移(Style Transfer) 1.1 风格迁移的定义 风格…

Edge安装问题,安装后出现:Could not find Edge installation

解决:需要再安装(MicrosoftEdgeWebView2RuntimeInstallerX64)。 网址:https://developer.microsoft.com/zh-cn/microsoft-edge/webview2/?form=MA13LH#download 如果已经安装了edge,那就再下载中间这个独立程序安装就…

【JAVA高级篇教学】第六篇:Springboot实现WebSocket

在 Spring Boot 中对接 WebSocket 是一个常见的场景,通常用于实现实时通信。以下是一个完整的 WebSocket 集成步骤,包括服务端和客户端的实现。本期做个简单的测试用例。 目录 一、WebSocket 简介 1. 什么是 WebSocket? 2. WebSocket 的特…

Painter-Mortadela靶场

信息收集 枚举端口 nmap 192.168.109.132 -sS -sV -min-rate 5000 -Pn -p- -p- &#xff1a;扫描所有端口。 (65535)-sS&#xff1a;执行TCP SYN 扫描以快速扫描哪些端口打开。-sC&#xff1a;使用基本识别脚本执行扫描-sV&#xff1a;执行服务扫描–min-rate 5000&#xff1…

攻防世界pwn刷题

get_shell 这题直接给shell了 exp from pwn import* p remote(61.147.171.105,59682) p.sendline(cat flag) p.interactive() cyberpeace{8cd678c722f48327a69b2661ae8956c8} hello_pwn checksec一下 ok&#xff0c;64位的 {alarm(0x3Cu);setbuf(stdout, 0LL);puts("…

1、pycharm、python下载与安装

1、去官网下载pycharm 官网&#xff1a;https://www.jetbrains.com/pycharm/download/?sectionwindows 2、在等待期间&#xff0c;去下载python 进入官网地址&#xff1a;https://www.python.org/downloads/windows/ 3、安装pycharm 桌面会出现快捷方式 4、安装python…

epoll的ET和LT模式

LevelTriggered:简称LT,当FD有数据可读时,会重复通知多次,直至数据处理完成。是epoll的默认模式。EdgeTriggered:简称ET,当FD有数据可读时,只通知一次,不管数据是否处理完成 Level是指…

CSS利用浮动实现文字环绕右下角,展开/收起效果

期望实现 文字最多展示 N 行&#xff0c;超出部分截断&#xff0c;并在右下角显示 “…” “更多”&#xff1b; 点击更多&#xff0c;文字展开全部内容&#xff0c;右下角显示“收起”。效果如下&#xff1a; 思路 尽量使用CSS控制样式&#xff0c;减少JS代码复杂度。 利…

单元测试入门和mockup

Java 新手入门&#xff1a;Java单元测试利器&#xff0c;Mock详解_java mock-CSDN博客 这个是典型的before when assert三段式&#xff0c;学一下单测思路 这个没有动态代理&#xff0c;所以是直接class(对比下面) Jmockit使用笔记_增加代码覆盖率_覆盖try catch_使用new Mock…

开发小工具:ping地址

开发小工具&#xff1a;ping地址 import socketdef tcp_port_scan(ip,port):#创建套接字socksocket.socket(socket.AF_INET,socket.SOCK_STREAM)#设置超时sock.settimeout(0.2)try:#发请求result sock.connect_ex((ip,port))if result 0:print(f{ip}--{port}接口连接成功)res…

双汇火腿肠,请勿随意喂猫

在许多家庭中,猫咪作为可爱的宠物成员,备受宠爱。当我们享受着双汇火腿肠的便捷与美味时,或许会有人想到与猫咪分享,但这种看似温馨的举动实则隐藏着诸多问题,双汇火腿肠并不适合喂猫。 从营养成分来看,双…

Unity Excel转Json编辑器工具

功能说明&#xff1a;根据 .xlsx 文件生成对应的 JSON 文件&#xff0c;并自动创建脚本 注意事项 Excel 读取依赖 本功能依赖 EPPlus 库&#xff0c;只能读取 .xlsx 文件。请确保将该脚本放置在 Assets 目录下的 Editor 文件夹中。同时&#xff0c;在 Editor 下再创建一个 Exc…

深信服云桌面系统的终端安全准入设置

深信服的云桌面系统在默认状态下没有终端的安全准入设置,这也意味着同样的虚拟机,使用云桌面终端或者桌面套件都可以登录,但这也给系统带来了一些安全隐患,所以,一般情况下需要设置终端的安全准入策略,防止…

基于SpringBoot的实验室信息管理系统【源码+文档+部署讲解】

系统介绍 视频演示 基于SpringBootVue实现的实验室信息管理系统采用前后端分离的架构方式&#xff0c;系统分为管理员、老师、用户三种角色&#xff0c;实现了用户管理、设备管理、实验室查询、公告、课程、实验室耗材管理、我的等功能 技术选型 开发工具&#xff1a;idea2…

Windows 10 自带功能实现大屏、小屏无线扩展

一、添加可选功能 在作为无线投屏对象的「第二屏」设备上&#xff0c;打开 Windows 10 设置并定位至「应用 > 应用和功能」界面&#xff0c;然后点击右侧界面中的「可选功能」选项。 点击可选功能界面顶部的「添加功能」按钮&#xff0c;搜索「无线显示器」模块并选择添加。…

大电流和大电压采样电路

大电压采样电路&#xff1a; 需要串联多个电阻进行分压&#xff0c;从而一级一级降低电压&#xff0c;防止电阻损坏或者短路直接打穿MCU。 为什么需要加电压跟随器&#xff1a;进行阻抗的隔离&#xff0c;防止MCU的IO阻抗对分压产生影响&#xff1a; 大电流检测电路&#xff…

torch.nn.functional的用法

文章目录 介绍激活函数示例 损失函数示例 卷积操作示例 池化示例 归一化操作示例 Dropout示例 torch.nn.functional 与 torch.nn 的区别 介绍 torch.nn.functional 是 PyTorch 中的一个模块&#xff0c;提供了许多函数式的神经网络操作&#xff0c;包括激活函数、损失函数、卷…

生物信息学软件开发综述学习

目录 ①编程语言和开源工具和库 ②轻量级 R 包开发 ③大规模组学软件开发 ④示例 1.轻量级 R 包开发示例及数据 2.大规模组学软件开发 文献&#xff1a;Bioinformatics software development: Principles and future directions ①编程语言和开源工具和库 在生物信息学…