Unverified commit d7e9480c, authored by Justice

Second section: 3/3 done. Report remains.

parent 0144423d
% B3: add white Gaussian noise to a recorded voice at several SNR settings,
% run the spectral-subtraction enhancer (SSA) on each noisy copy and plot the
% noisy input above the enhanced output, one figure per SNR setting.
pkg load communications   % provides awgn()

[X, Fs] = audioread("voices/v3.wav");

snrLevels = [50 70 100 120];   % SNR argument handed to awgn() for figures 1..4
initialSilence = .25;          % seconds of leading noise-only audio given to SSA

noisy = cell(size(snrLevels));
enhanced = cell(size(snrLevels));

for k = 1:numel(snrLevels)
  % NOTE(review): awgn() is called without the 'measured' option, so the noise
  % level is derived from awgn's default signal power rather than from the
  % actual power of X -- confirm this is the intended SNR definition.
  noisy{k} = awgn(X, snrLevels(k));
  enhanced{k} = SSA(noisy{k}, Fs, initialSilence);

  figure(k);

  % Top panel: the noisy input (previously the SSA output was drawn here
  % under the "Noisy Signal" title).
  subplot (2, 1, 1)
  plot(noisy{k});
  title(sprintf("Noisy Signal - SNR = %d", snrLevels(k)))

  % Bottom panel: the signal after spectral subtraction.
  subplot (2, 1, 2)
  plot(enhanced{k});
  title("Enhanced Signal")
end

% Keep the workspace variable names the unrolled version of this script produced.
[X1, X2, X3, X4] = noisy{:};
[output1, output2, output3, output4] = enhanced{:};
function output=SSA(signal,fs,IS)
% SSA  Speech enhancement by magnitude spectral subtraction.
%   OUTPUT = SSA(SIGNAL, FS, IS) splits SIGNAL into 25 ms Hamming-windowed
%   frames with a 40% shift, estimates a noise magnitude spectrum from the
%   first IS seconds, subtracts it from the three-frame-averaged magnitude
%   spectrum of every frame that the voice activity detector (vad) labels as
%   speech, and resynthesises the result with the noisy phase by overlap-add.
%   Frames labelled as noise-only update the noise estimate and are written
%   to the output attenuated by Beta.
%
%   SIGNAL  noisy input samples (indexed linearly).
%   FS      sampling frequency in Hz.
%   IS      length in seconds of the initial noise-only part
%           (optional, default .25).
%
%   OUTPUT  reconstructed time-domain signal (column vector).
%
%   Raises an error when SIGNAL is too short to contain one full frame.

if nargin<3 || isempty(IS)
  IS = .25; %seconds
end

W = fix(.025*fs);   %Window length is 25 ms
nfft = W;
SP = 0.4;           %Shift percentage is 40% (10ms); overlap-add works well with this value
wnd = hamming(W);   %W-point symmetric Hamming window
Gamma = 1;          %Magnitude power (1: magnitude spectral subtraction, 2: power spectral subtraction)

y = segment(signal,W,SP,wnd);
Y = fft(y,nfft);
YPhase = angle(Y(1:fix(end/2)+1,:));   %Noisy speech phase (one-sided spectrum)
Y = abs(Y(1:fix(end/2)+1,:)).^Gamma;   %Spectrogram (one-sided)
numberOfFrames = size(Y,2);
if numberOfFrames < 1
  error('SSA:signalTooShort', ...
        'SSA: signal is shorter than one analysis window (%d samples).', W);
end

%Number of initial silence frames, kept inside 1..numberOfFrames so the
%noise estimate is always taken from frames that exist.
NIS = fix((IS*fs-W)/(SP*W) +1);
NIS = min(max(NIS,1),numberOfFrames);

%Initial noise spectrum: mean over frames. Averaging along dimension 2
%keeps a column vector even when NIS is 1.
N = mean(Y(:,1:NIS),2);
NRM = zeros(size(N));   %Noise residual maximum (initialisation)
NoiseCounter = 0;
NoiseLength = 9;        %Smoothing factor for the noise update
Beta = .03;             %Gain applied to frames classified as noise-only

%Magnitude averaging over three neighbouring frames (first and last frame are left as they are)
YS = Y;
for i=2:(numberOfFrames-1)
  YS(:,i)=(Y(:,i-1)+Y(:,i)+Y(:,i+1))/3;
end

X = zeros(size(Y));     %Enhanced magnitude spectrogram, preallocated
for i=1:numberOfFrames
  %Magnitude spectrum distance VAD
  [~, SpeechFlag, NoiseCounter] = vad(Y(:,i).^(1/Gamma),N.^(1/Gamma),NoiseCounter);
  if SpeechFlag==0
    N=(NoiseLength*N+Y(:,i))/(NoiseLength+1);   %Update and smooth noise
    NRM=max(NRM,YS(:,i)-N);                     %Update maximum noise residue
    X(:,i)=Beta*Y(:,i);
  else
    D=YS(:,i)-N;   %Spectral subtraction
    if i>1 && i<numberOfFrames
      %Residual noise reduction: bins whose difference is below the maximum
      %noise residue take the minimum over the adjacent frames.
      neighbourMin=min([D, YS(:,i-1)-N, YS(:,i+1)-N],[],2);
      weak=D<NRM;
      D(weak)=neighbourMin(weak);
    end
    X(:,i)=max(D,0);   %Half-wave rectification
  end
end

%The shift is truncated here to the same whole number of samples that
%segment() uses, so OverlapAdd2 does not have to correct (and report) it.
output=OverlapAdd2(X.^(1/Gamma),YPhase,W,fix(SP*W));
function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen)
% OVERLAPADD2  Rebuild a time-domain signal from a one-sided spectrogram.
%   Y = OverlapAdd2(XNEW, YPHASE, WINDOWLEN, SHIFTLEN)
%
%   XNEW       magnitude spectrogram, one column per frame, holding the
%              non-redundant (one-sided) bins.
%   YPHASE     phase angles, same size as XNEW (default: angle(XNEW)).
%   WINDOWLEN  frame length in samples (default: 2*(size(XNEW,1)-1), the
%              even length whose one-sided spectrum has size(XNEW,1) bins).
%   SHIFTLEN   hop between successive frames in samples (default:
%              WINDOWLEN/2). A non-integer value is truncated and reported.
%
%   Y          column vector of (number of frames - 1)*SHIFTLEN + WINDOWLEN samples.

if nargin<2
  yphase=angle(XNEW);
end
if nargin<3
  windowLen=2*(size(XNEW,1)-1);
end
if nargin<4
  ShiftLen=windowLen/2;
end

if fix(ShiftLen)~=ShiftLen
  ShiftLen=fix(ShiftLen);
  disp('The shift length have to be an integer as it is the number of samples.')
  disp(['shift length is fixed to ' num2str(ShiftLen)])
end

FrameNum=size(XNEW,2);

% 1i is used instead of j so a variable named j can never change the result.
Spec=XNEW.*exp(1i*yphase);

% Append the conjugate-symmetric half to obtain a full-length spectrum.
if mod(windowLen,2) %window length is odd: there is no Nyquist bin
  Spec=[Spec;flipud(conj(Spec(2:end,:)))];
else                %window length is even: the last row is the Nyquist bin
  Spec=[Spec;flipud(conj(Spec(2:end-1,:)))];
end

sig=zeros((FrameNum-1)*ShiftLen+windowLen,1);
for i=1:FrameNum
  start=(i-1)*ShiftLen+1;
  idx=start:start+windowLen-1;
  sig(idx)=sig(idx)+real(ifft(Spec(:,i),windowLen));
end
ReconstructedSignal=sig;
function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCounter,NoiseMargin,Hangover)
% VAD  Spectral-distance voice activity detector for one frame.
%   [NOISEFLAG, SPEECHFLAG, NOISECOUNTER, DIST] = vad(SIGNAL, NOISE,
%                                     NOISECOUNTER, NOISEMARGIN, HANGOVER)
%
%   SIGNAL        magnitude spectrum of the current frame.
%   NOISE         noise magnitude spectrum estimate, same size as SIGNAL.
%   NOISECOUNTER  number of immediately preceding noise frames (default 0).
%   NOISEMARGIN   distance threshold below which the frame counts as noise
%                 (default 3).
%   HANGOVER      SPEECHFLAG drops to 0 once NOISECOUNTER exceeds this value
%                 (default 8).
%
%   NOISEFLAG     1 when DIST < NOISEMARGIN, otherwise 0.
%   SPEECHFLAG    0 when NOISECOUNTER > HANGOVER, otherwise 1.
%   NOISECOUNTER  incremented for a noise frame, reset to 0 otherwise.
%   DIST          mean over bins of the non-negative log-spectral distance.

if nargin<3
  NoiseCounter=0;
end
if nargin<4
  NoiseMargin=3;
end
if nargin<5
  Hangover=8;
end

% Floor both spectra at eps before taking logarithms: a bin that is exactly
% zero would otherwise give -Inf, and a zero bin in both spectra would give
% NaN, which poisons the mean and makes the comparison below always false.
SpectralDist=20*(log10(max(signal,eps))-log10(max(noise,eps)));
SpectralDist(SpectralDist<0)=0; % negative distances are discarded
Dist=mean(SpectralDist);

if (Dist < NoiseMargin)
  NoiseFlag=1;
  NoiseCounter=NoiseCounter+1;
else
  NoiseFlag=0;
  NoiseCounter=0;
end

% Only a sufficiently long run of noise frames clears the speech flag.
if (NoiseCounter > Hangover)
  SpeechFlag=0;
else
  SpeechFlag=1;
end
function Seg=segment(signal,W,SP,Window)
% SEGMENT  Chop a signal into overlapping, windowed frames.
%   SEG = segment(SIGNAL, W, SP, WINDOW) returns a W-by-N matrix whose
%   columns are successive frames of SIGNAL multiplied by WINDOW.
%
%   SIGNAL  one-dimensional signal (indexed linearly).
%   W       samples per frame (default 256).
%   SP      shift between frames as a fraction of W (default 0.4); the shift
%           actually used is fix(W*SP) samples.
%   WINDOW  W-point window applied to every frame (default hamming(W)).
%
%   Raises an error when fix(W*SP) is smaller than one sample.

if nargin<2
  W=256;
end
if nargin<3
  SP=.4;
end
if nargin<4
  Window=hamming(W);
end

Window=Window(:); %make it a column vector
L=length(signal);
shift=fix(W.*SP); %frame shift in whole samples
if shift<1
  % A zero shift would divide by zero below and never advance through the signal.
  error('segment:shiftTooSmall', ...
        'segment: W*SP must be at least one sample (W=%d, SP=%g).', W, SP);
end
N=fix((L-W)/shift +1); %number of segments

% Column k of Index holds the sample numbers (k-1)*shift + (1:W).
Index=bsxfun(@plus,(1:W)',(0:(N-1))*shift);
Seg=bsxfun(@times,signal(Index),Window);
\relax \relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\zref@newlabel[2]{} \providecommand\zref@newlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {1}تشخیص جنسیت از روی صدا}{1}} \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1} تجزیه و تحلیل طیفی به کمک تبدیل فوریه}{1}} \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2} بررسی دو مورد از صدا ها}{1}} \global\let\oldcontentsline\contentsline
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces طیف توان برای صدای ضبط شده مرد شماره 3\relax }}{2}} \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces طیف توان برای صدای ضبط شده زن شماره 10\relax }}{3}} \global\let\oldnewlabel\newlabel
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}اوج صدا}{4}} \gdef\newlabel#1#2{\newlabelxx{#1}#2}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}تشخیص جنسیت}{4}} \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\@writefile{toc}{\contentsline {section}{\numberline {2} الگوریتم هایی برای بهبود صدا}{5}} \AtEndDocument{\ifx\hyper@anchor\@undefined
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}معرفی مختصری از الگوریتم تفاضل طیف}{5}} \let\contentsline\oldcontentsline
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}پیاده سازی و نکات آن}{6}} \let\newlabel\oldnewlabel
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}تست الگوریتم در مقابل نویزهای متفاوت}{7}} \fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\HyPL@Entry{1<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}تشخیص جنسیت از روی صدا}{1}{section.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1} تجزیه و تحلیل طیفی به کمک تبدیل فوریه}{1}{subsection.1.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2} بررسی دو مورد از صدا ها}{1}{subsection.1.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces طیف توان برای صدای ضبط شده مرد شماره 3\relax }}{2}{figure.caption.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces طیف توان برای صدای ضبط شده زن شماره 10\relax }}{3}{figure.caption.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}اوج صدا}{4}{subsection.1.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}تشخیص جنسیت}{4}{subsection.1.4}}
\@writefile{toc}{\contentsline {section}{\numberline {2} الگوریتم هایی برای بهبود صدا}{5}{section.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}معرفی مختصری از الگوریتم تفاضل طیف}{5}{subsection.2.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}پیاده سازی و نکات آن}{6}{subsection.2.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}تست الگوریتم در مقابل نویزهای متفاوت}{7}{subsection.2.3}}
\BOOKMARK [1][-]{section.1}{تشخیص جنسیت از روی صدا}{}% 1
\BOOKMARK [2][-]{subsection.1.1}{ تجزیه و تحلیل طیفی به کمک تبدیل فوریه}{section.1}% 2
\BOOKMARK [2][-]{subsection.1.2}{ بررسی دو مورد از صدا ها}{section.1}% 3
\BOOKMARK [2][-]{subsection.1.3}{اوج صدا}{section.1}% 4
\BOOKMARK [2][-]{subsection.1.4}{تشخیص جنسیت}{section.1}% 5
\BOOKMARK [1][-]{section.2}{ الگوریتم هایی برای بهبود صدا}{}% 6
\BOOKMARK [2][-]{subsection.2.1}{معرفی مختصری از الگوریتم تفاضل طیف}{section.2}% 7
\BOOKMARK [2][-]{subsection.2.2}{پیاده سازی و نکات آن}{section.2}% 8
\BOOKMARK [2][-]{subsection.2.3}{تست الگوریتم در مقابل نویزهای متفاوت}{section.2}% 9
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
\usepackage{wrapfig} \usepackage{wrapfig}
\usepackage{caption} \usepackage{caption}
\usepackage{mathtools} \usepackage{mathtools}
\usepackage{hyperref}
\usepackage{cleveref} \usepackage{cleveref}
\usepackage{graphicx} \usepackage{graphicx}
...@@ -131,9 +132,21 @@ voices/v9.wav women\\ ...@@ -131,9 +132,21 @@ voices/v9.wav women\\
این روش بعد ها اصلاح شد و الگوریتم های پیچیده و روش های غیر خطی به کار گرفته شد تا علاوه بر حذف نویزهای واقعی محیط نویز موزیکال را نیز کاهش دهد. این روش بعد ها اصلاح شد و الگوریتم های پیچیده و روش های غیر خطی به کار گرفته شد تا علاوه بر حذف نویزهای واقعی محیط نویز موزیکال را نیز کاهش دهد.
ما در اینجا الگوریتم بول را پیاده سازی کرده‌ایم که در فایل ما در اینجا الگوریتم بول را پیاده سازی کرده‌ایم که در فایل
\lr{SSBoll79.m} \lr{SSA.m}
قرار دارد. قرار دارد.
روش کار الگوریتم: ما از ناحیه ای که صدا ندارد و به اصطلاح سکوت است کمک میگیریم که نویز را تخمین بزنیم حال فرض میکنیم که این نویز از هر فریم به فریم دیگر زیاد تغییر نمیکند و در هر فریم طیف نویز را از صوت کم میکنیم.
توضیحات الگوریتم در سورس کد بصورت کامنت قرار داده شد.
توضیحات الگوریتم:
\href{http://practicalcryptography.com/miscellaneous/machine-learning/tutorial-spectral-subraction/}{این وب سایت}
\subsection{تست الگوریتم در مقابل نویزهای متفاوت} \subsection{تست الگوریتم در مقابل نویزهای متفاوت}
با اجرا کردن فایل
\lr{B3.m}
میتوانیم قدرت الگوریتم را در مقابل 4 نویز مشاهده کنیم.
\end{document} \end{document}
......
function output=SSBoll79(signal,fs,IS) OverlapAdd2(XNEW,yphase,windowLen,ShiftLen);
% OUTPUT=SSBOLL79(S,FS,IS) %Y=OverlapAdd(X,A,W,S);
% Spectral Subtraction based on Boll 79. Amplitude spectral subtraction %Y is the signal reconstructed signal from its spectrogram. X is a matrix
% Includes Magnitude Averaging and Residual noise Reduction %with each column being the fft of a segment of signal. A is the phase
% S is the noisy signal, FS is the sampling frequency and IS is the initial %angle of the spectrum which should have the same dimension as X. if it is
% silence (noise only) length in seconds (default value is .25 sec) %not given the phase angle of X is used which in the case of real values is
% %zero (assuming that its the magnitude). W is the window length of time
% April-05 %domain segments if not given the length is assumed to be twice as long as
% Esfandiar Zavarehei %fft window length. S is the shift length of the segmentation process ( for
%example in the case of non overlapping signals it is equal to W and in the
if (nargin<3 | isstruct(IS)) %case of %50 overlap is equal to W/2. if not givven W/2 is used. Y is the
IS=.25; %seconds %reconstructed time domain signal.
end
W=fix(.025*fs); %Window length is 25 ms
nfft=W;
SP=.4; %Shift percentage is 40% (10ms) %Overlap-Add method works good with this value(.4)
wnd=hamming(W);
function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCounter,NoiseMargin,Hangover)
% IGNORE THIS SECTION FOR CAMPATIBALITY WITH ANOTHER PROGRAM FROM HERE.....
if (nargin>=3 & isstruct(IS))%This option is for compatibility with another programme %[NOISEFLAG, SPEECHFLAG, NOISECOUNTER, DIST]=vad(SIGNAL,NOISE,NOISECOUNTER,NOISEMARGIN,HANGOVER)
W=IS.windowsize %Spectral Distance Voice Activity Detector
SP=IS.shiftsize/W; %SIGNAL is the the current frames magnitude spectrum which is to labeld as
nfft=IS.nfft; %noise or speech, NOISE is noise magnitude spectrum template (estimation),
wnd=IS.window; %NOISECOUNTER is the number of imediate previous noise frames, NOISEMARGIN
if isfield(IS,'IS') %(default 3)is the spectral distance threshold. HANGOVER ( default 8 )is
IS=IS.IS; %the number of noise segments after which the SPEECHFLAG is reset (goes to
else %zero). NOISEFLAG is set to one if the the segment is labeld as noise
IS=.25; %NOISECOUNTER returns the number of previous noise segments, this value is
end %reset (to zero) whenever a speech segment is detected. DIST is the
end %spectral distance.
% .......IGNORE THIS SECTION FOR CAMPATIBALITY WITH ANOTHER PROGRAM T0 HERE
NIS=fix((IS*fs-W)/(SP*W) +1);%number of initial silence segments function Seg=segment(signal,W,SP,Window)
Gamma=1;%Magnitude Power (1 for magnitude spectral subtraction 2 for power spectrum subtraction)
% SEGMENT chops a signal to overlapping windowed segments
y=segment(signal,W,SP,wnd); % A = SEGMENT(X,W,SP,WIN) returns a matrix which its columns are segmented
Y=fft(y,nfft); % and windowed frames of the input one dimentional signal, X. W is the
YPhase=angle(Y(1:fix(end/2)+1,:)); %Noisy Speech Phase % number of samples per window, default value W=256. SP is the shift
Y=abs(Y(1:fix(end/2)+1,:)).^Gamma;%Specrogram % percentage, default value SP=0.4. WIN is the window that is multiplied by
numberOfFrames=size(Y,2); % each segment and its length should be W. the default window is hamming
FreqResol=size(Y,1); % window.
N=mean(Y(:,1:NIS)')'; %initial Noise Power Spectrum mean
NRM=zeros(size(N));% Noise Residual Maximum (Initialization)
NoiseCounter=0;
NoiseLength=9;%This is a smoothing factor for the noise updating
Beta=.03;
YS=Y; %Y Magnitude Averaged
for i=2:(numberOfFrames-1)
YS(:,i)=(Y(:,i-1)+Y(:,i)+Y(:,i+1))/3;
end
for i=1:numberOfFrames
[NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(Y(:,i).^(1/Gamma),N.^(1/Gamma),NoiseCounter); %Magnitude Spectrum Distance VAD
if SpeechFlag==0
N=(NoiseLength*N+Y(:,i))/(NoiseLength+1); %Update and smooth noise
NRM=max(NRM,YS(:,i)-N);%Update Maximum Noise Residue
X(:,i)=Beta*Y(:,i);
else
D=YS(:,i)-N; % Specral Subtraction
if i>1 && i<numberOfFrames %Residual Noise Reduction
for j=1:length(D)
if D(j)<NRM(j)
D(j)=min([D(j) YS(j,i-1)-N(j) YS(j,i+1)-N(j)]);
end
end
end
X(:,i)=max(D,0);
end
end
output=OverlapAdd2(X.^(1/Gamma),YPhase,W,SP*W);
function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen)
%Y=OverlapAdd(X,A,W,S);
%Y is the signal reconstructed from its spectrogram. X is a matrix with each
%column holding the one-sided (non-redundant) magnitude spectrum of a segment
%of the signal. A is the phase angle of the spectrum, which should have the
%same dimension as X; if it is not given the phase angle of X is used, which
%in the case of real values is zero (assuming that it is the magnitude).
%W is the window length of the time domain segments; if not given it is taken
%as 2*(rows of X - 1), i.e. the even window length whose one-sided spectrum
%has as many bins as X has rows. S is the shift length of the segmentation
%process (for example in the case of non overlapping signals it is equal to
%W and in the case of 50% overlap it is equal to W/2); if not given W/2 is
%used. Y is the reconstructed time domain signal.
%Sep-04
%Esfandiar Zavarehei

if nargin<2
  yphase=angle(XNEW);
end
if nargin<3
  % The previous default, size(XNEW,1)*2, did not match the mirroring below:
  % the mirrored spectrum had two bins fewer than the window, so ifft
  % zero-padded it and the result was no longer a real signal.
  windowLen=2*(size(XNEW,1)-1);
end
if nargin<4
  ShiftLen=windowLen/2;
end
if fix(ShiftLen)~=ShiftLen
  ShiftLen=fix(ShiftLen);
  disp('The shift length have to be an integer as it is the number of samples.')
  disp(['shift length is fixed to ' num2str(ShiftLen)])
end

FrameNum=size(XNEW,2);

% 1i is used instead of j so a variable named j can never change the result.
Spec=XNEW.*exp(1i*yphase);

% Append the conjugate-symmetric half to obtain a full-length spectrum.
if mod(windowLen,2) %window length is odd: there is no Nyquist bin
  Spec=[Spec;flipud(conj(Spec(2:end,:)))];
else                %window length is even: the last row is the Nyquist bin
  Spec=[Spec;flipud(conj(Spec(2:end-1,:)))];
end

sig=zeros((FrameNum-1)*ShiftLen+windowLen,1);
for i=1:FrameNum
  start=(i-1)*ShiftLen+1;
  idx=start:start+windowLen-1;
  sig(idx)=sig(idx)+real(ifft(Spec(:,i),windowLen));
end
ReconstructedSignal=sig;
function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCounter,NoiseMargin,Hangover)
%[NOISEFLAG, SPEECHFLAG, NOISECOUNTER, DIST]=vad(SIGNAL,NOISE,NOISECOUNTER,NOISEMARGIN,HANGOVER)
%Spectral Distance Voice Activity Detector
%SIGNAL is the current frame's magnitude spectrum, which is to be labelled as
%noise or speech. NOISE is the noise magnitude spectrum template (estimate).
%NOISECOUNTER (default 0) is the number of immediately preceding noise
%frames. NOISEMARGIN (default 3) is the spectral distance threshold.
%HANGOVER (default 8) is the number of noise segments after which SPEECHFLAG
%is reset (goes to zero). NOISEFLAG is set to one if the segment is labelled
%as noise. NOISECOUNTER returns the number of previous noise segments; this
%value is reset (to zero) whenever a speech segment is detected. DIST is the
%spectral distance.
%Saeed Vaseghi
%edited by Esfandiar Zavarehei
%Sep-04

if nargin<3
  NoiseCounter=0;
end
if nargin<4
  NoiseMargin=3;
end
if nargin<5
  Hangover=8;
end

% Floor both spectra at eps before taking logarithms: a bin that is exactly
% zero would otherwise give -Inf, and a zero bin in both spectra would give
% NaN, which poisons the mean and makes the comparison below always false.
SpectralDist=20*(log10(max(signal,eps))-log10(max(noise,eps)));
SpectralDist(SpectralDist<0)=0; % negative distances are discarded
Dist=mean(SpectralDist);

if (Dist < NoiseMargin)
  NoiseFlag=1;
  NoiseCounter=NoiseCounter+1;
else
  NoiseFlag=0;
  NoiseCounter=0;
end

% Detect noise only periods and attenuate the signal
if (NoiseCounter > Hangover)
  SpeechFlag=0;
else
  SpeechFlag=1;
end
function Seg=segment(signal,W,SP,Window)
% SEGMENT chops a signal to overlapping windowed segments
% A= SEGMENT(X,W,SP,WIN) returns a matrix whose columns are segmented
% and windowed frames of the input one dimensional signal, X. W is the
% number of samples per window, default value W=256. SP is the shift
% percentage, default value SP=0.4; the shift actually used is fix(W*SP)
% samples and must be at least one sample, otherwise an error is raised.
% WIN is the window that is multiplied by each segment and its length
% should be W. The default window is the hamming window.
% 06-Sep-04
% Esfandiar Zavarehei

if nargin<3
  SP=.4;
end
if nargin<2
  W=256;
end
if nargin<4
  Window=hamming(W);
end

Window=Window(:); %make it a column vector
L=length(signal);
shift=fix(W.*SP); %frame shift in whole samples
if shift<1
  % A zero shift would divide by zero below and never advance through the signal.
  error('segment:shiftTooSmall', ...
        'segment: W*SP must be at least one sample (W=%d, SP=%g).', W, SP);
end
N=fix((L-W)/shift +1); %number of segments

% Column k of Index holds the sample numbers (k-1)*shift + (1:W).
Index=bsxfun(@plus,(1:W)',(0:(N-1))*shift);
Seg=bsxfun(@times,signal(Index),Window);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment