Unverified Commit d7e9480c authored by Justice's avatar Justice

Second section: 3/3 done. Report remains.

parent 0144423d
% Demo script: corrupt one recording with additive white Gaussian noise at
% four different SNR settings, run the spectral-subtraction enhancer (SSA)
% on each noisy version, and show noisy vs. enhanced waveforms side by side.
%
% Each figure shows the noisy input on top and the SSA output underneath,
% so that every plot matches the title above it.
pkg load communications % provides awgn

[X, Fs] = audioread("voices/v3.wav");

% --- SNR argument = 50 ---
X1 = awgn(X, 50);
output1 = SSA(X1, Fs, .25);
figure(1);
subplot (2, 1, 1)
plot(X1); % noisy input
title("Noisy Signal - SNR = 50")
subplot (2, 1, 2)
plot(output1); % SSA result
title("Enhanced Signal")

% --- SNR argument = 70 ---
X2 = awgn(X, 70);
output2 = SSA(X2, Fs, .25);
figure(2);
subplot (2, 1, 1)
plot(X2); % noisy input
title("Noisy Signal - SNR = 70")
subplot (2, 1, 2)
plot(output2); % SSA result
title("Enhanced Signal")

% --- SNR argument = 100 ---
X3 = awgn(X, 100);
output3 = SSA(X3, Fs, .25);
figure(3);
subplot (2, 1, 1)
plot(X3); % noisy input
title("Noisy Signal - SNR = 100")
subplot (2, 1, 2)
plot(output3); % SSA result
title("Enhanced Signal")

% --- SNR argument = 120 ---
X4 = awgn(X, 120);
output4 = SSA(X4, Fs, .25);
figure(4);
subplot (2, 1, 1)
plot(X4); % noisy input
title("Noisy Signal - SNR = 120")
subplot (2, 1, 2)
plot(output4); % SSA result
title("Enhanced Signal")
function output=SSA(signal,fs,IS)
% OUTPUT = SSA(SIGNAL, FS, IS)
% Spectral-subtraction speech enhancement with magnitude averaging and
% residual noise reduction.
%
%   SIGNAL  noisy input signal (samples are addressed by linear index)
%   FS      sampling frequency in Hz
%   IS      length in seconds of the initial noise-only part of SIGNAL,
%           used for the first noise estimate (optional, default .25)
%
%   OUTPUT  enhanced time-domain signal rebuilt by OverlapAdd2
%
% Relies on segment, vad and OverlapAdd2.
if nargin < 3
  IS = .25; % seconds
end

W = fix(.025*fs); % Window length is 25 ms
nfft = W;
SP = 0.4; % Shift percentage is 40% (10ms); overlap-add works well with this value
wnd = hamming(W); % W-point symmetric Hamming window
Gamma = 1; % Magnitude power (1 = magnitude spectral subtraction, 2 = power spectrum subtraction)

y = segment(signal,W,SP,wnd);
Y = fft(y,nfft);
YPhase = angle(Y(1:fix(end/2)+1,:)); % Noisy speech phase (non-redundant half)
Y = abs(Y(1:fix(end/2)+1,:)).^Gamma; % Spectrogram (non-redundant half)
numberOfFrames = size(Y,2);
if numberOfFrames < 1
  error('SSA: the signal is shorter than one analysis window (%d samples).', W);
end

% Number of initial silence segments, kept inside the range of frames that
% actually exist so the initial noise estimate is always well defined.
NIS = fix((IS*fs-W)/(SP*W) +1);
NIS = min(max(NIS,1),numberOfFrames);

% Average across frames explicitly (dimension 2): this stays a column
% vector even when only a single frame is used.
N = mean(Y(:,1:NIS),2); % initial noise spectrum mean
NRM = zeros(size(N)); % Noise Residual Maximum (initialization)
NoiseCounter = 0;
NoiseLength = 9; % Smoothing factor for the noise update
Beta = .03; % Attenuation applied to frames classified as noise

% Magnitude averaging over three neighbouring frames (edge frames unchanged).
YS = Y;
for i=2:(numberOfFrames-1)
  YS(:,i)=(Y(:,i-1)+Y(:,i)+Y(:,i+1))/3;
end

X = zeros(size(Y)); % preallocated enhanced magnitude spectrogram
for i=1:numberOfFrames
  % Magnitude spectrum distance VAD
  [~, SpeechFlag, NoiseCounter] = vad(Y(:,i).^(1/Gamma),N.^(1/Gamma),NoiseCounter);
  if SpeechFlag==0
    N=(NoiseLength*N+Y(:,i))/(NoiseLength+1); % Update and smooth noise
    NRM=max(NRM,YS(:,i)-N); % Update maximum noise residue
    X(:,i)=Beta*Y(:,i);
  else
    D=YS(:,i)-N; % Spectral subtraction
    if i>1 && i<numberOfFrames % Residual noise reduction (frames 2..numberOfFrames-1)
      % Where the result is below the maximum noise residue, take the
      % smallest subtracted value among the previous, current and next frame.
      low = D < NRM;
      D(low) = min([D(low), YS(low,i-1)-N(low), YS(low,i+1)-N(low)],[],2);
    end
    X(:,i)=max(D,0); % half-wave rectification
  end
end
output=OverlapAdd2(X.^(1/Gamma),YPhase,W,SP*W);
function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen);
% RECONSTRUCTEDSIGNAL = OVERLAPADD2(XNEW, YPHASE, WINDOWLEN, SHIFTLEN)
% Rebuilds a time-domain signal from a half (non-redundant) spectrogram.
%
%   XNEW       magnitude spectrogram, one frame per column
%   YPHASE     phase matrix, same size as XNEW
%   WINDOWLEN  frame length in samples (also the inverse FFT length)
%   SHIFTLEN   hop between consecutive frames in samples; a non-integer
%              value is truncated and a notice is printed
%
% Every frame is extended to a conjugate-symmetric full spectrum, inverse
% transformed, and added into the output at its hop offset.
hop = ShiftLen;
if hop ~= fix(hop)
  hop = fix(hop);
  disp('The shift length have to be an integer as it is the number of samples.')
  disp(['shift length is fixed to ' num2str(hop)])
end

[~, frameCount] = size(XNEW);
halfSpec = XNEW.*exp(1i*yphase);

% Rows to mirror: for an odd window length every row except the first;
% for an even window length the last row is excluded as well.
if mod(windowLen,2)
  mirrored = halfSpec(2:end,:);
else
  mirrored = halfSpec(2:end-1,:);
end
fullSpec = [halfSpec; flipud(conj(mirrored))];

ReconstructedSignal = zeros((frameCount-1)*hop+windowLen,1);
for k = 1:frameCount
  span = (k-1)*hop + (1:windowLen); % sample positions covered by frame k
  ReconstructedSignal(span) = ReconstructedSignal(span) + real(ifft(fullSpec(:,k),windowLen));
end
function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCounter,NoiseMargin,Hangover)
% [NOISEFLAG, SPEECHFLAG, NOISECOUNTER, DIST] = VAD(SIGNAL, NOISE, NOISECOUNTER, NOISEMARGIN, HANGOVER)
% Spectral-distance voice activity detector for a single frame.
%
%   SIGNAL        magnitude spectrum of the current frame
%   NOISE         magnitude spectrum of the current noise estimate
%   NOISECOUNTER  number of immediately preceding noise frames (default 0)
%   NOISEMARGIN   distance threshold below which the frame counts as noise
%                 (default 3)
%   HANGOVER      number of consecutive noise frames that must be exceeded
%                 before SPEECHFLAG is cleared (default 8)
%
%   NOISEFLAG     1 if this frame is below the noise margin, else 0
%   SPEECHFLAG    0 once more than HANGOVER noise frames occurred in a row,
%                 else 1
%   NOISECOUNTER  updated run length of noise frames (reset to 0 on speech)
%   DIST          mean of the non-negative parts of 20*log10(SIGNAL./NOISE)
if nargin<3
  NoiseCounter=0;
end
if nargin<4
  NoiseMargin=3;
end
if nargin<5
  Hangover=8;
end

% Floor both spectra at eps before taking logarithms. Without this, bins
% that are exactly zero in both inputs give log10(0)-log10(0) = NaN, the
% mean becomes NaN, and a digitally silent frame is classified as speech.
% NaN inputs are left untouched (NaN < eps is false).
signal(signal < eps) = eps;
noise(noise < eps) = eps;

SpectralDist = 20*(log10(signal)-log10(noise));
SpectralDist(SpectralDist<0) = 0; % negative distances are discarded
Dist = mean(SpectralDist);

if (Dist < NoiseMargin)
  NoiseFlag=1;
  NoiseCounter=NoiseCounter+1;
else
  NoiseFlag=0;
  NoiseCounter=0;
end

% Only declare "no speech" after a sufficiently long run of noise frames.
if (NoiseCounter > Hangover)
  SpeechFlag=0;
else
  SpeechFlag=1;
end
function Seg=segment(signal,W,SP,Window)
% SEG = SEGMENT(SIGNAL, W, SP, WINDOW)
% Cuts SIGNAL into overlapping frames of W samples and multiplies each
% frame by WINDOW. SP is the shift between frames as a fraction of W; the
% shift in samples is fix(W*SP). Each column of SEG is one windowed frame.
winCol = Window(:); % force column orientation
hop = fix(W.*SP); % shift between frames in samples
numFrames = fix((length(signal)-W)/hop + 1); % number of whole frames that fit

% Index(r,c) is the sample number of row r in frame c.
frameOffsets = (0:(numFrames-1))*hop;
Index = bsxfun(@plus,(1:W)',frameOffsets);

Seg = signal(Index).*repmat(winCol,1,numFrames);
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\zref@newlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {1}تشخیص جنسیت از روی صدا}{1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1} تجزیه و تحلیل طیفی به کمک تبدیل فوریه}{1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2} بررسی دو مورد از صدا ها}{1}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces طیف توان برای صدای ضبط شده مرد شماره 3\relax }}{2}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces طیف توان برای صدای ضبط شده زن شماره 10\relax }}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}اوج صدا}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}تشخیص جنسیت}{4}}
\@writefile{toc}{\contentsline {section}{\numberline {2} الگوریتم هایی برای بهبود صدا}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}معرفی مختصری از الگوریتم تفاضل طیف}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}پیاده سازی و نکات آن}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}تست الگوریتم در مقابل نویزهای متفاوت}{7}}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\HyPL@Entry{1<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}تشخیص جنسیت از روی صدا}{1}{section.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1} تجزیه و تحلیل طیفی به کمک تبدیل فوریه}{1}{subsection.1.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2} بررسی دو مورد از صدا ها}{1}{subsection.1.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces طیف توان برای صدای ضبط شده مرد شماره 3\relax }}{2}{figure.caption.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces طیف توان برای صدای ضبط شده زن شماره 10\relax }}{3}{figure.caption.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}اوج صدا}{4}{subsection.1.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}تشخیص جنسیت}{4}{subsection.1.4}}
\@writefile{toc}{\contentsline {section}{\numberline {2} الگوریتم هایی برای بهبود صدا}{5}{section.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}معرفی مختصری از الگوریتم تفاضل طیف}{5}{subsection.2.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}پیاده سازی و نکات آن}{6}{subsection.2.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}تست الگوریتم در مقابل نویزهای متفاوت}{7}{subsection.2.3}}
\BOOKMARK [1][-]{section.1}{تشخیص جنسیت از روی صدا}{}% 1
\BOOKMARK [2][-]{subsection.1.1}{ تجزیه و تحلیل طیفی به کمک تبدیل فوریه}{section.1}% 2
\BOOKMARK [2][-]{subsection.1.2}{ بررسی دو مورد از صدا ها}{section.1}% 3
\BOOKMARK [2][-]{subsection.1.3}{اوج صدا}{section.1}% 4
\BOOKMARK [2][-]{subsection.1.4}{تشخیص جنسیت}{section.1}% 5
\BOOKMARK [1][-]{section.2}{ الگوریتم هایی برای بهبود صدا}{}% 6
\BOOKMARK [2][-]{subsection.2.1}{معرفی مختصری از الگوریتم تفاضل طیف}{section.2}% 7
\BOOKMARK [2][-]{subsection.2.2}{پیاده سازی و نکات آن}{section.2}% 8
\BOOKMARK [2][-]{subsection.2.3}{تست الگوریتم در مقابل نویزهای متفاوت}{section.2}% 9
......@@ -4,6 +4,7 @@
\usepackage{wrapfig}
\usepackage{caption}
\usepackage{mathtools}
\usepackage{hyperref}
\usepackage{cleveref}
\usepackage{graphicx}
......@@ -131,9 +132,21 @@ voices/v9.wav women\\
این روش بعد ها اصلاح شد و الگوریتم های پیچیده و روش های غیر خطی به کار گرفته شد تا علاوه بر حذف نویزهای واقعی محیط نویز موزیکال را نیز کاهش دهد.
ما در اینجا الگوریتم بول را پیاده سازی کرده‌ایم که در فایل
\lr{SSBoll79.m}
\lr{SSA.m}
قرار دارد.
روش کار الگوریتم: ما از ناحیه ای که صدا ندارد و به اصطلاح سکوت است کمک میگیریم که نویز را تخمین بزنیم حال فرض میکنیم که این نویز از هر فریم به فریم دیگر زیاد تغییر نمیکند و در هر فریم طیف نویز را از صوت کم میکنیم.
توضیحات الگوریتم در سورس کد بصورت کامنت قرار داده شد.
توضیحات الگوریتم:
\href{http://practicalcryptography.com/miscellaneous/machine-learning/tutorial-spectral-subraction/}{این وب سایت}
\subsection{تست الگوریتم در مقابل نویزهای متفاوت}
با اجرا کردن فایل
\lr{B3.m}
میتوانیم قدرت الگوریتم را در مقابل 4 نویز مشاهده کنیم.
\end{document}
......
function output=SSBoll79(signal,fs,IS)
% OUTPUT=SSBOLL79(S,FS,IS)
% Spectral Subtraction based on Boll 79. Amplitude spectral subtraction
% Includes Magnitude Averaging and Residual noise Reduction
% S is the noisy signal, FS is the sampling frequency and IS is the initial
% silence (noise only) length in seconds (default value is .25 sec)
%
% IS may alternatively be a struct with the fields windowsize, shiftsize,
% nfft, window and (optionally) IS; this form exists for compatibility
% with another program.
%
% April-05
% Esfandiar Zavarehei

% Short-circuit checks only: IS must not be inspected when it was not passed.
if nargin<3
  IS=.25; %seconds
end
W=fix(.025*fs); %Window length is 25 ms
nfft=W;
SP=.4; %Shift percentage is 40% (10ms) %Overlap-Add method works good with this value(.4)
wnd=hamming(W);

% Compatibility option: take the analysis parameters from a settings struct.
% The struct has to be read before IS is replaced by a number of seconds,
% otherwise this branch can never be reached.
if isstruct(IS)
  opts=IS;
  W=opts.windowsize;
  SP=opts.shiftsize/W;
  nfft=opts.nfft;
  wnd=opts.window;
  if isfield(opts,'IS')
    IS=opts.IS;
  else
    IS=.25;
  end
end

Gamma=1; %Magnitude Power (1 for magnitude spectral subtraction 2 for power spectrum subtraction)

y=segment(signal,W,SP,wnd);
Y=fft(y,nfft);
YPhase=angle(Y(1:fix(end/2)+1,:)); %Noisy Speech Phase
Y=abs(Y(1:fix(end/2)+1,:)).^Gamma; %Spectrogram
numberOfFrames=size(Y,2);
if numberOfFrames<1
  error('SSBoll79: the signal is shorter than one analysis window (%d samples).', W);
end

% Number of initial silence segments, kept within the frames that exist.
NIS=fix((IS*fs-W)/(SP*W) +1);
NIS=min(max(NIS,1),numberOfFrames);

% Average across frames explicitly so the result stays a column vector
% even when a single frame is used.
N=mean(Y(:,1:NIS),2); %initial Noise Power Spectrum mean
NRM=zeros(size(N)); % Noise Residual Maximum (Initialization)
NoiseCounter=0;
NoiseLength=9; %This is a smoothing factor for the noise updating
Beta=.03; %Attenuation applied to frames classified as noise

YS=Y; %Y Magnitude Averaged
for i=2:(numberOfFrames-1)
  YS(:,i)=(Y(:,i-1)+Y(:,i)+Y(:,i+1))/3;
end

X=zeros(size(Y)); %preallocated enhanced magnitude spectrogram
for i=1:numberOfFrames
  %Magnitude Spectrum Distance VAD
  [~, SpeechFlag, NoiseCounter]=vad(Y(:,i).^(1/Gamma),N.^(1/Gamma),NoiseCounter);
  if SpeechFlag==0
    N=(NoiseLength*N+Y(:,i))/(NoiseLength+1); %Update and smooth noise
    NRM=max(NRM,YS(:,i)-N); %Update Maximum Noise Residue
    X(:,i)=Beta*Y(:,i);
  else
    D=YS(:,i)-N; % Spectral Subtraction
    if i>1 && i<numberOfFrames %Residual Noise Reduction
      % Where the result is below the maximum noise residue, take the
      % smallest subtracted value among the previous, current and next frame.
      low=D<NRM;
      D(low)=min([D(low), YS(low,i-1)-N(low), YS(low,i+1)-N(low)],[],2);
    end
    X(:,i)=max(D,0);
  end
end
output=OverlapAdd2(X.^(1/Gamma),YPhase,W,SP*W);
function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen);
OverlapAdd2(XNEW,yphase,windowLen,ShiftLen);
%Y=OverlapAdd(X,A,W,S);
%Y is the signal reconstructed signal from its spectrogram. X is a matrix
......@@ -89,41 +11,10 @@ function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen);
%example in the case of non overlapping signals it is equal to W and in the
%case of %50 overlap is equal to W/2. if not givven W/2 is used. Y is the
%reconstructed time domain signal.
%Sep-04
%Esfandiar Zavarehei
if nargin<2
yphase=angle(XNEW);
end
if nargin<3
windowLen=size(XNEW,1)*2;
end
if nargin<4
ShiftLen=windowLen/2;
end
if fix(ShiftLen)~=ShiftLen
ShiftLen=fix(ShiftLen);
disp('The shift length have to be an integer as it is the number of samples.')
disp(['shift length is fixed to ' num2str(ShiftLen)])
end
[FreqRes FrameNum]=size(XNEW);
Spec=XNEW.*exp(j*yphase);
if mod(windowLen,2) %if FreqResol is odd
Spec=[Spec;flipud(conj(Spec(2:end,:)))];
else
Spec=[Spec;flipud(conj(Spec(2:end-1,:)))];
end
sig=zeros((FrameNum-1)*ShiftLen+windowLen,1);
weight=sig;
for i=1:FrameNum
start=(i-1)*ShiftLen+1;
spec=Spec(:,i);
sig(start:start+windowLen-1)=sig(start:start+windowLen-1)+real(ifft(spec,windowLen));
end
ReconstructedSignal=sig;
function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCounter,NoiseMargin,Hangover)
......@@ -138,69 +29,17 @@ function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCount
%NOISECOUNTER returns the number of previous noise segments, this value is
%reset (to zero) whenever a speech segment is detected. DIST is the
%spectral distance.
%Saeed Vaseghi
%edited by Esfandiar Zavarehei
%Sep-04
if nargin<4
NoiseMargin=3;
end
if nargin<5
Hangover=8;
end
if nargin<3
NoiseCounter=0;
end
FreqResol=length(signal);
SpectralDist= 20*(log10(signal)-log10(noise));
SpectralDist(find(SpectralDist<0))=0;
Dist=mean(SpectralDist);
if (Dist < NoiseMargin)
NoiseFlag=1;
NoiseCounter=NoiseCounter+1;
else
NoiseFlag=0;
NoiseCounter=0;
end
% Detect noise only periods and attenuate the signal
if (NoiseCounter > Hangover)
SpeechFlag=0;
else
SpeechFlag=1;
end
function Seg=segment(signal,W,SP,Window)
% SEGMENT chops a signal into overlapping windowed segments
% A = SEGMENT(X,W,SP,WIN) returns a matrix whose columns are the segmented
% and windowed frames of the one-dimensional input signal X.
%   W    number of samples per window (default 256)
%   SP   shift between frames as a fraction of W (default 0.4); the shift
%        in samples is fix(W*SP)
%   WIN  window multiplied onto every segment, expected to have W samples
%        (default hamming(W))
% 06-Sep-04
% Esfandiar Zavarehei

% Fill in defaults; W has to be known before the default window is built.
if nargin<2
  W=256;
end
if nargin<3
  SP=.4;
end
if nargin<4
  Window=hamming(W);
end

winCol = Window(:); % force column orientation
hop = fix(W.*SP); % shift between frames in samples
numFrames = fix((length(signal)-W)/hop + 1); % number of whole frames that fit

% Index(r,c) is the sample number of row r in frame c.
frameOffsets = (0:(numFrames-1))*hop;
Index = bsxfun(@plus,(1:W)',frameOffsets);

Seg = signal(Index).*repmat(winCol,1,numFrames);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment