First of all, sorry for my poor English. I started using Objective-C only a few weeks ago. I am working on a project where I need to compare two audio signals recorded on two iOS devices. So far I have managed to record two .aif files, one on an iPhone 4s and one on an iPhone 4. Next I try to apply the algorithm from "A Highly Robust Audio Fingerprinting System" by Jaap Haitsma to get two fingerprints (binary bit patterns such as 101011010) and compare them bit by bit. But so far the match I get is only between 45% and 55%, which is basically what random bits would give. Can anyone give me some advice? Here is my code so far:
CalculateFingerprint *myCalculateFingerprint = [[CalculateFingerprint alloc] init];
SInt16 *inputBuffer;
path4 = [documentsDirectory stringByAppendingPathComponent:fileName4];

// Calculate the fingerprint for the fourth file
fileURL = [NSURL fileURLWithPath:path4];
status = AudioFileOpenURL((__bridge CFURLRef)fileURL,
                          kAudioFileReadPermission,
                          kAudioFileAIFFType,
                          &myAudioFile);

// Total number of packets in the file
status = AudioFileGetPropertyInfo(myAudioFile,
                                  kAudioFilePropertyAudioDataPacketCount,
                                  &propertySizeDataPacketCount,
                                  &writabilityDataPacketCount);
status = AudioFileGetProperty(myAudioFile,
                              kAudioFilePropertyAudioDataPacketCount,
                              &propertySizeDataPacketCount,
                              &numberOfPackets);

// Maximum packet size, used to size the read buffer
status = AudioFileGetPropertyInfo(myAudioFile,
                                  kAudioFilePropertyMaximumPacketSize,
                                  &propertySizeMaxPacketSize,
                                  &writabilityMaxPacketSize);
status = AudioFileGetProperty(myAudioFile,
                              kAudioFilePropertyMaximumPacketSize,
                              &propertySizeMaxPacketSize,
                              &maxPacketSize);

// Read every packet into one contiguous buffer of 16-bit samples
inputBuffer = (SInt16 *)malloc(numberOfPackets * maxPacketSize);
currentPacket = 0;
status = AudioFileReadPackets(myAudioFile,
                              false,
                              &numberOfBytesRead,
                              NULL,
                              currentPacket,
                              &numberOfPackets,
                              inputBuffer);

[myCalculateFingerprint calculateFingerprint:inputBuffer
                                 sampleCount:numberOfPackets
                                       index:indexFile];
status = AudioFileClose(myAudioFile);
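For reference, the bit-by-bit comparison that produces the 45% to 55% figure is essentially the following (a minimal sketch, not my exact code; it assumes the two fingerprints are equal-length strings of '0'/'1' characters like the result string built in calculateFingerprint below):

// Fraction of positions where the two fingerprint bit strings agree.
// fp1 and fp2 are assumed to be equal-length NSStrings of '0'/'1' characters.
- (double)matchRatioBetween:(NSString *)fp1 and:(NSString *)fp2 {
    NSUInteger length = MIN(fp1.length, fp2.length);
    if (length == 0) return 0.0;
    NSUInteger matches = 0;
    for (NSUInteger i = 0; i < length; ++i) {
        if ([fp1 characterAtIndex:i] == [fp2 characterAtIndex:i]) {
            ++matches;
        }
    }
    return (double)matches / (double)length; // ~0.5 means the bits look random
}

A ratio around 0.5 is what two unrelated random bit strings would give, which matches what I am seeing.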
Here is the fingerprint calculation code:
-(void)calculateFingerprint:(SInt16 *)samples
                sampleCount:(int)sampleCount
                      index:(int)indexFile {
    // Divide the audio signal into 32 heavily overlapping frames: each frame
    // is 32*stepFrames samples long (the FFT later uses the first 1024 of them)
    // and consecutive frames start stepFrames samples apart, so the 32 frames
    // together span 63*stepFrames samples.
    frames myFrames[32];
    int stepFrames = sampleCount / 63;   // 63 steps must fit inside samples[]
    int number = 0;
    int index;
    for (int i = 0; i < 32; ++i) {
        index = 0;
        myFrames[i].start = number;
        myFrames[i].end = number + (32 * stepFrames) - 1;   // inclusive end index
        myFrames[i].dataFrames = (SInt16 *)malloc((myFrames[i].end - number + 1) * sizeof(SInt16));
        for (int j = number; j <= myFrames[i].end; ++j) {
            myFrames[i].dataFrames[index] = samples[j];
            ++index;
        }
        number = number + stepFrames;
    }
    // Calculate the FFT (power spectrum) for each of the 32 frames.
    CalculateFFT *myCalculateFFT = [[CalculateFFT alloc] init];
    theFFT myFFTData[32];
    for (int i = 0; i < 32; ++i) {
        myFFTData[i].FFTdata = [myCalculateFFT calculateFFTForData:myFrames[i].dataFrames];
    }
    // Each FFT bin index i corresponds to the frequency i * 44100 / 1024 Hz.
    // We only need 33 bands between roughly 300 Hz and 2000 Hz, so we take the
    // 33 values FFTdata[7] .. FFTdata[39] of every frame (see the band-energy
    // note after this method).
    // energy[n][m] holds band m of frame n-1; row 0 is an all-zero
    // "previous frame" for the first real frame.
    float energy[33][33];
    for (int i = 0; i < 33; ++i) {
        energy[0][i] = 0;
    }
    for (int i = 1; i < 33; ++i) {
        for (int j = 0; j < 33; ++j) {
            energy[i][j] = myFFTData[i - 1].FFTdata[j + 7]; // i-1: myFFTData only has indices 0..31
        }
    }
    // Next we calculate the fingerprint bits: bit (i, j) compares the energy
    // difference between band j and band j+1 of frame i with the same
    // difference in the previous frame (row i of "energy" is the frame before row i+1).
    Float32 check = 0;
    int fingerPrint[32][32];
    NSMutableString *result = [[NSMutableString alloc] init];
    for (int i = 0; i < 32; ++i) {
        for (int j = 0; j < 32; ++j) {
            check = energy[i + 1][j] - energy[i + 1][j + 1] - energy[i][j] + energy[i][j + 1];
            if (check > 0) {
                fingerPrint[i][j] = 1;
            } else {
                fingerPrint[i][j] = 0;
            }
            [result appendString:[NSString stringWithFormat:@"%d", fingerPrint[i][j]]];
        }
    }
    // "result" now holds the 32 x 32 = 1024 fingerprint bits as a string of '0'/'1'.
}
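As far as I understand the Haitsma paper, the 33 values per frame should be band energies (each band covering a group of FFT bins, with the bands spaced logarithmically between 300 Hz and 2000 Hz), not single FFT bins as above. This is only a minimal sketch of how such band energies could be summed, assuming the 512-value power spectrum returned by calculateFFTForData: and a bin width of 44100/1024 Hz; the band edges and the helper name are illustrative, not from my current code:

#include <math.h>

// Sum FFT bin powers into 33 logarithmically spaced bands between 300 Hz
// and 2000 Hz. powerSpectrum has 512 entries (bin i = i * 44100 / 1024 Hz).
// Note: with ~43 Hz bins the lowest bands are narrower than one bin, so this
// forces at least one bin per band; that is a simplification.
static void bandEnergies(const Float32 *powerSpectrum, float bandEnergy[33]) {
    const double binWidth = 44100.0 / 1024.0;
    const double fLow = 300.0, fHigh = 2000.0;
    for (int band = 0; band < 33; ++band) {
        double edgeLow  = fLow * pow(fHigh / fLow,  band      / 33.0);
        double edgeHigh = fLow * pow(fHigh / fLow, (band + 1) / 33.0);
        int binLow  = (int)(edgeLow  / binWidth);
        int binHigh = (int)(edgeHigh / binWidth);
        if (binHigh <= binLow) binHigh = binLow + 1;   // every band gets at least one bin
        bandEnergy[band] = 0.0f;
        for (int bin = binLow; bin < binHigh && bin < 512; ++bin) {
            bandEnergy[band] += powerSpectrum[bin];
        }
    }
}

These 33 band energies per frame would then go through the same bit rule as in the loop above: bit = 1 if (E(n,m) - E(n,m+1)) - (E(n-1,m) - E(n-1,m+1)) > 0, else 0.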
Finally, the FFT calculation code:
-(void)FFTSetup {
    UInt32 maxFrames = 1024;   // FFT length: the first 1024 samples of each frame
    originalReal         = (float *)malloc(maxFrames * sizeof(float));
    originalRealTransfer = (float *)malloc(maxFrames * sizeof(float));
    obtainedReal         = (float *)malloc(maxFrames * sizeof(float));
    freqArray            = (Float32 *)malloc((maxFrames / 2) * sizeof(Float32));
    fftLog2n = log2f(maxFrames);
    fftN = 1 << fftLog2n;
    fftNOver2 = maxFrames / 2;
    fftBufferCapacity = maxFrames;
    fftIndex = 0;
    fftA.realp = (float *)malloc(fftNOver2 * sizeof(float));
    fftA.imagp = (float *)malloc(fftNOver2 * sizeof(float));
    fftSetup = vDSP_create_fftsetup(fftLog2n, FFT_RADIX2);
}
-(Float32 *)calculateFFTForData:(SInt16 *)sampleData {
    // FFTSetup allocates fresh buffers on every call, so each call returns its
    // own freqArray (the buffers from the previous call are simply leaked here).
    [self FFTSetup];
    int stride = 1;
    // Use the first fftN (1024) samples of the frame
    for (int i = 0; i < fftN; ++i) {
        originalReal[i] = (float)sampleData[i];
    }
    UInt32 maxFrames = 1024;

    // Apply a Hann window to the frame
    int windowSize = maxFrames;
    float *window = (float *)malloc(sizeof(float) * windowSize);
    memset(window, 0, sizeof(float) * windowSize);
    vDSP_hann_window(window, windowSize, vDSP_HANN_NORM);
    vDSP_vmul(originalReal, 1, window, 1, originalRealTransfer, 1, windowSize);

    // Pack the real signal into split-complex form and run the real FFT
    vDSP_ctoz((COMPLEX *)originalRealTransfer, 2, &fftA, 1, fftNOver2);
    vDSP_fft_zrip(fftSetup, &fftA, stride, fftLog2n, FFT_FORWARD);

    // Undo the scaling of vDSP_fft_zrip and unpack back to interleaved form
    float scale = (float)1.0 / (2 * fftN);
    vDSP_vsmul(fftA.realp, 1, &scale, fftA.realp, 1, fftNOver2);
    vDSP_vsmul(fftA.imagp, 1, &scale, fftA.imagp, 1, fftNOver2);
    vDSP_ztoc(&fftA, 1, (COMPLEX *)obtainedReal, 2, fftNOver2);

    // Power (magnitude squared) of each of the fftN/2 frequency bins
    int index = 0;
    NSMutableString *testResult = [[NSMutableString alloc] init];
    for (int i = 0; i < fftN; i = i + 2) {
        freqArray[index] = (obtainedReal[i] * obtainedReal[i]) + (obtainedReal[i + 1] * obtainedReal[i + 1]);
        [testResult appendString:[NSString stringWithFormat:@"%f ", freqArray[index]]];
        ++index;
    }
    free(window);
    return freqArray;
}
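In case it helps, this is the kind of sanity check I would run on calculateFFTForData: feeding it a sine whose frequency sits exactly on a known bin and checking where the peak power lands (a minimal sketch; the expected peak bin is frequency / (44100/1024)):

// Generate 1024 samples of a ~430.7 Hz sine at 44.1 kHz (exactly bin 10,
// since 10 * 44100 / 1024 = 430.66 Hz), run the FFT and find the peak bin.
SInt16 testSamples[1024];
for (int i = 0; i < 1024; ++i) {
    testSamples[i] = (SInt16)(10000.0 * sin(2.0 * M_PI * 10.0 * i / 1024.0));
}
CalculateFFT *fft = [[CalculateFFT alloc] init];
Float32 *power = [fft calculateFFTForData:testSamples];
int peakBin = 0;
for (int i = 1; i < 512; ++i) {
    if (power[i] > power[peakBin]) peakBin = i;
}
NSLog(@"peak bin = %d (expected 10, i.e. %.1f Hz)", peakBin, 10.0 * 44100.0 / 1024.0);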