C#: Pitch shifting of wave files

I am currently trying to pitch-shift a wave file using this algorithm:

https://sites.google.com/site/mikescoderama/pitch-shifting

Here is my code, which uses the above implementation, but I have had no luck: the output wave file seems to be damaged or invalid.

The code is pretty simple, except for the pitch shifting algorithm.

  • It loads the wave file, reads the wave data and puts it into a byte[] array.
  • Then it "normalizes" the byte data into floats in the range -1.0f to 1.0f (as required by the author of the pitch shifting algorithm).
  • It applies the pitch shifting algorithm and then converts the normalized data back into the byte[] array.
  • Finally, it saves a wave file that has the same header as the original wave file and the pitch-shifted data.

Did I miss something?

// Round-trips "sound.wav" through the normalized float representation
// (-1.0 to 1.0) and writes the result to "sound_low.wav", reporting whether
// the sample bytes changed along the way.
//
// NOTE: this program reads a fixed 44-byte header. A wave file that carries
// extra chunks before its sample data has a longer header, in which case part
// of the header is treated as audio here.
static void Main(string[] args)
{
    const int HeaderSize = 44;

    // Read the wave file header and data bytes
    byte[] waveheader;
    byte[] wavedata;
    using (BinaryReader reader = new BinaryReader(File.OpenRead("sound.wav")))
    {
        // Read first 44 bytes (header)
        waveheader = reader.ReadBytes(HeaderSize);
        if (waveheader.Length < HeaderSize)
        {
            Console.WriteLine("File is too short to contain a 44-byte wave header.");
            return;
        }

        // Read data
        wavedata = reader.ReadBytes((int)reader.BaseStream.Length - HeaderSize);
    }

    int sampleRate = BitConverter.ToInt32(waveheader, 24);
    short bitsPerSample = BitConverter.ToInt16(waveheader, 34);

    // Any other sample width would be left unconverted and written back as
    // silence, so refuse it explicitly instead.
    if (bitsPerSample != 8 && bitsPerSample != 16)
    {
        Console.WriteLine("Only 8-bit and 16-bit PCM data is supported.");
        return;
    }

    // Normalized data store. Store values in the format -1.0 to 1.0.
    // One float per sample: 8-bit data has one byte per sample, 16-bit has two.
    int bytesPerSample = bitsPerSample / 8;
    float[] in_data = new float[wavedata.Length / bytesPerSample];

    // Normalize wave data into -1.0 to 1.0 values
    using (BinaryReader reader = new BinaryReader(new MemoryStream(wavedata)))
    {
        for (int i = 0; i < in_data.Length; i++)
        {
            if (bitsPerSample == 16)
            {
                in_data[i] = reader.ReadInt16() / 32768f;
            }
            else
            {
                // 8-bit PCM is unsigned with 128 as the zero level
                in_data[i] = (reader.ReadByte() - 128) / 128f;
            }
        }
    }

    // Left disabled exactly as in the original post; with it disabled the
    // comparison below is a pure round-trip check of the conversion code.
    //PitchShifter.PitchShift(1f, in_data.Length, (long)1024, (long)32, sampleRate, in_data);

    // Backup wave data
    byte[] copydata = new byte[wavedata.Length];
    Array.Copy(wavedata, copydata, wavedata.Length);

    // Revert data to byte format. Every whole sample is overwritten in place;
    // a trailing partial sample (odd byte count in 16-bit data) is left as is.
    using (BinaryWriter writer = new BinaryWriter(new MemoryStream(wavedata)))
    {
        for (int i = 0; i < in_data.Length; i++)
        {
            if (bitsPerSample == 16)
            {
                // Clamp first: +1.0 scales to 32768, which does not fit in a short
                float scaled = in_data[i] * 32768f;
                scaled = Math.Max(short.MinValue, Math.Min(short.MaxValue, scaled));
                writer.Write((short)scaled);
            }
            else
            {
                float scaled = (in_data[i] * 128f) + 128;
                scaled = Math.Max(byte.MinValue, Math.Min(byte.MaxValue, scaled));
                writer.Write((byte)scaled);
            }
        }
    }

    // Compare new wavedata with copydata
    if (wavedata.SequenceEqual(copydata))
    {
        Console.WriteLine("Data has no changes");
    }
    else
    {
        Console.WriteLine("Data has changed!");
    }

    // Save modified wavedata. File.Create truncates an existing file, so no
    // separate delete step is needed.
    string targetFilePath = "sound_low.wav";
    using (BinaryWriter writer = new BinaryWriter(File.Create(targetFilePath)))
    {
        writer.Write(waveheader);
        writer.Write(wavedata);
    }

    Console.ReadLine();
}
+6
source share
2 answers

The algorithm works fine here

https://sites.google.com/site/mikescoderama/pitch-shifting

My mistake was in how I read the wave header and the wave data. Here is the full working code:

WARNING: this code only works for 16-bit (stereo / mono) PCM waves. It can be easily adapted to work with 8-bit PCM.

  // Loads "sound.wav", pitch-shifts every channel, converts the result back to
  // 16-bit PCM bytes and saves it as "sound_low.wav" behind the original header.
  static void Main(string[] args)
  {
      // Read header, data and channels as separated data.
      // Normalized data stores. Store values in the format -1.0 to 1.0
      byte[] waveheader;
      byte[] wavedata;
      int sampleRate;
      float[] in_data_l;
      float[] in_data_r;
      GetWaveData("sound.wav", out waveheader, out wavedata, out sampleRate, out in_data_l, out in_data_r);

      //
      // Apply Pitch Shifting (each channel is processed on its own)
      //
      if (in_data_l != null)
      {
          PitchShifter.PitchShift(2f, in_data_l.Length, (long)1024, (long)10, sampleRate, in_data_l);
      }

      if (in_data_r != null)
      {
          PitchShifter.PitchShift(2f, in_data_r.Length, (long)1024, (long)10, sampleRate, in_data_r);
      }

      //
      // Time to save the processed data
      //

      // Backup wave data
      byte[] copydata = new byte[wavedata.Length];
      Array.Copy(wavedata, copydata, wavedata.Length);

      GetWaveData(in_data_l, in_data_r, ref wavedata);

      //
      // Check if data actually changed
      //
      bool noChanges = true;
      for (int i = 0; i < wavedata.Length; i++)
      {
          if (wavedata[i] != copydata[i])
          {
              noChanges = false;
              Console.WriteLine("Data has changed!");
              break;
          }
      }

      if (noChanges)
      {
          Console.WriteLine("Data has no changes");
      }

      // Save modified wavedata. File.Create truncates an existing file, so no
      // separate delete step is needed.
      string targetFilePath = "sound_low.wav";
      using (BinaryWriter writer = new BinaryWriter(File.Create(targetFilePath)))
      {
          writer.Write(waveheader);
          writer.Write(wavedata);
      }

      Console.ReadLine();
  }

  // Reads a 16-bit PCM wave file.
  //   header     - every byte of the file that precedes the sample data
  //   data       - the raw sample bytes of the 'data' chunk
  //   sampleRate - sample rate taken from the 'fmt ' chunk
  //   left/right - samples normalized to -1.0 .. 1.0; 'right' will be null
  //                if the sound is not stereo
  // Throws InvalidDataException when the chunk structure cannot be followed and
  // NotSupportedException when the samples are not 16 bits wide.
  public static void GetWaveData(string filename, out byte[] header, out byte[] data, out int sampleRate, out float[] left, out float[] right)
  {
      byte[] wav = File.ReadAllBytes(filename);

      int channels = 0;
      int bitsPerSample = 0;
      bool formatFound = false;
      sampleRate = 0;

      // Chunks start right after the 12-byte RIFF/WAVE preamble. Each chunk is a
      // 4-byte id, a 4-byte little-endian size, then the chunk body.
      int pos = 12;
      int dataSize;

      // Keep iterating until we find the data chunk
      while (true)
      {
          if ((long)pos + 8 > wav.Length)
          {
              throw new InvalidDataException("No 'data' chunk found in wave file.");
          }

          int chunkSize = BitConverter.ToInt32(wav, pos + 4);
          int bodyStart = pos + 8;

          if (HasChunkId(wav, pos, "data"))
          {
              dataSize = chunkSize;
              pos = bodyStart;
              break;
          }

          if (chunkSize < 0)
          {
              throw new InvalidDataException("Wave file contains a chunk with an invalid size.");
          }

          if (HasChunkId(wav, pos, "fmt ") && chunkSize >= 16 && (long)bodyStart + 16 <= wav.Length)
          {
              // For a file whose 'fmt ' chunk comes first these are the familiar
              // absolute offsets 22 (channels), 24 (sample rate) and 34 (bits).
              channels = BitConverter.ToInt16(wav, bodyStart + 2);
              sampleRate = BitConverter.ToInt32(wav, bodyStart + 4);
              bitsPerSample = BitConverter.ToInt16(wav, bodyStart + 14);
              formatFound = true;
          }

          // RIFF chunk bodies are padded to an even length; the pad byte is not
          // included in the stored size.
          long next = (long)bodyStart + chunkSize + (chunkSize & 1);
          if (next > wav.Length)
          {
              throw new InvalidDataException("No 'data' chunk found in wave file.");
          }

          pos = (int)next;
      }

      if (!formatFound)
      {
          throw new InvalidDataException("Wave file has no 'fmt ' chunk before its 'data' chunk.");
      }

      if (bitsPerSample != 16)
      {
          throw new NotSupportedException("Only 16-bit PCM wave files are supported.");
      }

      // Pos is now positioned to start of actual sound data.
      int dataStart = pos;

      // Never trust the declared size beyond what the file really contains.
      int available = wav.Length - dataStart;
      if (dataSize < 0 || dataSize > available)
      {
          dataSize = available;
      }

      // 2 bytes per sample (16 bit mono), 4 bytes per sample pair (16 bit stereo)
      int bytesPerFrame = channels == 2 ? 4 : 2;
      int samples = dataSize / bytesPerFrame;

      // Allocate memory (right will be null if only mono sound)
      left = new float[samples];
      right = channels == 2 ? new float[samples] : null;

      header = new byte[dataStart];
      Array.Copy(wav, header, dataStart);

      data = new byte[dataSize];
      Array.Copy(wav, dataStart, data, 0, dataSize);

      // Write to float array/s. The loop is driven by the sample count: 'pos' is
      // an absolute file offset, so comparing it with the chunk size would stop
      // before the end of the audio.
      int offset = dataStart;
      for (int i = 0; i < samples; i++)
      {
          left[i] = BytesToNormalized_16(wav[offset], wav[offset + 1]);
          offset += 2;

          if (right != null)
          {
              right[i] = BytesToNormalized_16(wav[offset], wav[offset + 1]);
              offset += 2;
          }
      }
  }

  // True when the four bytes at 'offset' spell the given ASCII chunk id.
  private static bool HasChunkId(byte[] buffer, int offset, string id)
  {
      for (int i = 0; i < 4; i++)
      {
          if (buffer[offset + i] != (byte)id[i])
          {
              return false;
          }
      }

      return true;
  }

  // Return byte data from left and right float data.
// Ignore right when sound is mono.
//
// Converts normalized float samples back to interleaved 16-bit little-endian
// PCM bytes, written into 'data' from its first byte.
//   left  - left (or mono) channel samples
//   right - right channel samples, or null for mono
//   data  - destination buffer; it is cleared first and must be large enough
//           to hold every converted sample
// The output is rescaled so that the loudest sample of either channel maps to
// Int16.MaxValue.
public static void GetWaveData(float[] left, float[] right, ref byte[] data)
{
    if (left == null)
    {
        throw new ArgumentNullException(nameof(left));
    }

    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    if (right != null && right.Length < left.Length)
    {
        throw new ArgumentException("The right channel is shorter than the left channel.", nameof(right));
    }

    // Find the peak over BOTH channels: scaling by the left channel alone lets
    // a louder right channel overflow the 16-bit range.
    // The '>' comparison skips NaN samples.
    float peak = 0f;
    foreach (float sample in left)
    {
        float magnitude = Math.Abs(sample);
        if (magnitude > peak)
        {
            peak = magnitude;
        }
    }

    if (right != null)
    {
        foreach (float sample in right)
        {
            float magnitude = Math.Abs(sample);
            if (magnitude > peak)
            {
                peak = magnitude;
            }
        }
    }

    // Calculate k
    // This value will be used to convert float to Int16
    // We are not using Int16.Max to avoid peaks due to overflow conversions.
    // Pure silence has no peak: use 0 instead of dividing by zero.
    float k = peak > 0f ? (float)Int16.MaxValue / peak : 0f;

    // Revert data to byte format
    Array.Clear(data, 0, data.Length);

    using (BinaryWriter writer = new BinaryWriter(new MemoryStream(data)))
    {
        for (int i = 0; i < left.Length; i++)
        {
            byte lowByte;
            byte highByte;

            NormalizedToBytes_16(left[i], k, out lowByte, out highByte);
            writer.Write(lowByte);
            writer.Write(highByte);

            if (right != null)
            {
                NormalizedToBytes_16(right[i], k, out lowByte, out highByte);
                writer.Write(lowByte);
                writer.Write(highByte);
            }
        }
    }
}

// Convert two bytes to one float in the range -1 to 1
static float BytesToNormalized_16(byte firstByte, byte secondByte)
{
    // convert two bytes to one short (little endian)
    short s = (short)((secondByte << 8) | firstByte);

    // convert to range from -1 to (just below) 1; 32768 is 2^15
    return s / 32768f;
}

// Convert a float value into two bytes (use k as conversion value and not Int16.MaxValue to avoid peaks)
static void NormalizedToBytes_16(float value, float k, out byte firstByte, out byte secondByte)
{
    float scaled = value * k;

    // Keep the value inside the 16-bit range before casting; a NaN becomes silence.
    if (float.IsNaN(scaled))
    {
        scaled = 0f;
    }
    else if (scaled > short.MaxValue)
    {
        scaled = short.MaxValue;
    }
    else if (scaled < short.MinValue)
    {
        scaled = short.MinValue;
    }

    short s = (short)scaled;
    firstByte = (byte)(s & 0x00FF);
    secondByte = (byte)((s >> 8) & 0x00FF);
}
+4
source

Sorry to revive this, but I tried this class on an audio signal, and although it works, I get crackling in the sound when cropping (0.5f). Did you handle this?

0
source

Source: https://habr.com/ru/post/955001/


All Articles