Bluetooth's SBC codec is pretty good (for a low-complexity, royalty-free codec, that is). Implementing it is trivial (the spec shows you how), but optimizing it is not. Presented here is a fairly optimal C implementation of the synthesis filter that makes it obvious how to vectorize it (if your architecture allows it) and that has lots of knobs to twist. Obviously, no FPU is used. A few quality settings exist. First, one can set the precision of the fixed-point math to 32, 16, or 8 bits (which, respectively, use 64-, 32- and 16-bit temporary variables for multiplication). One can also choose to shift instead of divide, causing some errors in quieter sounds, but saving lots of cycles on machines where divides are slow.
The strange tables you see were generated by some scripts I made. They are at the bottom of the listing.
I did all this in my free time in one day (May 26). This code is free to use for non-commercial purposes (for commercial purposes, contact me). Any copies of the code must include this license and credit to me with them at all times.
#include <stdint.h>

/*
 * Fixed-point SBC synthesis filter (decoder side), 4 and 8 subbands.
 *
 * Quality levels select the width of the fixed-point math:
 *   QUALITY_LOWEST  -  8-bit coefficients, 16-bit accumulators
 *   QUALITY_MEDIUM  - 16-bit coefficients, 32-bit accumulators
 *   QUALITY_GREAT   - 32-bit coefficients, 64-bit accumulators
 */
#define QUALITY_LOWEST  1   //you may notice the quality reduction
#define QUALITY_MEDIUM  2   //pretty good
#define QUALITY_GREAT   3   //as good as it will get without an FPU

///config options begin

//may be overridden from the build line, e.g. -DQUALITY=QUALITY_MEDIUM
#ifndef QUALITY
#define QUALITY QUALITY_GREAT
#endif

//#define SPEED_OVER_ACCURACY   //set to cheat a bit with shifts (saves a divide per sample)

//iterator type; must hold values up to 180 and the descending loops below
//rely on it never having to go below zero. Use the fastest type for your platform.
#ifndef ITER
#define ITER uint8_t
#endif

///config options end

/*
 * Per-quality types and scaling.
 *   CONST(x)             - scale a 32-bit table constant down to FIXED
 *   FIXED / FIXED_S      - coefficient type / wider accumulator type
 *   NUM_FRAC_BITS_PROTO  - fraction bits of the proto_* tables after CONST()
 *   NUM_FRAC_BITS_COS    - fraction bits of the costab_* tables after CONST()
 * The table literals are unsigned hex; the explicit cast to FIXED makes the
 * reinterpretation as a signed value visible instead of implicit.
 */
#if QUALITY == QUALITY_LOWEST

    #define CONST(x)            ((FIXED)((x) >> 24))
    #define SAMPLE_CVT(x)       ((x) >> 8)
    #define INSAMPLE            int8_t
    #define OUTSAMPLE           uint8_t     //no point producing 16-bit samples using the 8-bit decoder
    #define FIXED               int8_t
    #define FIXED_S             int16_t
    #define OUT_CLIP_MAX        0x7F
    #define OUT_CLIP_MIN        -0x80
    #define NUM_FRAC_BITS_PROTO 8
    #define NUM_FRAC_BITS_COS   6

#elif QUALITY == QUALITY_MEDIUM

    #define CONST(x)            ((FIXED)((x) >> 16))
    #define SAMPLE_CVT(x)       (x)
    #define INSAMPLE            int16_t
    #define OUTSAMPLE           uint16_t
    #define FIXED               int16_t
    #define FIXED_S             int32_t
    #define OUT_CLIP_MAX        0x7FFF
    #define OUT_CLIP_MIN        -0x8000
    #define NUM_FRAC_BITS_PROTO 16
    #define NUM_FRAC_BITS_COS   14

#elif QUALITY == QUALITY_GREAT

    #define CONST(x)            ((FIXED)(x))
    #define SAMPLE_CVT(x)       (x)
    #define INSAMPLE            int16_t
    #define OUTSAMPLE           uint16_t
    #define FIXED               int32_t
    #define FIXED_S             int64_t
    #define OUT_CLIP_MAX        0x7FFF
    #define OUT_CLIP_MIN        -0x8000
    #define NUM_FRAC_BITS_PROTO 32
    #define NUM_FRAC_BITS_COS   30

#else

    #error "You did not define SBC decoder synthesizer quality to use"

#endif

/* Window coefficients for 4 subbands, stored in access order:
 * 10 consecutive entries per output sample (see synth_4). */
static const FIXED proto_4_40[] = {
    CONST(0x00000000), CONST(0x00FB7991), CONST(0x02CB3E8B), CONST(0x069FDC59), CONST(0x22B63DA5),
    CONST(0x4B583FE6), CONST(0xDD49C25B), CONST(0x069FDC59), CONST(0xFD34C175), CONST(0x00FB7991),
    CONST(0x002329CC), CONST(0x00FF11CA), CONST(0x053B7546), CONST(0x0191E578), CONST(0x31EAB920),
    CONST(0x4825E4A3), CONST(0xEC1F5E6D), CONST(0x083DDC80), CONST(0xFF3773A8), CONST(0x00B32807),
    CONST(0x0061C5A7), CONST(0x007A4737), CONST(0x07646684), CONST(0xF89F23A7), CONST(0x3F23948D),
    CONST(0x3F23948D), CONST(0xF89F23A7), CONST(0x07646684), CONST(0x007A4737), CONST(0x0061C5A7),
    CONST(0x00B32807), CONST(0xFF3773A8), CONST(0x083DDC80), CONST(0xEC1F5E6D), CONST(0x4825E4A3),
    CONST(0x31EAB920), CONST(0x0191E578), CONST(0x053B7546), CONST(0x00FF11CA), CONST(0x002329CC)
};

/* Window coefficients for 8 subbands, stored in access order:
 * 10 consecutive entries per output sample (see synth_8). */
static const FIXED proto_8_80[] = {
    CONST(0x00000000), CONST(0x0083D8D4), CONST(0x0172E691), CONST(0x034FD9E0), CONST(0x116860F5),
    CONST(0x259ED8EB), CONST(0xEE979F0B), CONST(0x034FD9E0), CONST(0xFE8D196F), CONST(0x0083D8D4),
    CONST(0x000A42E6), CONST(0x0089DE90), CONST(0x020E372C), CONST(0x02447D75), CONST(0x153E7D35),
    CONST(0x253844DE), CONST(0xF2625120), CONST(0x03EBE849), CONST(0xFF1ACF26), CONST(0x0074E5CF),
    CONST(0x00167EE3), CONST(0x0082B6EC), CONST(0x02AD6794), CONST(0x00BFA1FF), CONST(0x18FAB36D),
    CONST(0x24086BF5), CONST(0xF5FF2BF8), CONST(0x04270CA8), CONST(0xFF93E21B), CONST(0x0060C1E9),
    CONST(0x002458FC), CONST(0x0069F16C), CONST(0x03436717), CONST(0xFEBDD6E5), CONST(0x1C7762DF),
    CONST(0x221D9DE0), CONST(0xF950DCFC), CONST(0x0412523E), CONST(0xFFF44825), CONST(0x004AB4C5),
    CONST(0x0035FF13), CONST(0x003B1FA4), CONST(0x03C04499), CONST(0xFC4086B8), CONST(0x1F8E43F2),
    CONST(0x1F8E43F2), CONST(0xFC4086B8), CONST(0x03C04499), CONST(0x003B1FA4), CONST(0x0035FF13),
    CONST(0x004AB4C5), CONST(0xFFF44825), CONST(0x0412523E), CONST(0xF950DCFC), CONST(0x221D9DE0),
    CONST(0x1C7762DF), CONST(0xFEBDD6E5), CONST(0x03436717), CONST(0x0069F16C), CONST(0x002458FC),
    CONST(0x0060C1E9), CONST(0xFF93E21B), CONST(0x04270CA8), CONST(0xF5FF2BF8), CONST(0x24086BF5),
    CONST(0x18FAB36D), CONST(0x00BFA1FF), CONST(0x02AD6794), CONST(0x0082B6EC), CONST(0x00167EE3),
    CONST(0x0074E5CF), CONST(0xFF1ACF26), CONST(0x03EBE849), CONST(0xF2625120), CONST(0x253844DE),
    CONST(0x153E7D35), CONST(0x02447D75), CONST(0x020E372C), CONST(0x0089DE90), CONST(0x000A42E6)
};

/* Matrixing cosines for 4 subbands: 8 rows (one per new V entry) of 4 columns
 * (one per input subband sample). */
static const FIXED costab_4[] = {
    CONST(0x2D413CCD), CONST(0xD2BEC333), CONST(0xD2BEC333), CONST(0x2D413CCD),
    CONST(0x187DE2A7), CONST(0xC4DF2862), CONST(0x3B20D79E), CONST(0xE7821D59),
    CONST(0x00000000), CONST(0x00000000), CONST(0x00000000), CONST(0x00000000),
    CONST(0xE7821D59), CONST(0x3B20D79E), CONST(0xC4DF2862), CONST(0x187DE2A7),
    CONST(0xD2BEC333), CONST(0x2D413CCD), CONST(0x2D413CCD), CONST(0xD2BEC333),
    CONST(0xC4DF2862), CONST(0xE7821D59), CONST(0x187DE2A7), CONST(0x3B20D79E),
    CONST(0xC0000000), CONST(0xC0000000), CONST(0xC0000000), CONST(0xC0000000),
    CONST(0xC4DF2862), CONST(0xE7821D59), CONST(0x187DE2A7), CONST(0x3B20D79E)
};

/* Matrixing cosines for 8 subbands: 16 rows (one per new V entry) of 8 columns
 * (one per input subband sample); each row is split over two source lines. */
static const FIXED costab_8[] = {
    CONST(0x2D413CCD), CONST(0xD2BEC333), CONST(0xD2BEC333), CONST(0x2D413CCD),
    CONST(0x2D413CCD), CONST(0xD2BEC333), CONST(0xD2BEC333), CONST(0x2D413CCD),

    CONST(0x238E7673), CONST(0xC13AD060), CONST(0x0C7C5C1E), CONST(0x3536CC52),
    CONST(0xCAC933AE), CONST(0xF383A3E2), CONST(0x3EC52FA0), CONST(0xDC71898D),

    CONST(0x187DE2A7), CONST(0xC4DF2862), CONST(0x3B20D79E), CONST(0xE7821D59),
    CONST(0xE7821D59), CONST(0x3B20D79E), CONST(0xC4DF2862), CONST(0x187DE2A7),

    CONST(0x0C7C5C1E), CONST(0xDC71898D), CONST(0x3536CC52), CONST(0xC13AD060),
    CONST(0x3EC52FA0), CONST(0xCAC933AE), CONST(0x238E7673), CONST(0xF383A3E2),

    CONST(0x00000000), CONST(0x00000000), CONST(0x00000000), CONST(0x00000000),
    CONST(0x00000000), CONST(0x00000000), CONST(0x00000000), CONST(0x00000000),

    CONST(0xF383A3E2), CONST(0x238E7673), CONST(0xCAC933AE), CONST(0x3EC52FA0),
    CONST(0xC13AD060), CONST(0x3536CC52), CONST(0xDC71898D), CONST(0x0C7C5C1E),

    CONST(0xE7821D59), CONST(0x3B20D79E), CONST(0xC4DF2862), CONST(0x187DE2A7),
    CONST(0x187DE2A7), CONST(0xC4DF2862), CONST(0x3B20D79E), CONST(0xE7821D59),

    CONST(0xDC71898D), CONST(0x3EC52FA0), CONST(0xF383A3E2), CONST(0xCAC933AE),
    CONST(0x3536CC52), CONST(0x0C7C5C1E), CONST(0xC13AD060), CONST(0x238E7673),

    CONST(0xD2BEC333), CONST(0x2D413CCD), CONST(0x2D413CCD), CONST(0xD2BEC333),
    CONST(0xD2BEC333), CONST(0x2D413CCD), CONST(0x2D413CCD), CONST(0xD2BEC333),

    CONST(0xCAC933AE), CONST(0x0C7C5C1E), CONST(0x3EC52FA0), CONST(0x238E7673),
    CONST(0xDC71898D), CONST(0xC13AD060), CONST(0xF383A3E2), CONST(0x3536CC52),

    CONST(0xC4DF2862), CONST(0xE7821D59), CONST(0x187DE2A7), CONST(0x3B20D79E),
    CONST(0x3B20D79E), CONST(0x187DE2A7), CONST(0xE7821D59), CONST(0xC4DF2862),

    CONST(0xC13AD060), CONST(0xCAC933AE), CONST(0xDC71898D), CONST(0xF383A3E2),
    CONST(0x0C7C5C1E), CONST(0x238E7673), CONST(0x3536CC52), CONST(0x3EC52FA0),

    CONST(0xC0000000), CONST(0xC0000000), CONST(0xC0000000), CONST(0xC0000000),
    CONST(0xC0000000), CONST(0xC0000000), CONST(0xC0000000), CONST(0xC0000000),

    CONST(0xC13AD060), CONST(0xCAC933AE), CONST(0xDC71898D), CONST(0xF383A3E2),
    CONST(0x0C7C5C1E), CONST(0x238E7673), CONST(0x3536CC52), CONST(0x3EC52FA0),

    CONST(0xC4DF2862), CONST(0xE7821D59), CONST(0x187DE2A7), CONST(0x3B20D79E),
    CONST(0x3B20D79E), CONST(0x187DE2A7), CONST(0xE7821D59), CONST(0xC4DF2862),

    CONST(0xCAC933AE), CONST(0x0C7C5C1E), CONST(0x3EC52FA0), CONST(0x238E7673),
    CONST(0xDC71898D), CONST(0xC13AD060), CONST(0xF383A3E2), CONST(0x3536CC52)
};

/*
 * Synthesize one block of 4 output samples from 4 subband samples.
 *
 *  dst - receives 4 samples. Values are clipped to [OUT_CLIP_MIN, OUT_CLIP_MAX]
 *        and then stored into the unsigned OUTSAMPLE type, so negative samples
 *        appear as their two's-complement bit pattern.
 *  src - 4 input subband samples.
 *  V   - filter history, 80 entries (indices 0..79 are read and written).
 *        Must persist between calls; the caller owns and initializes it.
 */
static void synth_4(OUTSAMPLE* dst, const INSAMPLE* src, FIXED* V){

    ITER i, j;
    const FIXED* tabl = proto_4_40;
    const FIXED* costab = costab_4;

    //shift: age the history by 8 entries, copying from the top down so the
    //overlapping source entries are read before they are overwritten
    for(i = 79; i >= 8; i--){
        V[i] = V[i - 8];
    }

    //matrix: 8 new history entries, each a dot product of one costab row with src
    for(i = 0; i < 8; i++){

        FIXED_S t = (FIXED_S)costab[0] * (FIXED_S)src[0]
                  + (FIXED_S)costab[1] * (FIXED_S)src[1]
                  + (FIXED_S)costab[2] * (FIXED_S)src[2]
                  + (FIXED_S)costab[3] * (FIXED_S)src[3];

        costab += 4;
        V[i] = (FIXED)(t >> NUM_FRAC_BITS_COS);
    }

    //calculate audio samples: 10 windowed taps per output sample
    for(j = 0; j < 4; j++){

        FIXED_S sample = (FIXED_S)V[j +  0] * (FIXED_S)tabl[0]
                       + (FIXED_S)V[j + 12] * (FIXED_S)tabl[1]
                       + (FIXED_S)V[j + 16] * (FIXED_S)tabl[2]
                       + (FIXED_S)V[j + 28] * (FIXED_S)tabl[3]
                       + (FIXED_S)V[j + 32] * (FIXED_S)tabl[4]
                       + (FIXED_S)V[j + 44] * (FIXED_S)tabl[5]
                       + (FIXED_S)V[j + 48] * (FIXED_S)tabl[6]
                       + (FIXED_S)V[j + 60] * (FIXED_S)tabl[7]
                       + (FIXED_S)V[j + 64] * (FIXED_S)tabl[8]
                       + (FIXED_S)V[j + 76] * (FIXED_S)tabl[9];

        tabl += 10;

        sample >>= (NUM_FRAC_BITS_PROTO - 1 - 2);   //-2 is for the -4 we need to multiply by :)
        sample = -sample;                           //...and this is the minus sign of that -4

        if(sample > OUT_CLIP_MAX){
            sample = OUT_CLIP_MAX;
        }
        else if(sample < OUT_CLIP_MIN){
            sample = OUT_CLIP_MIN;
        }

        dst[j] = (OUTSAMPLE)sample;
    }
}

/*
 * Synthesize one block of 8 output samples from 8 subband samples.
 *
 *  dst - receives 8 samples. Values are clipped to [OUT_CLIP_MIN, OUT_CLIP_MAX]
 *        and then stored into the unsigned OUTSAMPLE type, so negative samples
 *        appear as their two's-complement bit pattern.
 *  src - 8 input subband samples.
 *  V   - filter history, 160 entries (indices 0..159 are read and written).
 *        Must persist between calls; the caller owns and initializes it.
 */
static void synth_8(OUTSAMPLE* dst, const INSAMPLE* src, FIXED* V){

    ITER i, j;
    const FIXED* tabl = proto_8_80;
    const FIXED* costab = costab_8;

    //shift: age the history by 16 entries, copying from the top down so the
    //overlapping source entries are read before they are overwritten
    for(i = 159; i >= 16; i--){
        V[i] = V[i - 16];
    }

    //matrix: 16 new history entries, each a dot product of one costab row with src
    for(i = 0; i < 16; i++){

        FIXED_S t = (FIXED_S)costab[0] * (FIXED_S)src[0]
                  + (FIXED_S)costab[1] * (FIXED_S)src[1]
                  + (FIXED_S)costab[2] * (FIXED_S)src[2]
                  + (FIXED_S)costab[3] * (FIXED_S)src[3]
                  + (FIXED_S)costab[4] * (FIXED_S)src[4]
                  + (FIXED_S)costab[5] * (FIXED_S)src[5]
                  + (FIXED_S)costab[6] * (FIXED_S)src[6]
                  + (FIXED_S)costab[7] * (FIXED_S)src[7];

        costab += 8;
        V[i] = (FIXED)(t >> NUM_FRAC_BITS_COS);
    }

    //calculate audio samples: 10 windowed taps per output sample
    for(j = 0; j < 8; j++){

        FIXED_S sample = (FIXED_S)V[j +   0] * (FIXED_S)tabl[0]
                       + (FIXED_S)V[j +  24] * (FIXED_S)tabl[1]
                       + (FIXED_S)V[j +  32] * (FIXED_S)tabl[2]
                       + (FIXED_S)V[j +  56] * (FIXED_S)tabl[3]
                       + (FIXED_S)V[j +  64] * (FIXED_S)tabl[4]
                       + (FIXED_S)V[j +  88] * (FIXED_S)tabl[5]
                       + (FIXED_S)V[j +  96] * (FIXED_S)tabl[6]
                       + (FIXED_S)V[j + 120] * (FIXED_S)tabl[7]
                       + (FIXED_S)V[j + 128] * (FIXED_S)tabl[8]
                       + (FIXED_S)V[j + 152] * (FIXED_S)tabl[9];

        tabl += 10;

        sample >>= (NUM_FRAC_BITS_PROTO - 1 - 3);   //-3 is for the -8 we need to multiply by :)
        sample = -sample;                           //...and this is the minus sign of that -8

        if(sample > OUT_CLIP_MAX){
            sample = OUT_CLIP_MAX;
        }
        else if(sample < OUT_CLIP_MIN){
            sample = OUT_CLIP_MIN;
        }

        dst[j] = (OUTSAMPLE)sample;
    }
}

/*
 * Synthesis entry point (A2DP figure 12.3).
 *
 *  dst    - receives nBands output samples
 *  src    - nBands input subband samples
 *  nBands - 4 selects the 4-subband path; every other value is treated as 8
 *  V      - persistent filter history: 80 entries for 4 subbands, 160 for 8
 */
static void synth(OUTSAMPLE* dst, const INSAMPLE* src, uint8_t nBands, FIXED* V){

    if(nBands == 4){
        synth_4(dst, src, V);
    }
    else{
        synth_8(dst, src, V);
    }
}

/*
    original tables (these were reordered to be in order they are accessed)

    proto_4_40:

         0.00000000E+00, 5.36548976E-04, 1.49188357E-03, 2.73370904E-03,
         3.83720193E-03, 3.89205149E-03, 1.86581691E-03,-3.06012286E-03,
         1.09137620E-02, 2.04385087E-02, 2.88757392E-02, 3.21939290E-02,
         2.58767811E-02, 6.13245186E-03,-2.88217274E-02,-7.76463494E-02,
         1.35593274E-01, 1.94987841E-01, 2.46636662E-01, 2.81828203E-01,
         2.94315332E-01, 2.81828203E-01, 2.46636662E-01, 1.94987841E-01,
        -1.35593274E-01,-7.76463494E-02,-2.88217274E-02, 6.13245186E-03,
         2.58767811E-02, 3.21939290E-02, 2.88757392E-02, 2.04385087E-02,
        -1.09137620E-02,-3.06012286E-03, 1.86581691E-03, 3.89205149E-03,
         3.83720193E-03, 2.73370904E-03, 1.49188357E-03, 5.36548976E-04

    proto_8_80:

         0.00000000E+00, 1.56575398E-04, 3.43256425E-04, 5.54620202E-04,
         8.23919506E-04, 1.13992507E-03, 1.47640169E-03, 1.78371725E-03,
         2.01182542E-03, 2.10371989E-03, 1.99454554E-03, 1.61656283E-03,
         9.02154502E-04,-1.78805361E-04,-1.64973098E-03,-3.49717454E-03,
         5.65949473E-03, 8.02941163E-03, 1.04584443E-02, 1.27472335E-02,
         1.46525263E-02, 1.59045603E-02, 1.62208471E-02, 1.53184106E-02,
         1.29371806E-02, 8.85757540E-03, 2.92408442E-03,-4.91578024E-03,
        -1.46404076E-02,-2.61098752E-02,-3.90751381E-02,-5.31873032E-02,
         6.79989431E-02, 8.29847578E-02, 9.75753918E-02, 1.11196689E-01,
         1.23264548E-01, 1.33264415E-01, 1.40753505E-01, 1.45389847E-01,
         1.46955068E-01, 1.45389847E-01, 1.40753505E-01, 1.33264415E-01,
         1.23264548E-01, 1.11196689E-01, 9.75753918E-02, 8.29847578E-02,
        -6.79989431E-02,-5.31873032E-02,-3.90751381E-02,-2.61098752E-02,
        -1.46404076E-02,-4.91578024E-03, 2.92408442E-03, 8.85757540E-03,
         1.29371806E-02, 1.53184106E-02, 1.62208471E-02, 1.59045603E-02,
         1.46525263E-02, 1.27472335E-02, 1.04584443E-02, 8.02941163E-03,
        -5.65949473E-03,-3.49717454E-03,-1.64973098E-03,-1.78805361E-04,
         9.02154502E-04, 1.61656283E-03, 1.99454554E-03, 2.10371989E-03,
         2.01182542E-03, 1.78371725E-03, 1.47640169E-03, 1.13992507E-03,
         8.23919506E-04, 5.54620202E-04, 3.43256425E-04, 1.56575398E-04


    js code to convert to fixpoint:

        var xa = new Array(values here...);
        var num = 0;
        var perRow = 4;
        var L = parseInt(xa.length);

        for(i = 0; i < L; i++){

            x = xa[i];
            var neg = 0;

            if(x < 0){
                neg = 1;
                x = -x;
            }
            x *= (1 << 26);     //this 26 should be the number of fraction bits
            x = parseInt(x + 0.5);
            s = x >> 28;
            x &= 0x0FFFFFFF;

            if(neg){
                x = x ^ 0x0FFFFFFF;
                x++;
                s ^= 0x0F;
                if(x & 0x10000000) s++;
                x &= 0x0FFFFFFF;
                s &= 0x0F;
            }

            x = x.toString(16);
            while(x.length < 7) x = "0" + x;
            x = s.toString(16) + x;
            x = x.toUpperCase();

            document.write("0x" + x);
            if(i != L - 1) document.write(",");
            if(++num == perRow){
                num = 0;
                document.write("<br>");     //row break; the literal was lost in the original listing, presumably a line-break tag
            }
            else document.write(" ");
        }


    js code to produce costab (adjust loop variables as needed for both table variants)

        for(k = 0; k < 8; k++) for(i = 0; i < 4; i++){

            document.write(Math.cos((i + 0.5) * (k + 2) * Math.PI / 4) + ", ")
        }


    js code to generate order tables (they are used for those strange offsets
    into the V array in synth_* when generating samples):

        var L = 200;
        var V = new Array(L);
        var U = new Array(80);
        var i, j;
        var nBands = 4;

        for(i = 0; i < L; i++) V[i] = i + 1;

        for(i = 0; i < 5; i++) for(j = 0; j < nBands; j++){

            if(nBands == 4){
                U[i * 8 + j] = V[i * 16 + j];
                U[i * 8 + 4 + j] = V[i * 16 + 12 + j];
            }
            else{
                U[i * 16 + j] = V[i * 32 + j];
                U[i * 16 + 8 + j] = V[i * 32 + 24 + j];
            }
        }

        for(j = 0; j < nBands; j++) for(i = 0; i < 10; i++) document.write((U[j + nBands * i] - 1) + ",\t");


    C code for reordering the proto_* tables to access order.
    insert table values and modify nBands as needed

        #include <stdio.h>
        #include <stdint.h>

        int32_t tabl[] = { table data here };

        int main(int argc, char** argv){

            int i, j;
            int nBands = 8;

            for(j = 0; j < nBands; j++) for(i = 0; i < 10; i++) printf("0x%08X, ", tabl[j + nBands * i]);
        }
*/