Dari contoh kode yang dilampirkan di paper[1] dalam bahasa C dan untuk fungsi logaritma natural (ln), saya ubah sesuai dengan kebutuhan saya yaitu log10 dan dalam bahasa pascal (delphi)
var {16 bit used for mantissa lookup} lut : array[0..65535] of single; {IEEE 754 32-bit Floating-Point Number} procedure precalc_lut; var step : integer; i, p, x : integer; fval : single; pf : ^dword; begin pf := @fval; x := $3F800000; pf^ := x; step := 1 shl 16;//23 - 7 p := 1 shl 7; for i := 0 to p - 1 do begin lut[i] := log2( fval ); x := x + step; pf^ := x; end; end; function _log10( val: single ): single; register; var pf : ^dword; x, log_2 : integer; begin pf := @val; x := pf^; log_2 := ( ( x shr 23 ) and $FF ) - 127; x := (x and $7FFFFF) shr 16; result := ( ( lut[x] + log_2 ) * 0.301029995664 ); end; |
hasilnya setelah diuji coba (1000000 pemanggilan fungsi, menggunakan gettickcount), lumayan ~4 kali lebih cepat dibanding fungsi log10 bawaan delphi (unit Math).
di lain pihak, ternyata Om Kiki pun membuat implementasi menggunakan deret MacLaurin dalam SSE. Walaupun nggak terlalu akurat dan terbatas di interval 0 <>
#include #include float maclaurin(float val){ __declspec(align(16) ) float tmp[4] = { 1.0F,1.0F,1.0F,0.0F}; __declspec(align(16) ) float multiplier[4] = { 1.0F,-0.5F,0.33333333F,-0.25F}; tmp[3] = val; __m128 vec1 = _mm_load_ps(tmp); __m128 vec2 = _mm_shuffle_ps( vec1,vec1,0xF4); __m128 vec3 = _mm_shuffle_ps( vec1,vec1,0xFC); __m128 vec4 = _mm_shuffle_ps( vec1,vec1,0xFF); __m128 acc = _mm_mul_ps(vec1, vec2); acc = _mm_mul_ps(acc, vec3); vec1 = _mm_load_ps(multiplier); acc = _mm_mul_ps(acc, vec4); acc = _mm_mul_ps(acc,vec1); vec1 = _mm_shuffle_ps(acc,acc, 0xE4); vec2 = _mm_shuffle_ps(acc,acc, 0x39); vec3 = _mm_shuffle_ps(acc,acc, 0x4E); vec4 = _mm_shuffle_ps(acc,acc, 0x93); acc = _mm_add_ps(vec1,vec2); acc = _mm_add_ps( acc, vec3); acc = _mm_add_ps(acc, vec4); float res; _mm_store_ss(&res,acc); return res; } float my_log(float x){ return maclaurin(x - 1); } int main(){ float reference = logf(1.3F); float target = my_log(1.3F); printf(" reference = %g , target = %g\n", reference, target); } |
0 comments:
Post a Comment
Click to see the code!
To insert emoticon you must added at least one space before the code.