Tuesday, January 4, 2011

Interesting Algorithm question in Facebook Interview

The question is:

"Given a number in range of 1 to n, what is minimum number of guesses needed to find a specific number if you're just given an answer either "higher" or "lower" for each guess you make"

It sounds tricky, but actually the answer is very simple.

Here's the illustration:

the number to be guessed
                                             |  
                                             V
|----------------------------------------------------------------------|
min                              ^                                    max
                                 |
                                 |
                             your guess

Using common method of binary searching, our guess starts from: min + (max-min)/2 or a number in the middle of the range (divide-and-conquer). If our guess is lower than the number, we're given "LOWER" and vice versa.

As the number to be guessed is random, it is possible the number falls right in the middle of the range and matches our first guess.  So the answer of this question is (the key of this question is "minimum number of guesses") is 1.

Thursday, December 30, 2010

SSE built-in functions in GCC

int __builtin_ia32_comieq (v4sf, v4sf)
int __builtin_ia32_comineq (v4sf, v4sf)
int __builtin_ia32_comilt (v4sf, v4sf)
int __builtin_ia32_comile (v4sf, v4sf)
int __builtin_ia32_comigt (v4sf, v4sf)
int __builtin_ia32_comige (v4sf, v4sf)
int __builtin_ia32_ucomieq (v4sf, v4sf)
int __builtin_ia32_ucomineq (v4sf, v4sf)
int __builtin_ia32_ucomilt (v4sf, v4sf)
int __builtin_ia32_ucomile (v4sf, v4sf)
int __builtin_ia32_ucomigt (v4sf, v4sf)
int __builtin_ia32_ucomige (v4sf, v4sf)
v4sf __builtin_ia32_addps (v4sf, v4sf)
v4sf __builtin_ia32_subps (v4sf, v4sf)
v4sf __builtin_ia32_mulps (v4sf, v4sf)
v4sf __builtin_ia32_divps (v4sf, v4sf)
v4sf __builtin_ia32_addss (v4sf, v4sf)
v4sf __builtin_ia32_subss (v4sf, v4sf)
v4sf __builtin_ia32_mulss (v4sf, v4sf)
v4sf __builtin_ia32_divss (v4sf, v4sf)
v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
v4si __builtin_ia32_cmpltps (v4sf, v4sf)
v4si __builtin_ia32_cmpleps (v4sf, v4sf)
v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
v4si __builtin_ia32_cmpnleps (v4sf, v4sf)
v4si __builtin_ia32_cmpngtps (v4sf, v4sf)
v4si __builtin_ia32_cmpngeps (v4sf, v4sf)
v4si __builtin_ia32_cmpordps (v4sf, v4sf)
v4si __builtin_ia32_cmpeqss (v4sf, v4sf)
v4si __builtin_ia32_cmpltss (v4sf, v4sf)
v4si __builtin_ia32_cmpless (v4sf, v4sf)
v4si __builtin_ia32_cmpunordss (v4sf, v4sf)
v4si __builtin_ia32_cmpneqss (v4sf, v4sf)
v4si __builtin_ia32_cmpnlts (v4sf, v4sf)
v4si __builtin_ia32_cmpnless (v4sf, v4sf)
v4si __builtin_ia32_cmpordss (v4sf, v4sf)
v4sf __builtin_ia32_maxps (v4sf, v4sf)
v4sf __builtin_ia32_maxss (v4sf, v4sf)
v4sf __builtin_ia32_minps (v4sf, v4sf)
v4sf __builtin_ia32_minss (v4sf, v4sf)
v4sf __builtin_ia32_andps (v4sf, v4sf)
v4sf __builtin_ia32_andnps (v4sf, v4sf)
v4sf __builtin_ia32_orps (v4sf, v4sf)
v4sf __builtin_ia32_xorps (v4sf, v4sf)
v4sf __builtin_ia32_movss (v4sf, v4sf)
v4sf __builtin_ia32_movhlps (v4sf, v4sf)
v4sf __builtin_ia32_movlhps (v4sf, v4sf)
v4sf __builtin_ia32_unpckhps (v4sf, v4sf)
v4sf __builtin_ia32_unpcklps (v4sf, v4sf)
v4sf __builtin_ia32_cvtpi2ps (v4sf, v2si)
v4sf __builtin_ia32_cvtsi2ss (v4sf, int)
v2si __builtin_ia32_cvtps2pi (v4sf)
int __builtin_ia32_cvtss2si (v4sf)
v2si __builtin_ia32_cvttps2pi (v4sf)
int __builtin_ia32_cvttss2si (v4sf)
v4sf __builtin_ia32_rcpps (v4sf)
v4sf __builtin_ia32_rsqrtps (v4sf)
v4sf __builtin_ia32_sqrtps (v4sf)
v4sf __builtin_ia32_rcpss (v4sf)
v4sf __builtin_ia32_rsqrtss (v4sf)
v4sf __builtin_ia32_sqrtss (v4sf)
v4sf __builtin_ia32_shufps (v4sf, v4sf, int)
void __builtin_ia32_movntps (float *, v4sf)
int __builtin_ia32_movmskps (v4sf)

The following built-in functions are available when -msse is used.

v4sf __builtin_ia32_loadaps (float *)
Generates the movaps machine instruction as a load from memory. 
void __builtin_ia32_storeaps (float *, v4sf)
Generates the movaps machine instruction as a store to memory. 
v4sf __builtin_ia32_loadups (float *)
Generates the movups machine instruction as a load from memory. 
void __builtin_ia32_storeups (float *, v4sf)
Generates the movups machine instruction as a store to memory. 
v4sf __builtin_ia32_loadsss (float *)
Generates the movss machine instruction as a load from memory. 
void __builtin_ia32_storess (float *, v4sf)
Generates the movss machine instruction as a store to memory. 
v4sf __builtin_ia32_loadhps (v4sf, v2si *)
Generates the movhps machine instruction as a load from memory. 
v4sf __builtin_ia32_loadlps (v4sf, v2si *)
Generates the movlps machine instruction as a load from memory 
void __builtin_ia32_storehps (v4sf, v2si *)
Generates the movhps machine instruction as a store to memory. 
void __builtin_ia32_storelps (v4sf, v2si *)
Generates the movlps machine instruction as a store to memory.

Wednesday, December 29, 2010

Vector Addition using SIMD

#include 

#define VECTOR_SIZE         4
typedef float v4sf __attribute__ ((vector_size(sizeof(float)*VECTOR_SIZE))); // vector of four singl
e floats

typedef union f4vector
{
    v4sf    v;
    float   f[VECTOR_SIZE];
} f4vector;


void print_vector (f4vector *v)
{
    printf("%f,%f,%f,%f\n", v->f[0], v->f[1], v->f[2], v->f[3]);
}

int main()
{
    union f4vector a, b, c;

    a.v = (v4sf){1., 2., 3., 4.};
    b.v = (v4sf){5., 6., 7., 8.};
    c.v = a.v + b.v;

    print_vector(&a);
    print_vector(&b);
    print_vector(&c);
}

Compile with the following command:
gcc -ggdb -mtune=pentium3 -march=pentium3 -c -O3 -ffast-math -mfpmath=sse -msse5 sse.c

To test, just link the object code to binary:

gcc -lm sse.o -o sse

$ ./sse
1.000000,2.000000,3.000000,4.000000
5.000000,6.000000,7.000000,8.000000
6.000000,8.000000,10.000000,12.000000

The assembled code:

$ objdump -dS ./sse.o | grep -2 c.v | tail -8
  7c:   0f 58 c1                addps  %xmm1,%xmm0
  7f:   0f 29 45 c8             movaps %xmm0,-0x38(%ebp)
--
 120:   f2 0f 11 44 24 04       movsd  %xmm0,0x4(%esp)
 126:   e8 00 00 00 00          call   12b <_main+0xdb>
        c.v = a.v + b.v;

        print_vector(&a);

As we can see, it's very optimized where adding 4 components of vector a and b is done in one SSE instruction (addps) instead of multiple instructions if we don't use -msse and -mfpmath=sse



How fast is the program?

$ time ./sse
1.000000,2.000000,3.000000,4.000000
5.000000,6.000000,7.000000,8.000000
6.000000,8.000000,10.000000,12.000000

real    0m0.109s
user    0m0.046s
sys     0m0.030s

Thursday, December 23, 2010

Mac OSX Lion: Another Windows remake?

Apple has just updated its website and now it announces that they plan to release another Mac OS-X named "Lion".  A sneak peak to the features, some of the features are not really "wow" me and even seems too-old to be a breakthrough.  For example, "LauchPad".  Windows xx has had it for long time as "Desktop icons".  Another one is "Mission Control" which the similar feature has been in Windows 7 for awhile.

Unfortunately, Apple has not revealed all the features they plan to put in OS-X.  Not sure if the upgrade worth the cost of upgrade (well, if it is only $25 upgrade I'll just go ahead and upgrade mine).

Sunday, November 14, 2010

Downsampling MP3 file

#!/usr/bin/tclsh

set ffmpeg [exec which ffmpeg]
puts "ffmpeg  = $ffmpeg"
if { [llength $argv] < 2 } {
    puts "\n$argv0  n bps\n"
    exit -1
}
set fi [lindex $argv 0]
set br [lindex $argv 1]
puts "$fi"
puts "target $br bps"

if { [regexp -all {(.*)\.([mM][pP]3)} $fi a b c] } {
    set fo "${b}_${br}bps.$c"
    puts "$fi ==> $fo, ext=$c, new bitrate=$br"
    set id3 [exec id3v2 -l $fi]
    set id3par ""
    if { [regexp -line {TIT2.*: (.*)\n} $id3 dummy title c] } { puts Title=$title }
    if { [regexp -line {TPE1.*: (.*)\n} $id3 dummy singer c] } { puts singer=$singer }
    if { [regexp -line {TALB.*: (.*)\n} $id3 dummy album c] } { puts album=$album }
    if { [regexp -line {TYER.*: (.*)\n} $id3 dummy year c] } { puts year=$year }
    if { [regexp -line {TCON.*\(([0-9]+)\)\n} $id3 dummy genre c] } { puts genre=$genre }
    set cmd "$ffmpeg -threads 16 -y -ab $br -i $fi $fo"
    if {![file exists $fo]} {
        if { [catch { set res [eval exec $cmd] fid }] } {
            #puts stderr "Could not execute $cmd"
            if {[info exists fid] } { puts stderr $fid" }
            #exit 1
        }
    }
    if {[info exists title]} { append idpar " --TIT2 \"$title\"" }
    if {[info exists singer]} { append idpar " --TPE1 \"$singer\"" }
    if {[info exists album]} { append idpar " --TALB \"$album\"" }
    if {[info exists year]} { append idpar " --TYER \"$year\"" }
    if {[info exists genre]} { append idpar " --TCON \"$genre\"" }
    set cmd "id3v2 $idpar $fo"
    puts $cmd
    if { [file exists $fo]} {
        eval exec $cmd
    }
} else {
    puts "Not an MP3 file"
}