Thursday, March 15, 2012

Fast Hashing of Variable-length text strings

The following Hash algorithm based on a research paper written by Peter Pearson (a scientist at Lawrence Livermore National Lab) has an ability to generate unique hash numbers for strings with lengths no longer than 256 bytes. That means, we can have up to 256 buckets in a hash table, but it guarantees the uniqueness (free of hash-collision), thus no need to have linked-list in buckets.

The beauty of this algorithm is that it is designed to work on small microprocessors or microcontrollers, which sometimes don't have luxury to do complex arithmetics in their Instruction set (or, require more instructions).  Instead, the following algorithm uses XOR and table lookup which should be easy to do in small CPUs.  On more modern and high-end CPUs, XOR operation translated directly to mnemonic XOR in machine-code (a single instruction) and done even in almost wire-speed.

One of the use I can think of is for creating MAC table for virtual bridging.  An Ethernet MAC address takes only 6 octets, so this algorithm can be used to lookup 256-bucket MAC table.  The hash-key is MACaddr, while the output is port.

For example:

A MAC table structure might look like below:
+-------------------+---------+------+
|      MACAddr      +  port   |  Age |
+-------------------+---------+------+
| XX:XX:XX:XX:XX:XX |    1    |  6   |
| YY:YY:YY:YY:YY:YY |    2    |  7   |
| ZZ:ZZ:ZZ:ZZ:ZZ:ZZ |    3    |  102 |
| AA:BB:BB:BB:BB:BB |    4    |  33  |
| AA:AA:AA:AA:AA:AA |    5    |  76  |
+-------------------+---------+------+

Learning:
port = PearsonHash(SourceMacAddr);
MACTable[port].addr = SourceMacAddr;
MACTable[port].port = port;

Forwarding:
inspect destination MAC address from incoming ethernet frame

//Flooding
if (DestMacAddr == Broadcast)
      Send to all ports, except the original incoming port, with this frame

// a small filtering is done here to prevent loop
if (DestMacAddr != SourceMacAddr)
{
     port = PearsonHash(DestMacAddr);
     if port is invalid:
          flooding
     else
     DataForward(EthernetFrame, port);
}

Aging:
The following statements can be executed periodically.

for each entry/bucket in the MAC table:
      if MACTable[i].age < 1:
            Flush(MACTable[i])

A challenge might be how to expand this algorithm to handle, say, 64000 buckets/entries?  How to generate the pseudorandom hash table?

#include <stdio.h>
#include <string.h>
#include <stdlib.h>


static unsigned char PseudoRandomHash[256] = {
     1,   87,   49,  12, 176, 178,  102, 166, 121, 193,   6,  84, 249, 230,  44,  163, 
     14, 197,  213, 181, 161,  85,  218,  80,  64, 239,  24, 226, 236, 142,  38,  200, 
    110, 177,  104, 103, 141, 253,  255,  50,  77, 101,  81,  18,  45,  96,  31,  222, 
     25, 107,  190,  70,  86, 237,  240,  34,  72, 242,  20, 214, 244, 227, 149,  235, 
     97, 234,   57,  22,  60, 250,   82,  175, 208,   5, 127, 199, 111,  62, 135,  248, 
    174, 169,  211,  58,  66, 154,  106, 195, 245, 171,  17, 187, 182, 179,   0,  243, 
    132,  56,  148,  75, 128, 133,  158, 100, 130, 126,  91,  13, 153, 246, 216,  219,
    119,  68,  223,  78,  83,  88,  201,  99, 122,  11,  92,  32, 136, 114,  52,   10, 
    138,  30,   48, 183, 156,  35,   61,  26, 143,  74, 251,  94, 129, 162,  63,  152, 
    170,   7,  115, 167, 241, 206,    3, 150,  55,  59, 151, 220,  90,  53,  23,  131, 
    125, 173,   15, 238,  79,  95,   89,  16, 105, 137, 225, 224, 217, 160,  37,  123, 
    118,  73,    2, 157,  46, 116,    9, 145, 134, 228, 207, 212, 202, 215,  69,  229, 
     27, 188,   67, 124, 168, 252,   42,   4,  29, 108,  21, 247,  19, 205,  39,  203, 
    233,  40,  186, 147, 198, 192,  155,  33, 164, 191,  98, 204, 165, 180, 117,   76, 
    140,  36,  210, 172,  41,  54,  159,   8, 185, 232, 113, 196, 231,  47, 146,  120, 
     51,  65,   28, 144, 254, 221,   93, 189, 194, 139, 112,  43,  71, 109, 184,  209}; 
     
#define N   256
     
     
int PearsonHash(const char *str)
{
    int n = strlen(str);
    int i;
    unsigned char *h;
    int result;
    
    if (n > N)
    {
        puts("Pearson Hashing can only be used for string length <256 characters");
        return -1;
    }
    h = (unsigned char *)malloc(N);
    bzero(h, sizeof(char)*N);
    h[0] = 0;
    for(i=1; i<n; i++)
    {
        h[i] = PseudoRandomHash[(h[i-1] ^ str[i])& 0xFF];
        //printf("h[%u]=%0x\n", i, h[i]);
        result = h[i];
    }
    free(h);
    return result;
}

     
int main()
{
    int i;
    int n;
    const char MyStringBase[] = "Ahlan Wa Sahlan Bib, ";
    char *StrTable[N];
    
    for(i=0; i<N; i++)
    {
        printf(" %03u ", PseudoRandomHash[i]);
        if ((i+1)%16 == 0)
            printf("\n");
    }
    for(i=0; i<N; i++)
    {
        StrTable[i] = (char *)malloc(N);
        sprintf(StrTable[i], "%s%u", MyStringBase, i);
    }    
    for(i=0; i<N; i++)
    {
        printf("\n%s: \tHash Index=%u\n", StrTable[i], PearsonHash(StrTable[i]));
        free(StrTable[i]);
    }    
}     

Wednesday, March 14, 2012

Print Canonical Addresses of a Site

This 2 lines of Python queries DNS and display the canonical names of a host.  It's like a simple DNS lookup (nslookup).

#!/usr/bin/python

import sys
import socket

HOST=''
PORT=80

if (len(sys.argv) < 2):
 print sys.argv[0], " "
 sys.exit(0)

HOST = sys.argv[1]
for res in socket.getaddrinfo(HOST, PORT, socket.AF_UNSPEC, socket.SOCK_STREAM):
   print res[4][0]



For example, if we execute the script and pass parameter 'www.google.com', we'd get this:

$ ./getdns.py www.google.com
74.125.224.50
74.125.224.51
74.125.224.48
74.125.224.49
74.125.224.52


Python GUI

This small Python script creates a window application and handles menu events as well as updating statusbar at the bottom.

#!/usr/bin/python

from Tkinter import *
import tkMessageBox

class StatusBar(Frame):

    def __init__(self, master):
        Frame.__init__(self, master)
        self.label = Label(self, text="", bd=1, relief=SUNKEN, anchor=W)
        self.label.pack(side=BOTTOM, fill=X)

    def set(self, format, *args):
        self.label.config(text=format % args)
        self.label.update_idletasks()

    def clear(self):
        self.label.config(text="")
        self.label.update_idletasks()

class App:

    def __init__(self, parent):
        self.this = self
        frame = Frame(parent)
        parent.protocol("WM_DELETE_WINDOW", ConfirmQuit)

        parent.geometry("%dx%d%+d%+d" % (600, 400, 100, 50))

        # create a menu
        menu = Menu(parent)
        parent.config(menu=menu)

        filemenu = Menu(menu)
        menu.add_cascade(label="File", menu=filemenu)
        filemenu.add_command(label="New", command=self.newFile)
        filemenu.add_command(label="Open...", command=self.OpenFile)
        filemenu.add_separator()
        filemenu.add_command(label="Exit", command=self.Exit)

        runmenu = Menu(menu)
        menu.add_cascade(label="Run", menu=runmenu)
        runmenu.add_command(label="Run now", command=self.callback)

        helpmenu = Menu(menu)
        menu.add_cascade(label="Help", menu=helpmenu)
        helpmenu.add_command(label="About...", command=self.About)

    def callback(event):
        print "callback was called from event class ",event.__class__
        print "event.__name__=",__name__
        print "event type:", type(event)
        print "event.this.__class__=", event.this.__class__
        print "this: type=", type(event.this), ",class=", event.this.__class__
        print "status_bar", status_bar.__class__
        status_bar.set("%s", "Ahlan bib")

    def newFile(event):
        print "New file", event
        status_bar.set("%s", "Creating a new file")

    def OpenFile(event):
        print "Open File", event
        status_bar.set("%s", "Opening a file")

    def About(event):
        print "(c) 2012, mlutfi", event
        status_bar.clear()

    def say_hi(self):
        print "hi there, everyone!"

    def Exit(event):
        ConfirmQuit()

class BaseWindow:
    def __init__(self, parent):
        Toplevel()


def ConfirmQuit():
    if tkMessageBox.askokcancel("Quit", "Do you really want to quit?"):
        root.destroy()

root = Tk()
root.protocol("WM_DELETE_WINDOW", ConfirmQuit)

status_bar = StatusBar(root)
status_bar.pack(side=BOTTOM, fill=X)

app = App(root)

root.mainloop()

Sunday, March 11, 2012

Fibonacci in Python

# Fibonacci numbers module

def fib(n):    # write Fibonacci series up to n
    a, b = 0, 1
    while b < n:
        #print b,
        a, b = b, a+b

def fib2(n): # return Fibonacci series up to n
    result = []
    a, b = 0, 1
    while b < n:
        result.append(b)
        a, b = b, a+b
    print b
    return result
    
# make it executable as a script as well as module_exit
if __name__ == "__main__":
    import sys
    if (len(sys.argv) > 1):
        f = fib2(int(sys.argv[1]))
        str = ""
        print "Fibonacci sequence: ", f
    else:
        print sys.argv[0], " "

Downloading book "Foundations of Computer Science"

This small python script would download all chapters in the book "Foundations of Computer Science".





#!/usr/bin/python

import sys
from subprocess import call  # for 'call'


urlbase = 'http://infolab.stanford.edu/~ullman/focs/'

others = ['preface.pdf', 'toc.pdf', 'index.pdf']
chapters = range(1,15)
links = []
links.extend(others)

def download(index, f):
    url = urlbase + f
    print "file = ", index+1, url
    call(["wget", url])


for ch in chapters:
    links.append('ch' + '%02d' %(ch) + '.pdf')

out = [download(index,obj) for index,obj in enumerate(links)]

print "out=", out


Regular Expression in Python

The following is a Python snippet to do regular expression. First it reads a file passed as an argument, the it goes to a for-loop for each line and do string regular-expression search
#!/usr/bin/python

import sys # for sys.argv, etc.
import re  # regular expression

if (len(sys.argv) > 1):
    filename = sys.argv[1]
else:
    print sys.argv[0], " "
    sys.exit(0)


with open(filename, 'r') as f:
    #f.read()
    for line in f:
        pattern = re.compile(r"print (.*)")
        match = pattern.search(line)
        if match:
            print "print is used to print '", match.group(1), "'"

~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ readfile.py 17,3-9 All "readfile.py" 21L, 386C