#!/usr/bin/python

# dictionary2.py by Nadeem Abdul Hamid, August 2006
# based on code by Saketh Bhamidipati, 2005

# This version uses regular expressions to search and remove
# all links from the output file.

# Demonstrates Python regular expressions, doc strings

"""
This program will read in a word list from a file and construct as
output an HTML file (web page) containing the definitions of all
the words, as found at dictionary.reference.com.
"""

import re
import sys
from urllib2 import *


def buildDictURL( word ):
    """
    Return the dictionary.reference.com search URL for the given word.

    The word is percent-encoded as a query-string value (spaces become
    '+'), so that characters such as spaces, '&', '=', '#' or a stray
    newline cannot corrupt the URL or the resulting HTTP request line.
    Plain alphanumeric words produce the same URL as before.
    """
    # Imported locally so this function works on both Python 2 and 3
    # without touching the file-level imports.
    try:
        from urllib.parse import quote_plus
    except ImportError:
        from urllib import quote_plus

    return 'http://dictionary.reference.com/search?q=%s' % quote_plus( word )


def lookupWord( word, outFileHandle ):
    """
    Compile a list of definitions for the given word and write them
    out, in HTML format, to the output file handle.

    The page returned by the URL from buildDictURL( word ) is scanned
    line by line.  Every line from the one containing
    '<!-- begin ahd4 -->' up to, but not including, the one containing
    '<!-- end ahd4 -->' is collected.  The collected text is passed
    through cleanLinks() and written to outFileHandle with a single
    write() call.  If the markers never appear, an empty string is
    written.

    Errors raised by urlopen() or by the write propagate to the caller.
    """
    response = urlopen( buildDictURL( word ) )

    inDef = False    # True while we are inside the definitions portion
                     # of the page, i.e. between the two marker comments
    lines = []       # lines to be output

    # Always release the HTTP connection, even if reading fails midway.
    try:
        for line in response:
            if '<!-- begin ahd4 -->' in line:
                inDef = True
            elif '<!-- end ahd4 -->' in line:
                inDef = False

            # Checked after the flag update: the begin-marker line is
            # kept, the end-marker line is dropped.
            if inDef:
                lines.append( line )
    finally:
        response.close()

    output = ''.join( lines )  # concatenate the lines with no separator
    outFileHandle.write( cleanLinks( output ) )
    


def lookupWords( inFileHandle, outFileHandle ):
    """
    Read word list from input file and write a compilation of
    definitions in HTML format to the output file.

    The input is expected to hold one word per line.  Surrounding
    whitespace (including the line terminator) is stripped from each
    line before it is looked up, and blank lines are skipped, so no
    request is made for an empty word or for a word with a trailing
    newline.  The definitions are wrapped in a minimal <html>/<body>
    skeleton.
    """
    outFileHandle.write('<html>\n<body>')

    # Iterate the handle directly: reads lazily instead of loading the
    # whole word list into memory first.
    for line in inFileHandle:
        word = line.strip()
        if not word:
            continue
        lookupWord( word, outFileHandle )

    outFileHandle.write('</body>\n</html>')



# Anchor element: '<a', a word boundary (so '<abbr>', '<address>' etc.
# are NOT treated as links), any attributes, then the shortest possible
# content up to the closing tag.  The content may be empty and may span
# lines.
_ANCHOR_RE = re.compile( r'<a\b[^>]*>(.*?)</a>', re.DOTALL | re.IGNORECASE )

# A single <img> tag whose attributes contain a path ending in
# '/pron.jpg"'.  '[^>]*' keeps the match inside one tag so neighbouring
# images and text are left alone.
_PRON_IMG_RE = re.compile( r'<img\b[^>]*/pron\.jpg"[^>]*>' )


def cleanLinks( string ):
    """
    Remove all hyperlinks from HTML source code and delete 'pron.jpg'
    images.

    Each <a ...>content</a> element (matched case-insensitively) is
    replaced by its content alone; anchors with empty content are
    removed entirely.  Each <img> tag referring to a file named
    'pron.jpg' is deleted.  All other markup is returned unchanged.

    Returns the cleaned string.
    """
    string = _ANCHOR_RE.sub( r'\1', string )
    string = _PRON_IMG_RE.sub( '', string )
    return string



def main(argv=sys.argv):
    """
    Command-line entry point.

    Expects argv to be [programName, inFileName, outFileName].  The
    input file is read as a word list and the compiled definitions are
    written to the output file via lookupWords().  With any other
    number of arguments a usage message is printed instead.

    An IOError raised while opening the files or while processing them
    is reported on standard output rather than propagated.  Both files
    are closed on every path, including when an error occurs.
    """
    if len(argv) == 3:
        try:
            # 'with' guarantees the handles are closed (and the output
            # flushed) even if a lookup fails part-way through.  The
            # input is opened first so a missing word list does not
            # leave an empty output file behind.
            with open( argv[1], 'r' ) as inFileH:
                with open( argv[2], 'w' ) as outFileH:
                    lookupWords( inFileH, outFileH )
        except IOError as e:
            print("Sorry, an exception occurred: %s" % e)
    else:
        # print usage message
        print("""
  dictionary2.py, 2006 Nadeem Abdul Hamid
  Usage:
         dictionary2.py inFileName outFileName
""")



# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
