#!/usr/bin/env python
# $Id$
# Author: Chris Green <cmg@uab.edu>
# Purpose: helper script for htdig to parse html files
# Created: Sun Jun 03 16:39:39 CDT 2001

# split_msg adapted from a script by Paul Moore
# (gustav@morpheus.demon.co.uk)
#
#
# quote_body taken from the HyperText package 


import sys, os, glob, re, mimetools, multifile, string
from mimetools import Message


def quote_body(s):
    """Return *s* with the HTML-special characters &, < and > escaped.

    '&' is replaced first so that the ampersands introduced by the
    '&lt;' / '&gt;' entities are not escaped a second time.  Double
    quotes are left untouched, so the result is suitable for element
    content but not, on its own, for a quoted attribute value.

    None is passed through unchanged so that callers can still test the
    result for a missing value (e.g. an absent mail header).
    """
    if s is None:
        return None
    # Use the string's own replace method; the equivalent functions in
    # the string module are deprecated.
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


# Matches a boundary value that starts with a double quote and captures
# everything between that quote and the last double quote in the value;
# whatever follows the last quote is matched too, so substituting group 1
# for the match discards it.
boundary_checkre = re.compile(r'^"(.*)".*',re.DOTALL)

def split_msg(msg):
    """Return the bodies of the text/plain parts of a multipart message.

    msg is a mimetools.Message whose underlying file (msg.fp) is
    positioned at the start of the message body.  The body is split on
    the message's "boundary" content-type parameter and the raw text of
    every sub-part whose content type is text/plain is collected, in
    order of appearance.

    Returns a list of strings (empty if there are no text/plain parts).

    Raises TypeError if the message has no boundary parameter or the
    parameter is empty.  Errors raised by the multifile module while
    walking the parts are propagated to the caller.
    """
    boundary = msg.getparam("boundary")
    if not boundary:
        # A missing boundary used to surface as a TypeError from
        # subscripting None; keep that exception type for callers, and
        # report an empty boundary the same way instead of letting it
        # escape as an IndexError.
        raise TypeError("message has no usable MIME boundary parameter")
    if boundary[0] == '"':
        # fix broken sourceforge postings: the value is still quoted,
        # so keep only what is between the quotes
        boundary = boundary_checkre.sub(r'\1', boundary)

    parts = []
    reader = multifile.MultiFile(msg.fp)
    reader.push(boundary)
    while reader.next():
        # Parsing the headers leaves the reader positioned at the body
        # of the current part.
        submsg = mimetools.Message(reader)
        if submsg.gettype() == "text/plain":
            parts.append(reader.read())

    reader.pop()
    return parts


def print_msg(file):
    """Print an HTML rendering of the mail message stored in *file*.

    file is the path of a file holding a single mail message.  The page
    written to standard output has the message's Subject header as its
    title ("No subject given" when the header is absent), the From
    header in an "Author" meta tag, and the message text, HTML-escaped,
    as its body.

    For a text/plain message the body is the whole message body.  For
    any other content type the text/plain parts found by split_msg()
    are concatenated; if the message cannot be split, the body is the
    placeholder "Can't Parse Message.".
    """
    fp = open(file)
    try:
        msgfile = multifile.MultiFile(fp, 1)
        msg = Message(msgfile)

        # Apply the defaults before escaping: the headers are None when
        # missing, and None cannot be escaped.
        subject = msg.getheader('Subject')
        if subject is None:
            subject = "No subject given"
        author = msg.getheader('From')
        if author is None:
            author = ""

        if msg.gettype() == 'text/plain':
            content = msg.fp.read()
        else:
            try:
                content = "".join(split_msg(msg))
            except (TypeError, multifile.Error):
                # some messages have broken MIME headers;
                # just construct something pseudo-workable
                content = "Can't Parse Message."
    finally:
        fp.close()

    print('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
          '    "http://www.w3.org/TR/REC-html40/loose.dtd">')
    print('<html><head>')
    print('<title>%s</title>' % quote_body(subject))
    # From headers routinely contain double quotes ("Name" <addr>), which
    # would otherwise terminate the attribute value early.
    print('<meta name="Author" content="%s">'
          % quote_body(author).replace('"', '&quot;'))

    print("</head><body>") # <pre>"
    print(quote_body(content))

    print("</body></html>")
    
    

if __name__ == '__main__':
    # The script expects exactly four arguments after its own name; only
    # the first one (the file to convert) is used here.
    if len(sys.argv) == 5:
        print_msg(sys.argv[1])
    else:
        print("usage: mailparse.py file content-type URL configuration_file")
        sys.exit(1)
