# -*- Mode: Python -*-
#
#       Author: Sam Rushing <rushing@nightmare.com>
#       Copyright 1997 by Sam Rushing
#                                                All Rights Reserved.
"""
Medusa was once distributed under a 'free for non-commercial use'
license, but in May of 2000 Sam Rushing changed the license to be
identical to the standard Python license at the time.  The standard
Python license has always applied to the core components of Medusa,
this change just frees up the rest of the system, including the http
server, ftp server, utilities, etc.  Medusa is therefore under the
following license:

==============================
Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and
that both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Sam Rushing not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.

SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
==============================

Sam would like to take this opportunity to thank all of the folks who
supported Medusa over the years by purchasing commercial licenses.
"""
#
#       This is a special modified version by Robert Chavers...
#       See the sections that start with:
#       # added by rac
#       I may have forgot to mark some places I modified, but I did try...
#


RCS_ID = '$Id: default_handler.py,v 1.7 2002/04/01 13:11:56 amk Exp $'

# standard python modules
import mimetypes
import re
import stat
import string

# medusa modules
import http_date
import http_server
import status_handler
import producers

# added by rac
import StringIO
import traceback
import os
import sys
import pyehtml_parser

unquote = http_server.unquote

# This is the 'default' handler.  it implements the base set of
# features expected of a simple file-delivering HTTP server.  file
# services are provided through a 'filesystem' object, the very same
# one used by the FTP server.
#
# You can replace or modify this handler if you want a non-standard
# HTTP server.  You can also derive your own handler classes from
# it.
#
# support for handling POST requests is available in the derived
# class <default_with_post_handler>, defined below.
#

from counter import counter

class default_handler:

    valid_commands = ['get', 'head']

    IDENT = 'Default HTTP Request Handler'

    # Pathnames that are tried when a URI resolves to a directory name
    directory_defaults = [
            'index.html',
            'Index.html',
            'default.html'
            ]

    default_file_producer = producers.file_producer

    def __init__ (self, filesystem):
        self.filesystem = filesystem
        # count total hits
        self.hit_counter = counter()
        # count file deliveries
        self.file_counter = counter()
        # count cache hits
        self.cache_counter = counter()

    hit_counter = 0

    def __repr__ (self):
        return '<%s (%s hits) at %x>' % (
                self.IDENT,
                self.hit_counter,
                id (self)
                )

    # always match, since this is a default
    def match (self, request):
        return 1

    # handle a file request, with caching.

    def handle_request (self, request):

        if request.command not in self.valid_commands:
            request.error (400) # bad request
            return

        self.hit_counter.increment()

        path, params, query, fragment = request.split_uri()

        if '%' in path:
            path = unquote (path)

        # strip off all leading slashes
        while path and path[0] == '/':
            path = path[1:]

        if self.filesystem.isdir (path):
            if path and path[-1] != '/':
                request['Location'] = 'http://%s/%s/' % (
                        get_header (HOST, request.header),
                        path
                        )
                request.error (301)
                return

            # we could also generate a directory listing here,
            # may want to move this into another method for that
            # purpose
            found = 0
            if path and path[-1] != '/':
                path = path + '/'
            for default in self.directory_defaults:
                p = path + default
                if self.filesystem.isfile (p):
                    path = p
                    found = 1 
                    request.uri = request.uri + path
            if not found:
                request.error (404) # Not Found
                return

        elif not self.filesystem.isfile (path):
            request.error (404) # Not Found
            return

        file_length = self.filesystem.stat (path)[stat.ST_SIZE]

        ims = get_header_match (IF_MODIFIED_SINCE, request.header)

        length_match = 1
        if ims:
            length = ims.group (4)
            if length:
                try:
                    length = string.atoi (length)
                    if length != file_length:
                        length_match = 0
                except:
                    pass

        ims_date = 0

        if ims:
            ims_date = http_date.parse_http_date (ims.group (1))

        try:
            mtime = self.filesystem.stat (path)[stat.ST_MTIME]
        except:
            request.error (404)
            return

        if length_match and ims_date:
            if mtime <= ims_date:
                request.reply_code = 304
                request.done()
                self.cache_counter.increment()
                return
        try:
            file = self.filesystem.open (path, 'rb')
        except IOError:
            request.error (404)
            return

        request['Last-Modified'] = http_date.build_http_date (mtime)
        request['Content-Length'] = file_length
        self.set_content_type (path, request)


        # added by rac
        if string.lower(request.uri[-5:]) == '.html':
            file = self.continue_request(request, file)
            
        if request.command == 'get':
            request.push (self.default_file_producer (file))

        self.file_counter.increment()
        request.done()


    # added by rac
    def continue_request (self, request, file):
        return pyehtml_parser.parse_file (request, self.filesystem.root, file)


    def set_content_type (self, path, request):
        ext = string.lower (get_extension (path))
        typ, encoding = mimetypes.guess_type(path)
        if typ is not None:
            request['Content-Type'] = typ
        else:
            # TODO: test a chunk off the front of the file for 8-bit
            # characters, and use application/octet-stream instead.
            request['Content-Type'] = 'text/plain'

    def status (self):
        return producers.simple_producer (
                '<li>%s' % status_handler.html_repr (self)
                + '<ul>'
                + '  <li><b>Total Hits:</b> %s'                 % self.hit_counter
                + '  <li><b>Files Delivered:</b> %s'    % self.file_counter
                + '  <li><b>Cache Hits:</b> %s'                 % self.cache_counter
                + '</ul>'
                )

# HTTP/1.0 doesn't say anything about the "; length=nnnn" addition
# to this header.  I suppose it's purpose is to avoid the overhead
# of parsing dates...
IF_MODIFIED_SINCE = re.compile (
        'If-Modified-Since: ([^;]+)((; length=([0-9]+)$)|$)',
        re.IGNORECASE
        )

USER_AGENT = re.compile ('User-Agent: (.*)', re.IGNORECASE)

CONTENT_TYPE = re.compile (
        r'Content-Type: ([^;]+)((; boundary=([A-Za-z0-9\'\(\)+_,./:=?-]+)$)|$)',
        re.IGNORECASE
        )
HOST = re.compile ('Host: ([^:/]+).*', re.IGNORECASE)

get_header = http_server.get_header
get_header_match = http_server.get_header_match

def get_extension (path):
    dirsep = string.rfind (path, '/')
    dotsep = string.rfind (path, '.')
    if dotsep > dirsep:
        return path[dotsep+1:]
    else:
        return ''
