"""wsgiwrap: web application deployment helper

Invoke application through CGI, WSGI and command-line interfaces.
Optionally monitor and update modules in a library directory, and provide
access to application setup and background routines.
"""

# Version of this module, as a tuple.
version_info= 0,5
import sys
# Refuse to load at all on interpreters older than 2.2.
assert sys.version_info>=(2,2), 'wsgiwrap requires Python 2.2 or later'

import atexit
import os.path
import signal
import site
import thread
import threading
import time
# The zip/egg importer class, or None where the zipimport module is not
# available. The module is called 'zipimport' and the class inside it
# 'zipimporter'; ModuleUpdater uses this to recognise modules that were loaded
# from an archive.
#
try:
    import zipimport
    zimp= zipimport.zipimporter
except ImportError:
    zimp= None

# Help text for the command-line interface. Wrapper writes it to stderr, after
# 'Usage: <program name> ', when no recognised mode is given.
#
usage= """<mode>, where mode is:
    cgi - always run webapp as if CGI (even if no CGI environment seen)
    setup [args] - run application's setup routines (if it has any)
    daemon - run application's background tasks directly (if it has any)
    start - run daemon as a background process (posix init script)
    stop - stop daemon running in background (posix init script)
    restart - bounce daemon background process (posix init script)
"""


class ModuleUpdater(object):
    """Checks for modules under a given directory being updated, force reloads

    A delay in seconds may optionally be given to avoid checking until a
    certain amount of time has passed (improves performance on high-traffic
    sites that don't want to be constantly checking for library updates).

    Arguments:
        libdir - directory whose modules are watched. It is normalised with
            os.path.normpath and os.path.normcase before use.
        delay - minimum number of seconds between checks of libdir, or None
            to check on every call to update().
    """
    # Lookup of libdir to last-check-time. This is shared by all instances, so
    # that more than one ModuleUpdater can be in use without having to reload
    # all the modules multiple times per update. All access goes through
    # checks_lock.
    #
    checks= {}
    checks_lock= threading.Lock()

    def __init__(self, libdir, delay= None):
        self.libdir= os.path.normcase(os.path.normpath(libdir))
        self.delay= delay
        ModuleUpdater.checks_lock.acquire()
        try:
            # Keep an existing timestamp if another instance already watches
            # the same directory.
            ModuleUpdater.checks.setdefault(self.libdir, time.time())
        finally:
            ModuleUpdater.checks_lock.release()

    def update(self):
        """Unload all modules in the directory if any have been updated

        This will cause the modules to be re-imported next time they are
        needed. Unloading all at once will ensure that inter-module references
        are not kept. This should work reliably as long as none of the modules
        give references to other (eg. system) modules outside of the given
        libdir.

        Return True if an unload occurred, False otherwise (including when
        the check was skipped because of the delay).
        """
        ModuleUpdater.checks_lock.acquire()
        try:

            # Lazy checking. If some ModuleUpdater instance in this process has
            # recently checked our libdir, don't bother check it again yet. Do
            # recheck if time appears to have gone backwards... can't catch
            # every possible case where the system clock has been messed with,
            # but do our best.
            #
            now= time.time()
            lastcheck= ModuleUpdater.checks[self.libdir]
            if self.delay is not None and now-self.delay<=lastcheck<now:
                return False
            ModuleUpdater.checks[self.libdir]= now

            # Only files strictly below libdir belong to it. Matching on the
            # directory name plus a separator stops a sibling directory that
            # merely shares the prefix (lib vs. library) from being included.
            #
            prefix= self.libdir
            if not prefix.endswith(os.sep):
                prefix= prefix+os.sep

            # The module object this class lives in; it must never be unloaded
            # by its own updater.
            #
            thismodule= sys.modules.get(__name__)

            # Scan through Python's modules map looking for any that might be
            # out of date compared to their source files. Note: None is used in
            # this lookup as a marker for failed relative imports. If we end up
            # doing a reload, we must remove all the None entries as well as
            # the real modules, otherwise a future import can get confused and
            # import a None. A snapshot of the map is scanned so that imports
            # happening elsewhere cannot disturb the iteration.
            #
            outdated= False
            names= []
            for name, module in list(sys.modules.items()):
                if module is None:
                    names.append(name)

                # Check timestamps of modules stored as descendants of the
                # libdir. Note: don't try to reload ourselves, even if we
                # happen to be stored in the libdir.
                #
                elif module is not thismodule and hasattr(module, '__file__'):
                    filename= module.__file__
                    if not filename:
                        # No file behind this module, nothing to compare
                        continue
                    path= os.path.normcase(os.path.normpath(filename))
                    if path.startswith(prefix):
                        names.append(name)
                        if not outdated:
                            outdated= self._changed_since(module, path, lastcheck)

            # If any one source file was out of date, unload all of the modules
            # in scope.
            #
            if outdated:
                for name in names:
                    del sys.modules[name]

        finally:
            ModuleUpdater.checks_lock.release()
        return outdated

    def _changed_since(self, module, path, lastcheck):
        """Return True if a file behind module was modified since lastcheck

        path is the normalised form of module.__file__. Files that cannot be
        examined are treated as unchanged.
        """
        # Make list of files whose timestamps should be checked. For plain
        # filesystem paths check the source code as well as the file the
        # module was loaded from, which may be a bytecode cache.
        #
        candidates= []
        loader= getattr(module, '__loader__', None)
        if loader is None:
            candidates.append(module.__file__)
            if path.endswith('.pyc') or path.endswith('.pyo'):
                candidates.append(module.__file__[:-1])

        # Recognise and cope with the zip/egg importer. If some other
        # as-yet-unknown importer is in use, we can't tell where (or if) we
        # can look for a timestamp, so ignore the module for update-checking
        # purposes.
        #
        elif zimp is not None and isinstance(loader, zimp):
            candidates.append(loader.archive)

        # Check timestamps. The last-check time is truncated to whole seconds
        # before comparing (presumably to allow for filesystems that store
        # coarse modification times).
        #
        threshold= int(lastcheck)
        for candidate in candidates:
            try:
                if os.stat(candidate).st_mtime>=threshold:
                    return True
            except OSError:
                pass
        return False


class _Log(object):
    """Auto-flushing file wrapper, to ensure no std output is lost on quit
    """
    def __init__(self, f):
        self.f= f
    def write(self, s):
        self.f.write(s)
        self.f.flush()

class _Cleanup(object):
    """Callbacks for deleting pidfile on exit via atexit and signal
    """
    def __init__(self, path):
        self.path= path
    def signal(self, signo, frame):
        self.close()
        os._exit(0)
    def close(self):
        try:
            os.remove(self.path)
        except (OSError, IOError):
            pass


class Wrapper(object):
    """WSGI application invocation wrapper

    When run through CGI/WSGI, ensures given library path is available on the
    pythonpath, and when in a persistent WSGI server, makes sure modules in
    the path get reloaded if they are updated on disc.

    Additionally provides utilities for invoking the application through a
    command-line interface.

    Arguments:
        factory - callable taking no arguments and returning the WSGI
            application object. It is called again whenever the modules
            under libdir have been unloaded following an update.
        libdir - library directory; added to the import path with
            site.addsitedir and watched for module updates.
        delay - optional minimum number of seconds between update checks,
            passed on to ModuleUpdater.
        ismain - if true, act as a command-line program: the mode is read
            from sys.argv[1] and the constructor always ends in sys.exit.
            If false, the instance is usable as a WSGI application.
        pidfile - optional path of the file holding the background daemon's
            process id. Needed by the stop and restart modes.
        logfile - optional path of a file to which the background daemon's
            stdout and stderr are appended.
        ugid - optional sequence describing the identity the background
            daemon switches to: element 0 is given to os.setuid, element 1
            (if present) to os.setgid and element 2 (if present) to
            os.setgroups.
    """
    # How long 'restart' waits for the old daemon to remove its pidfile:
    # number of polls, and seconds slept between polls.
    #
    _stop_polls= 100
    _stop_poll_interval= 0.1

    def __init__(self, factory, libdir, delay= None, ismain= False, pidfile= None, logfile= None, ugid= None):
        site.addsitedir(libdir)

        # Make the paths absolute now; the daemon changes directory to '/'
        # before it uses them.
        #
        if pidfile is not None:
            pidfile= os.path.abspath(pidfile)
        if logfile is not None:
            logfile= os.path.abspath(logfile)

        # Not invoked as command - allow self to be used as WSGI application
        #
        if not ismain:
            self.factory= factory
            self.updater= ModuleUpdater(libdir, delay)
            self.application= None
            return

        mode= ''
        if len(sys.argv)>=2:
            mode= sys.argv[1]

        # Invoke application setup routines. Application should define a
        # setup() method if it wishes to support this.
        #
        if mode=='setup':
            self._run_setup(factory, sys.argv[2:])
            sys.exit(0)

        # Quit poller daemon. For restart, wait for the old daemon to clear
        # its pidfile, then fall through to start a new one.
        #
        if mode in ('stop', 'restart'):
            self._stop_daemon(pidfile, mode=='restart')
            if mode=='stop':
                sys.exit(0)

        # Invoke poller as daemon (double-fork). Only the final background
        # process returns from this call; it falls through to run the daemon.
        #
        if mode in ('start', 'restart'):
            self._daemonise(pidfile, logfile, ugid)

        # Run daemon. Application should provide a daemon() method if it
        # wishes to support this. This can be a function that runs
        # indefinitely, or a generator (or other iterator-returning function)
        # that yields delay times. In the latter case, wsgiwrap will resume the
        # generator after that amount of time, but if there is a module
        # update in the intervening time it will reload the application
        # and restart the daemon instead. So daemons written as generators
        # do not need to be 'restart'ed manually.
        #
        if mode in ('daemon', 'start', 'restart'):
            application= factory()
            if not hasattr(application, 'daemon'):
                sys.stderr.write('this application has no daemon\n')
                sys.exit(2)
            updater= ModuleUpdater(libdir, delay)
            self._poll_daemon(application, factory, updater)
            sys.exit(0)

        # Invoke webapp as CGI: either when asked to explicitly, or when no
        # mode was given and a CGI environment is present.
        #
        if mode=='cgi' or (mode=='' and os.environ.get('GATEWAY_INTERFACE', '')!=''):
            import wsgiref.handlers
            application= factory()
            wsgiref.handlers.CGIHandler().run(application)
            sys.exit(0)

        # Command fails
        #
        appname= 'application'
        if len(sys.argv)>=1:
            appname= sys.argv[0]
        sys.stderr.write('Usage: %s %s' % (appname, usage))
        sys.exit(2)

    def _run_setup(self, factory, args):
        """Call the application's setup(args); exit with status 2 if it has none"""
        application= factory()
        if not hasattr(application, 'setup'):
            sys.stderr.write('this application has no setup routines\n')
            sys.exit(2)
        application.setup(args)

    def _stop_daemon(self, pidfile, wait):
        """Send SIGTERM to the process recorded in pidfile

        Exits with status 1 if no pidfile is configured, if the file is
        missing or if its contents are not a number. If wait is true, polls
        for a limited time until the pidfile has disappeared, so that a new
        daemon can be started straight afterwards.
        """
        if pidfile is None:
            sys.stderr.write('no pidfile configured, cannot locate daemon\n')
            sys.exit(1)
        if not os.path.exists(pidfile):
            sys.stderr.write('pidfile (%r) not present, daemon not running?\n' % pidfile)
            sys.exit(1)
        fp= open(pidfile)
        try:
            contents= fp.read()
        finally:
            fp.close()
        try:
            pid= int(contents)
        except ValueError:
            sys.stderr.write('pidfile contents not readable\n')
            sys.exit(1)

        sys.stderr.write('Stopping process %i...' % pid)
        os.kill(pid, signal.SIGTERM)

        # The daemon deletes its pidfile from its SIGTERM handler. Starting a
        # replacement before that has happened would be refused because the
        # pidfile still exists, so give the old process time to finish. If it
        # never does, the start step reports the leftover pidfile.
        #
        if wait:
            for attempt in range(self._stop_polls):
                if not os.path.exists(pidfile):
                    break
                time.sleep(self._stop_poll_interval)
        sys.stderr.write('stopped.\n')

    def _daemonise(self, pidfile, logfile, ugid):
        """Turn the current process into a background daemon (double-fork)

        Returns only in the final background process. The two parent
        processes leave through sys.exit(0); the second one writes the
        daemon's process id to pidfile first. Exits with status 1 if the OS
        has no fork() or if the pidfile already exists.
        """
        if not hasattr(os, 'fork'):
            sys.stderr.write('Cannot run as daemon, OS does not support fork()\n')
            sys.exit(1)
        if pidfile is not None and os.path.exists(pidfile):
            sys.stderr.write('pidfile exists, is daemon already running? If not, remove file %r\n' % pidfile)
            sys.exit(1)
        sys.stdout.write('Starting poller as daemon\n')

        # Anything still buffered would be copied into the child processes by
        # fork() and could be written out more than once.
        sys.stdout.flush()

        if os.fork()>0:
            sys.exit(0)

        os.chdir('/')
        os.setsid()
        os.umask(0)
        if ugid is not None:
            # Supplementary groups and gid are set while the process still
            # has its original uid; the uid is changed last.
            if len(ugid)>=3:
                os.setgroups(ugid[2])
            if len(ugid)>=2:
                os.setgid(ugid[1])
            if len(ugid)>=1:
                os.setuid(ugid[0])

        pid= os.fork()
        if pid>0:
            if pidfile is not None:
                # Text mode: the pid is written as a plain string
                fp= open(pidfile, 'w')
                try:
                    fp.write(str(pid))
                finally:
                    fp.close()
            sys.exit(0)

        # From here on we are the daemon itself. Remove the pidfile again on
        # normal exit and on SIGTERM.
        #
        if pidfile is not None:
            cleanup= _Cleanup(pidfile)
            atexit.register(cleanup.close)
            signal.signal(signal.SIGTERM, cleanup.signal)

        # Detach the standard streams from the terminal
        #
        devnull_in= open('/dev/null', 'rb')
        devnull_out= open('/dev/null', 'wb')
        os.dup2(devnull_in.fileno(), sys.stdin.fileno())
        os.dup2(devnull_out.fileno(), sys.stdout.fileno())
        os.dup2(devnull_out.fileno(), sys.stderr.fileno())
        if logfile is not None:
            sys.stdout=sys.stderr= _Log(open(logfile, 'a+'))

    def _poll_daemon(self, application, factory, updater):
        """Drive application.daemon() until it finishes

        daemon() may run to completion and return None, or return an iterator
        of delay times. After each yielded delay the updater is consulted; if
        it reports an unload, a fresh application is obtained from factory
        and its daemon is started in place of the old one. Returns once a
        daemon iterator is exhausted without such a restart.
        """
        tasks= application.daemon()
        while tasks is not None:
            for pause in tasks:
                if pause>0:
                    time.sleep(pause)
                if updater.update():
                    application= factory()
                    tasks= application.daemon()
                    break
            else:
                # The daemon finished by itself. Stop here instead of going
                # round the exhausted iterator again and again.
                tasks= None

    # Invoke webapp as WSGI, reloading modules when necessary
    #
    def __call__(self, environ, start_response):
        if self.updater.update() or self.application is None:
            self.application= self.factory()
        return self.application(environ, start_response)


# to consider adding: run-in-wsgiref-server, daemon as NT service
# to consider adding: ISAPI_WSGI support. But how?
#     1. require caller to do __ExtensionFactory__= isapi_wsgi.ISAPISimpleHandler(app= wrapper) ?
#     2. require caller to do __ExtensionFactory__= wrapper.__ExtensionFactory__ ?
#     3. change call style to wsgiwrap.wrap(factory, globals()) and write directly?
#     even then, how to support isapi-register? would need to at least know
#     which virtual server to bind to and on what path. Also isapi_wsgi
#     daftly binds SCRIPT_NAME/PATH_INFO one level down - how to cope?
# to consider adding: deployment mapping
#     For ISAPI we might want to provide eg. 'application.py isapi-register' to
#     deploy the application to IIS. This means the deployment layer - in
#     co-operation with the application? - has to know paths of things to
#     install. If that information is built in, we can also have things like
#     'application.py showconf apache-cgi' to spit out an example Apache
#     <VirtualHost> block with reasonable mappings. Is this within wsgiwrap's
#     scope really?

