'''
name: zipstream.py
purpose: read and write zip files without holding entire decoded files in memory
version: 0.1 <dogfood>
author: Andrew Clover <and@doxdesk.com>
based on: zipfile.py by James C. Ahlstrom <jim@interet.com>
licence: Python-style
'''

import struct, os, time, zlib
# Integer booleans used throughout this module (false is 0, true is 1)
false, true= 0, 1

class ZipError(Exception):
  "ZIP file format error"
  # A class rather than a bare string: string exceptions cannot be raised on
  # Python 2.6 and later.  'raise ZipError, msg' and 'except ZipError' both
  # keep working unchanged with a class.
  pass

# constants for Zip file compression methods
# (other ZIP compression methods not supported)

zipFormats= {
  'store': 0,
  'deflate': 8
}

def is_zipfile(filename):
  "Quickly see if file is a ZIP file by checking the magic number. Will not accept a ZIP archive with an ending comment."
  # Returns true when the last 22 bytes of the file look like an
  # end-of-archive record with a zero-length comment, false otherwise.
  # A file that cannot be opened, or is shorter than 22 bytes, is reported
  # as "not a zip file" rather than raising.
  try:
    fpin = open(filename, "rb")
    try:
      fpin.seek(-22, 2)	# Seek to end-of-file record
      endrec = fpin.read()
    finally:
      fpin.close()	# release the handle even when seek/read fails
  except (IOError, OSError):
    # only I/O failures mean "not a zip"; anything else is a real error
    return false
  if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
    return true	# file has correct magic number
  return false

def zip2date(d):
  "Split a 16-bit zip date into a (year, month, day) tuple"
  # bit layout, high to low: 7 bits years since 1980, 4 bits month, 5 bits day
  day = d & 0x1F
  month = (d >> 5) & 0xF
  year = 1980 + (d >> 9)
  return year, month, day

def zip2time(t):
  "Return (hour, minute, second) for a time in zip format"
  # bit layout, high to low: 5 bits hour, 6 bits minute, 5 bits second/2.
  # The seconds field is stored halved, so it is masked out first and then
  # doubled; the parentheses matter because '*' binds tighter than '&'.
  return t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2

def date2zip(year, month, day):
  "Pack year, month, day into a 16-bit zip date"
  # years are counted from 1980 and occupy the bits above month and day
  years_since_1980 = year - 1980
  return (years_since_1980 << 9) | (month << 5) | day

def time2zip(hour, minute, second):
  "Return 16-bit zip time for hour, minute, second"
  # Seconds are stored with 2-second resolution.  Floor division keeps the
  # value an integer even where '/' means true division, so it can still be
  # combined with '|'.
  return hour << 11 | minute << 5 | second // 2

class ZipFile:
  "Class with methods to open, read, write, close, list zip files"
  # Here are some struct module formats for reading headers
  structEndArchive = "<4s4H2lH"		# 9 items, end of archive, 22 bytes
  stringEndArchive = "PK\005\006"	# magic number for end of archive record
  structCentralDir = "<4s4B4H3i5H2i"	# 19 items, central directory, 46 bytes
  stringCentralDir = "PK\001\002"	# magic number for central directory
  structFileHeader = "<4s2B4H3i2H"	# 12 items, file header record, 30 bytes
  stringFileHeader = "PK\003\004"	# magic number for file header

  def __init__(self, filename, mode = "r", compression= zipFormats['deflate']):
    "Construct a ZipFile instance and open the ZIP file named 'filename' with mode read 'r', write 'w' or append 'a'."
    if compression not in zipFormats.values():
      raise ZipError, 'Unsupported compression type '+"'"+str(compression)+"'"
    self.TOC = {}	# Table of contents for the archive
    self.compression = compression	# Method of compression
    self.filename = filename
    self.mode = key = mode[0]
    if key == 'r':
      self.fp = open(filename, "rb")
      self._getTOC()
    elif key == 'w':
      self.fp = open(filename, "wb")
    elif key == 'a':
      fp = self.fp = open(filename, "r+b")
      fp.seek(-22, 2)		# Seek to end-of-file record
      endrec = fp.read()
      if endrec[0:4] == self.stringEndArchive and endrec[-2:] == "\000\000":
        self._getTOC()	# file is a zip file
        fp.seek(self.start_dir, 0)	# seek to start of directory and overwrite
      else:		# file is not a zip file, just append
        fp.seek(0, 2)
    else:
      raise RuntimeError, 'Mode must be "r", "w" or "a"'

  def _getTOC(self):
    "Read in the table of contents for the zip file"
    fp = self.fp
    fp.seek(-22, 2)		# Start of end-of-archive record
    filesize = fp.tell() + 22	# Get file size
    endrec = fp.read(22)	# Archive must not end with a comment!
    if endrec[0:4] != self.stringEndArchive or endrec[-2:] != "\000\000":
      raise ZipError, "File is not a zip file, or ends with a comment"
    endrec = struct.unpack(self.structEndArchive, endrec)
    size_cd = endrec[5]		# bytes in central directory
    offset_cd = endrec[6]	# offset of central directory
    x = filesize - 22 - size_cd
    concat = x - offset_cd	# zero, unless zip was concatenated to another file
    self.start_dir = offset_cd + concat	# Position of start of central directory
    fp.seek(self.start_dir, 0)
    total = 0
    flist = []		# List of file header offsets
    while total < size_cd:
      centdir = fp.read(46)
      total = total + 46
      if centdir[0:4] != self.stringCentralDir:
        raise ZipError, "Bad magic number for central directory"
      centdir = struct.unpack(self.structCentralDir, centdir)
      fname = fp.read(centdir[12])
      extra = fp.read(centdir[13])
      comment = fp.read(centdir[14])
      total = total + centdir[12] + centdir[13] + centdir[14]
      flist.append(centdir[18])	# Offset of file header record
    toc = self.TOC	# Table of contents
    for offset in flist:
      fp.seek(offset + concat, 0)
      fheader = fp.read(30)
      if fheader[0:4] != self.stringFileHeader:
        raise ZipError, "Bad magic number for file header"
      fheader = struct.unpack(self.structFileHeader, fheader)
      fname = fp.read(fheader[10])
      extra = fp.read(fheader[11])
      toc[fname] = (fp.tell(), extra) + fheader[3:10]
      # toc key is the file name, value is:
      # 0:file offset, 1:extra data as string, 2:bit flags, 3:compression type,
      # 4:file time, 5:file date, 6:CRC-32, 7:compressed size, 
      # 8:uncompressed size.

  def listdir(self):
    return self.TOC.keys()

# read -- read a file from zip and pass its contents to a stream in chunks of default size 10K

  def read(self, name, stream, chunk= 10240):
    if self.mode not in ("r", "a"):
      raise ZipError, 'piperead() requires mode "r" or "a"'
    if not self.fp:
      raise ZipError, "Attempt to read ZIP archive that was already closed"

    data= self.TOC[name]
    filepos = self.fp.tell()
    self.fp.seek(data[0], 0)

    nbytes= data[7]
    format= data[3]
    if format not in zipFormats.values():
      self.fp.seek(filepos, 0)
      raise ZipError, 'Unsupported compression method '+"'"+str(data[3])+"'"

    if format==zipFormats['deflate']:
      dc = zlib.decompressobj(-15)

    crc= 0
    while nbytes>0:
      length= min(chunk, nbytes)
      bytes= self.fp.read(length)
      if format==zipFormats['deflate']:
        bytes= dc.decompress(bytes)
      crc= zlib.crc32(bytes, crc)
      stream.write(bytes)
      nbytes= nbytes-length

    if format==zipFormats['deflate']:
      bytes= dc.decompress('Z')+dc.flush()
      if bytes:
        crc= zlib.crc32(bytes, crc)
        stream.write(bytes)

    self.fp.seek(filepos, 0)
    if crc!=data[6]:
      raise ZipError, 'Bad CRC for file '+"'"+name+"'"
    return

# write -- take input from stream in chunks of default 10K, compress it, and write it to the zip

  def write(self, stream, arcname, chunk= 10240, extra= ''):
    if self.mode not in ("w", "a"):
      raise ZipError, 'write() requires mode "w" or "a"'
    if not self.fp:
      raise ZipError, 'Attempt to write ZIP archive that was already closed'

    t= time.localtime(time.time())[0:6]
    dosdate= date2zip(t[0], t[1], t[2])
    dostime= time2zip(t[3], t[4], t[5])
    compression= self.compression

    headerPos= self.fp.tell()
    self.fp.write('\000'*30) # write dummy header to be filled in afterwards
    self.fp.write(arcname)
    self.fp.write(extra)
    bytesPos= self.fp.tell()

    crc= 0
    cbytes= 0
    if compression==zipFormats['deflate']:
      co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)

    nbytes= 0
    more= true
    while more:
      bytes= stream.read(chunk)
      nbytes= nbytes+ len(bytes)
      more= (bytes!='')
      crc= zlib.crc32(bytes, crc)

      if compression== zipFormats['deflate']:
        if bytes!='':
          bytes= co.compress(bytes)
        else:
          bytes= co.flush()

      cbytes= cbytes+len(bytes)
      self.fp.write(bytes)

    endPos= self.fp.tell() # update header
    self.fp.seek(headerPos, 0)
    header = struct.pack(self.structFileHeader, 'PK\003\004',
       10, 0, 0, compression, dostime, dosdate,
       crc, cbytes, nbytes, len(arcname), len(extra))
    self.fp.write(header)
    self.fp.seek(endPos, 0)
    self.TOC[arcname] = (bytesPos, extra, 0, compression, dostime, dosdate, crc, cbytes, nbytes)

  def close(self):
    if self.mode in ("w", "a"):		# write ending records
      attrib = 0666 << 16		# file attributes
      count = 0
      pos1 = self.fp.tell()
      for name, data in self.TOC.items():	# write central directory
        count = count + 1
        namesize = len(name)
        extrasize = len(data[1])
        centdir = struct.pack(self.structCentralDir, self.stringCentralDir,
            20, 3, 10, 0, data[2], data[3], data[4], data[5], data[6], data[7],
            data[8], namesize, extrasize, 0, 0, 0, attrib,
            data[0] - namesize - extrasize - 30)
        self.fp.write(centdir)
        self.fp.write(name)
        self.fp.write(data[1])
      pos2 = self.fp.tell()
      endrec = struct.pack(self.structEndArchive, self.stringEndArchive,
             0, 0, count, count, pos2 - pos1, pos1, 0)
      self.fp.write(endrec)
    self.fp.close()
    self.fp = None
