""" DOM-to-text serialisers for PXTL implementations.
"""

# Names exported by "from <this module> import *".
__all__ = [
  'SERIALISERS',
  'Serialiser',
  'XMLSerialiser',
  'XHTMLSerialiser',
  'HTMLSerialiser',
  'TextSerialiser',
]

import string
from pxtl.constants import *


class Serialiser:
  """ Common interface shared by every serialiser.

      There are three situations in which a serialiser gets used:

      (a) at output-time, by the reference implementation;
      (b) at run-time, by the compiled program that the optimised
          implementation generates;
      (c) at compile-time, by the optimised implementation itself, so that
          static content can be handled efficiently.

      For (a) only the writeX methods are called; they emit every piece of
      markup and recurse into child nodes on their own. For (b) only the
      startX/endX methods are called: each emits one piece of markup and does
      not need the original DOM node, so ordering and recursion are the
      caller's job. For (c) both kinds of method may be mixed, depending on
      how much extra templating logic is involved.

      Every output hook in this base class is a no-op; subclasses override
      the ones they need.
  """
  name = '(unknown serialiser)'

  # (name of DOM node-type constant, name of writer method) pairs, in the
  # order they are tried by writeChildren. Both names are looked up lazily so
  # that only the constants actually reached are read from the node.
  _CHILD_WRITERS = (
    ('ELEMENT_NODE', 'writeElement'),
    ('TEXT_NODE', 'writeText'),
    ('ENTITY_REFERENCE_NODE', 'writeEntity'),
    ('COMMENT_NODE', 'writeComment'),
    ('CDATA_SECTION_NODE', 'writeCDATA'),
    ('PROCESSING_INSTRUCTION_NODE', 'writePI'),
    ('DOCUMENT_TYPE_NODE', 'writeDoctype'),
  )

  def __init__(self, writer, encoding):
    """ Bind the output function self.w.

        writer   -- object with a write() method that receives the output.
        encoding -- None to pass strings to writer.write() untouched, else an
                    encoding name; output is then routed through an Encoder.
    """
    if encoding is not None:
      self.w = Encoder(writer, encoding).write
    else:
      # Same call signature as Encoder.write; the escape argument is accepted
      # and ignored because nothing is being encoded.
      def passThrough(s, escape=None, w=writer.write):
        return w(s)
      self.w = passThrough
    self.literals = []
    self.inattr = False

  def writeChildren(self, node):
    """ Serialise each child of node with the writeX method for its type.
        Children of any other node type are skipped (after beforeChild).
    """
    for child in node.childNodes:
      self.beforeChild(child)
      kind = child.nodeType
      for constantName, methodName in self._CHILD_WRITERS:
        if kind == getattr(node, constantName):
          getattr(self, methodName)(child)
          break

  def beforeChild(self, child, ignoreDoctypeHack=False):
    """ Write a newline before any document-level child that has a previous
        sibling. With ignoreDoctypeHack set, no newline is written when that
        previous sibling is the doctype.
    """
    if child.parentNode.nodeType != child.DOCUMENT_NODE:
      return
    previous = child.previousSibling
    if previous is None:
      return
    if ignoreDoctypeHack and previous.nodeType == previous.DOCUMENT_TYPE_NODE:
      return
    self.w('\n')

  def writeDocument(self, node, literals):
    """ Serialise a whole document. literals is stored on the instance for
        use by subclasses while writing.
    """
    self.literals = literals
    self.startDocument(node)
    self.writeChildren(node)
    self.endDocument(node)

  def startDocument(self, node):
    pass

  def endDocument(self, node):
    pass

  def writeElement(self, node):
    """ Serialise an element: start tag, attributes, end of start tag,
        children, then the closing part.
    """
    tagName = node.nodeName
    empty = len(node.childNodes) == 0
    self.startElement(tagName, empty)
    attributes = node.attributes
    for index in range(attributes.length):
      self.writeAttribute(attributes.item(index))
    self.endElement(tagName, empty)
    self.writeChildren(node)
    self.closeElement(tagName, empty)

  def startElement(self, nodeName, empty):
    pass

  def endElement(self, nodeName, empty):
    pass

  def closeElement(self, nodeName, empty):
    pass

  def writeAttribute(self, node):
    """ Serialise an attribute attributeTimes(name) times. self.inattr is
        True only while the attribute's value is being written.
    """
    attrName = node.nodeName
    for time in range(self.attributeTimes(attrName)):
      self.startAttribute(attrName, time)
      self.inattr = True
      children = node.childNodes
      count = len(children)
      # A value that is empty or a single text node is written as one string;
      # anything else is written child by child.
      isPlain = count == 0 or (
        count == 1 and children[0].nodeType == node.TEXT_NODE
      )
      if isPlain:
        self.writeString(node.value)
      else:
        self.writeChildren(node)
      self.inattr = False
      self.endAttribute(attrName, time)

  def attributeTimes(self, nodeName):
    """ Number of times the named attribute should be written out. """
    return 1

  def startAttribute(self, nodeName, time=0):
    pass

  def endAttribute(self, nodeName, time=0):
    pass

  def writeDoctype(self, node):
    """ Unpack a doctype node and hand its parts to startDoctype. """
    self.startDoctype(
      node.nodeName, node.publicId, node.systemId, node.internalSubset
    )

  def startDoctype(self, nodeName, publicId, systemId, internalSubset, nl=0):
    pass

  def writeCDATA(self, node):
    """ Work out the content type of a CDATA section from its parent element
        and pass it to startCDATA along with the data. An explicit 'type'
        attribute on the parent wins over the HTML_DEFAULTTYPES entry for the
        parent's name; without an element parent the type is ''.
    """
    contentType = ''
    parent = node.parentNode
    if parent is not None and parent.nodeType == node.ELEMENT_NODE:
      if parent.hasAttribute('type'):
        contentType = parent.getAttribute('type')
      else:
        contentType = HTML_DEFAULTTYPES.get(parent.nodeName, '')
    self.startCDATA(node.data, contentType)

  def startCDATA(self, data, contentType):
    pass

  def writeText(self, node):
    pass

  def writeEntity(self, node):
    pass

  def writeComment(self, node):
    pass

  def writePI(self, node):
    pass

  def writeString(self, s):
    pass


class XMLSerialiser(Serialiser):
  """ Serialiser for plain XML.
  """
  name = 'xml'

  def startDocument(self, node):
    """ Write an XML declaration, unless the document is version 1.0 with no
        declared encoding and is not standalone (the declaration would then
        carry no information).
    """
    if (node.xmlVersion != '1.0' or
      node.xmlEncoding is not None or node.xmlStandalone
    ):
      self.w('<?xml version="%s"' % node.xmlVersion)
      if node.xmlEncoding is not None:
        self.w(' encoding="%s"' % node.xmlEncoding)
      if node.xmlStandalone:
        self.w(' standalone="yes"')
      self.w('?>\n')

  def startElement(self, nodeName, empty):
    """ Open a start tag; attributes follow before endElement closes it. """
    self.w('<')
    self.w(nodeName)

  def endElement(self, nodeName, empty):
    """ Finish the start tag, self-closing it when the element is empty. """
    if empty:
      self.w('/>')
    else:
      self.w('>')

  def closeElement(self, nodeName, empty):
    """ Write the end tag; empty elements were already self-closed. """
    if not empty:
      self.w('</')
      self.w(nodeName)
      self.w('>')

  def startAttribute(self, nodeName, time=0):
    """ Write ' name="'; the value and endAttribute follow. """
    self.w(' ')
    self.w(nodeName)
    self.w('="')

  def endAttribute(self, nodeName, time=0):
    """ Write the closing quote of an attribute value. """
    self.w('"')

  def startDoctype(self, nodeName, publicId, systemId, internalSubset, nl=0):
    """ Write a <!DOCTYPE ...> declaration from its parts. Nothing at all is
        written when publicId, systemId and internalSubset are all None. A
        true nl appends a newline after the declaration.
    """
    if (publicId, systemId, internalSubset) != (None, None, None):
      self.w('<!DOCTYPE ')
      self.w(nodeName)
      if systemId is not None and publicId is None:
        self.w(' SYSTEM "')
      elif publicId is not None:
        self.w(' PUBLIC "')
        self.w(publicId)
        self.w('"')
        if systemId is not None:
          self.w(' "')
      # The opening quote for the system identifier was written by whichever
      # branch above applied; only the identifier and closing quote remain.
      if systemId is not None:
        self.w(systemId)
        self.w('"')
      if internalSubset is not None:
        self.w(' [')
        self.w(internalSubset)
        self.w(']')
      self.w('>')
      if nl:
        self.w('\n')

  def writeText(self, node):
    """ Write a text node. Nodes listed in self.literals are written as-is;
        all others are escaped through writeString.
    """
    if node in self.literals:
      self.w(node.data)
    else:
      self.writeString(node.data)

  def writeEntity(self, node):
    """ Write an entity reference as &name; without expanding it. """
    self.w('&')
    self.w(node.nodeName)
    self.w(';')

  def writeComment(self, node):
    """ Write a comment. '--' is not permitted inside an XML comment, so each
        occurrence in the data is replaced by '__', and a space is appended
        when the data ends in '-' so that it cannot run into the closing
        '-->'.
    """
    data = node.data.replace('--', '__')
    self.w('<!--')
    self.w(data)
    if data[-1:] == '-':
      self.w(' ')
    self.w('-->')

  def startCDATA(self, data, contentType):
    """ Write data as one or more CDATA sections. contentType is unused here.

        A CDATA section cannot contain ']]>', so the data is split on that
        sequence: each piece gets its own section and the separator is
        written between sections as character data with its '>' escaped.
        Characters the output encoding cannot represent are written as
        numeric character references outside the section, which is closed
        before and reopened after each such reference.
    """
    first = True
    for part in data.split(']]>'):
      if not first:
        # A literal ']]>' is not allowed in character data either, so the
        # separator is emitted in its escaped form.
        self.w(']]&gt;')
      self.w('<![CDATA[')
      self.w(part, escape=']]>&#%d;<![CDATA[')
      self.w(']]>')
      first = False

  def writePI(self, node):
    """ Write a processing instruction; the data and its leading space are
        left out when the data is empty.
    """
    self.w('<?')
    self.w(node.target)
    if node.data != '':
      self.w(' ')
      self.w(node.data)
    self.w('?>')

  def writeString(self, s):
    """ Write s with XML escaping applied: '&', '<' and the sequence ']]>'
        always; additionally the double quote, newline, tab and carriage
        return while inside an attribute value (self.inattr). '&' is replaced
        first so that the entities introduced afterwards are not escaped
        again. The '&#%d;' template is handed to self.w as the replacement
        for characters the output encoding cannot represent.
    """
    m = s.replace('&', '&amp;').replace('<', '&lt;').replace(']]>', ']]&gt;')
    if self.inattr:
      m = m.replace('"', '&quot;').replace('\n', '&#10;')
      m = m.replace('\t', '&#9;').replace('\r', '&#13;')
    self.w(m, escape='&#%d;')


class XHTMLSerialiser(XMLSerialiser):
  """ XML serialiser producing XHTML that legacy HTML user agents can also
      cope with, following XHTML 1.0 appendix C.
  """
  name = 'xhtml'

  # Opening/closing wrappers put around CDATA content, keyed by the content
  # type of the enclosing element. The three script types share one wrapper.
  _SCRIPT_WRAPPER = ('<!--/*--><![CDATA[/*><!--*/', '//]]>-->')
  _CDATA_WRAPPERS = {
    'text/css': ('<!--/*--><![CDATA[/*><!--*/', '/*]]>*/-->'),
    'text/javascript': _SCRIPT_WRAPPER,
    'text/ecmascript': _SCRIPT_WRAPPER,
    'text/jscript': _SCRIPT_WRAPPER,
    'text/vbscript': ("<!--'--><![CDATA['><!--\n", "']]>-->"),
  }

  def startDocument(self, node):
    # No XML declaration is written for XHTML output.
    pass

  def endElement(self, nodeName, empty):
    """ Finish the start tag. Only childless elements named in HTML_EMPTY
        are self-closed, and then with a space before the slash.
    """
    if not empty or nodeName not in HTML_EMPTY:
      self.w('>')
    else:
      self.w(' />')

  def closeElement(self, nodeName, empty):
    """ Write an explicit end tag for everything that endElement did not
        self-close, including childless elements outside HTML_EMPTY.
    """
    if empty and nodeName in HTML_EMPTY:
      return
    self.w('</')
    self.w(nodeName)
    self.w('>')

  def attributeTimes(self, nodeName):
    """ xml:lang is written twice (see startAttribute); all others once. """
    if nodeName != 'xml:lang':
      return 1
    return 2

  def startAttribute(self, nodeName, time=0):
    """ Start an attribute; any repeat after the first pass is written under
        the name 'lang'.
    """
    effectiveName = nodeName
    if time > 0:
      effectiveName = 'lang'
    XMLSerialiser.startAttribute(self, effectiveName, time)

  def startCDATA(self, data, contentType):
    """ Write CDATA content. For the content types in _CDATA_WRAPPERS the
        data is written unescaped between that type's opening and closing
        wrapper; for any other type it is written as escaped text.
    """
    wrapper = self._CDATA_WRAPPERS.get(contentType)
    if wrapper is None:
      self.writeString(data)
      return
    opening, closing = wrapper
    self.w(opening)
    self.w(data)
    self.w(closing)


class HTMLSerialiser(XHTMLSerialiser):
  """ Serialiser for legacy HTML, built on the XHTML serialiser. Start tags
      are never self-closed, attributes in the xml: and xmlns namespaces are
      dropped (except xml:lang, which is written as lang), attributes listed
      in HTML_MINIMISABLE are written without name or quotes, and script and
      style CDATA content is wrapped in plain comments.
  """
  name = 'html'

  def endElement(self, node, empty):
    """ Finish the start tag with a plain '>', whether or not it is empty.
        (The first parameter receives the element name, as in the base
        classes, despite being called node here.)
    """
    self.w('>')

  def attributeTimes(self, nodeName):
    """ Return 0 for attributes whose prefix (or whole name) is 'xml' or
        'xmlns', so that they are skipped. xml:lang is the exception and,
        like every other attribute, is written once.
    """
    if nodeName != 'xml:lang' and (
      nodeName.split(':', 1)[0] in ('xml', 'xmlns')
    ):
      return 0
    return 1

  def startAttribute(self, nodeName, time=0):
    """ Start an attribute. xml:lang is renamed to lang. For a minimisable
        attribute only the separating space is written, so the value that
        follows stands alone in the tag.
    """
    if nodeName == 'xml:lang':
      nodeName = 'lang'
    if nodeName not in HTML_MINIMISABLE:
      XHTMLSerialiser.startAttribute(self, nodeName, time)
    else:
      self.w(' ')

  def endAttribute(self, nodeName, time=0):
    """ Close the attribute value; minimisable attributes have no quote. """
    if nodeName not in HTML_MINIMISABLE:
      XHTMLSerialiser.endAttribute(self, nodeName, time)

  def startCDATA(self, data, contentType):
    """ Write CDATA content. Stylesheet and script data is written unescaped
        inside a comment, with a closing form matched to the content type;
        data of any other type is written as escaped text.
    """
    if contentType == 'text/css':
      self.w('<!--')
      self.w(data)
      self.w('-->')
    elif contentType in ('text/javascript', 'text/ecmascript', 'text/jscript'):
      self.w('<!--\n')
      self.w(data)
      self.w('//-->')
    elif contentType == 'text/vbscript':
      self.w('<!--\n')
      self.w(data)
      self.w('\'-->')
    else:
      self.writeString(data)

  # Escape '>' in attribute values for extremely broken browsers (DoCoMo/1.0)
  #
  def writeString(self, s):
    """ Write s escaped as in XML, and additionally with '>' escaped while
        inside an attribute value. '&' is replaced first so that entities
        introduced afterwards are not escaped again.
    """
    m = s.replace('&', '&amp;').replace('<', '&lt;').replace(']]>', ']]&gt;')
    if self.inattr:
      m = m.replace('>', '&gt;').replace('"', '&quot;')
      m = m.replace('\n', '&#10;').replace('\t', '&#9;').replace('\r', '&#13;')
    self.w(m, escape='&#%d;')



class TextSerialiser(Serialiser):
  """ Serialiser for plain text: markup is never produced and text reaches
      write() without any escaping.
  """
  name = 'text'

  def writeText(self, node):
    """ Text nodes go through writeString. """
    self.writeString(node.data)

  def writeString(self, s):
    """ Write s unchanged, except inside an attribute value, where nothing
        is written.
    """
    if self.inattr:
      return
    self.w(s)

  def writeCDATA(self, node):
    """ CDATA content is written directly, bypassing writeString. """
    self.w(node.data)

  def writeEntity(self, node):
    """ An entity reference is replaced by the output of its children. """
    self.writeChildren(node)


# HTML element info constants for xhtml and html serialisers
#
# Elements that the xhtml serialiser self-closes when they have no children,
# and for which no end tag is then written.
HTML_EMPTY = [
  'area',
  'base',
  'basefont',
  'br',
  'col',
  'frame',
  'hr',
  'img',
  'input',
  'isindex',
  'link',
  'meta',
  'param',
  # non-standard empty elements
  'bgsound',
  'embed',
]

# Content type assumed for CDATA inside these elements when the element has
# no 'type' attribute of its own.
HTML_DEFAULTTYPES = {
  'style': 'text/css',
  'script': 'text/javascript',
}

# Attributes that the html serialiser writes in minimised form.
HTML_MINIMISABLE = [
  'checked',
  'compact',
  'declare',
  'defer',
  'disabled',
  'ismap',
  'multiple',
  'nohref',
  'noresize',
  'noshade',
  'nowrap',
  'readonly',
  'selected',
]

# Create mapping of serialiser names to their classes.
#
SERIALISERS = {}
for _serialiserClass in (
  XMLSerialiser, XHTMLSerialiser, HTMLSerialiser, TextSerialiser
):
  # Each class is registered under its own 'name' attribute.
  SERIALISERS[_serialiserClass.name] = _serialiserClass
# Do not leave the loop variable behind in the module namespace.
del _serialiserClass

# Pipe class to encode unicode to strings before sending to an output stream.
#
class Encoder:
  """ Wraps an output stream so that everything written to it is first
      encoded to the given encoding.
  """
  def __init__(self, writer, encoding):
    """ writer   -- object whose write() method receives the encoded output.
        encoding -- name of the encoding to convert strings to.
    """
    self.writer = writer
    self.encoding = encoding

  def write(self, s, escape=None):
    """ Write a given byte string or unicode string to the output stream. If
        an escape argument is passed, replace any characters that cannot be
        represented in the output encoding with that string. Include a %d in
        the string to substitute the character number; a string without any
        format conversion is used as it stands. Without an escape argument,
        unrepresentable characters are handled by the codec's 'replace'
        error mode.
    """
    # In Python 1.6, plain strings have no .encode method, so these have to be
    # turned into Unicode first.
    # NOTE(review): UnicodeType is not defined in this file; presumably it
    # comes from the pxtl.constants star-import -- confirm.
    #
    if not isinstance(s, UnicodeType):
      s = unicode(s, 'us-ascii', 'replace')
    if escape is None:
      self.writer.write(s.encode(self.encoding, 'replace'))
    else:
      try:
        self.writer.write(s.encode(self.encoding, 'strict'))
      except UnicodeError:

        # There is at least one unrepresentable character in the string. Try
        # to encode each character separately, using the replacement string
        # for any single characters that fail.
        #
        for char in s:
          try:
            self.writer.write(char.encode(self.encoding, 'strict'))
          except UnicodeError:
            self.writer.write(self._escaped(escape, char))

  def _escaped(self, escape, char):
    """ Return the replacement text for one unrepresentable character: the
        escape template with the character number substituted, or the
        template itself when it cannot be formatted with a single number
        (for instance because it contains no %d).
    """
    try:
      return escape % ord(char)
    except (TypeError, ValueError):
      return escape
