/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* libe-book
 * Version: MPL 2.0 / LGPLv2.1+
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU Lesser General Public License Version 2.1 or later
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
 * applicable instead of those above.
 *
 * For further information visit http://libebook.sourceforge.net
 */

#include <algorithm>
#include <cassert>
#include <cstring>
#include <deque>
#include <utility>

#include <boost/algorithm/string/case_conv.hpp>
#include <boost/assign.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/optional.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/spirit/include/classic.hpp>
#include <boost/unordered_map.hpp>

#include <libwpd/WPXPropertyList.h>
#include <libwpd/WPXString.h>

#include "libebook_utils.h"
#include "EBOOKCharsetConverter.h"
#include "EBOOKMemoryStream.h"
#include "EBOOKUTF8Stream.h"
#include "TDParser.h"
#include "PDXLZ77Stream.h"

using boost::lexical_cast;
using boost::optional;
using boost::to_lower;

using std::deque;
using std::string;
using std::vector;

namespace libebook
{

namespace
{

// Record size that the index record of a TealDoc file is expected to declare.
const unsigned TD_BLOCK_SIZE = 4096;

// Type and creator codes that identify a TealDoc database.
const unsigned TD_TYPE = PDX_CODE("TEXt");
const unsigned TD_CREATOR = PDX_CODE("TlDc");

}

namespace
{

// Font selected by the "font" attribute of a header tag.
// parseAttribute() maps the numeric attribute values 0, 1 and 2 to
// FONT_NORMAL, FONT_BOLD and FONT_LARGE respectively.
enum Font
{
  FONT_NORMAL,
  FONT_BOLD,
  FONT_LARGE
};

// Text style selected by the "style" attribute of a header tag
// ("normal", "underline" or "invert").
enum Style
{
  STYLE_NORMAL,
  STYLE_UNDERLINE,
  STYLE_INVERT
};

// Paragraph alignment selected by the "align" attribute of a header tag
// ("left", "right" or "center").
enum Align
{
  ALIGN_LEFT,
  ALIGN_RIGHT,
  ALIGN_CENTER
};

// Identifiers for the keywords recognized inside tags. They are the mapped
// values of the string -> Token lookup tables built in
// TDTextParser::parseTag() and TDTextParser::parseAttribute().
enum Token
{
  // tags (looked up by tag name in parseTag())
  TOKEN_BOOKMARK,
  TOKEN_HEADER,
  TOKEN_HRULE,
  TOKEN_LABEL,
  TOKEN_LINK,
  TOKEN_TEALPAINT,
  // attributes (looked up by attribute name in parseAttribute())
  TOKEN_ALIGN,
  TOKEN_FONT,
  TOKEN_STYLE,
  TOKEN_TEXT,
  // values (looked up by lower-cased attribute value in parseAttribute())
  TOKEN_CENTER,
  TOKEN_INVERT,
  TOKEN_LEFT,
  TOKEN_NORMAL,
  TOKEN_RIGHT,
  TOKEN_UNDERLINE
};

// Formatting collected from the attributes of a single tag.
// Each member stays unset until parseAttribute() sees a recognized value
// for the corresponding attribute.
struct TDAttributes
{
  // Start with every setting unset.
  TDAttributes()
    : font()
    , style()
    , align()
  {
  }

  optional<Font> font;
  optional<Style> style;
  optional<Align> align;
};

}

// Parser for the (already decompressed) text of TealDoc data records.
//
// Plain text is accumulated and emitted to the document as paragraphs
// (one per line); "<...>" sequences are tried as tags first and fall
// back to literal text if they do not parse as a tag.
class TDTextParser
{
  // -Weffc++
  // Copying is disabled: both members are private and no definition is
  // provided in this file. The assignment operator uses the canonical
  // signature (returning a reference).
  TDTextParser(const TDTextParser &other);
  TDTextParser &operator=(const TDTextParser &other);

  // Attributes of one tag, in the order they were parsed, as
  // (lower-cased name, raw value) pairs.
  typedef deque<std::pair<string, string> > Attributes_t;
  // Keyword -> token lookup table.
  typedef boost::unordered_map<string, Token> TokenMap_t;

  // Semantic action for the tag grammar: each invocation appends the
  // current (name, value) pair to the attribute list.
  //
  // All three members are references; the referenced objects must outlive
  // the action (parseTag() keeps them as locals for the whole parse).
  class SaveAttribute
  {
  public:
    SaveAttribute(Attributes_t &attributes, const string &attributeName, const string &attributeValue);

    void operator()(const char *first, const char *last) const;

  private:
    Attributes_t &m_attributes;
    const string &m_attributeName;
    const string &m_attributeValue;
  };

public:
  // document receives the generated paragraphs and spans; it is stored,
  // not owned.
  explicit TDTextParser(WPXDocumentInterface *document);

  // Consume input until its end. Text not yet terminated by a newline is
  // kept for the next call unless last is true, in which case it is
  // flushed as a final paragraph.
  void parse(WPXInputStream *input, bool last = false);

private:
  // Try to read a tag; the leading '<' has already been consumed.
  // Returns false (with the stream rewound) if the content is not a tag.
  bool parseTag(WPXInputStream *input);

  // Emit a header paragraph described by attributeList.
  bool parseHeaderTag(const Attributes_t &attributeList);
  // Interpret a single attribute and record its effect in attributes
  // (or in the pending text, for the "text" attribute).
  void parseAttribute(const string &name, const string &rawValue, TDAttributes &attributes);

  void openParagraph(const TDAttributes &attributes = TDAttributes());
  void closeParagraph();

  // Flush pending text and close the current paragraph.
  void finishParagraph();
  // Emit pending text as a span, opening a paragraph first if needed.
  void flushText(const TDAttributes &attributes = TDAttributes());

private:
  WPXDocumentInterface *const m_document;

  // Text collected since the last flush.
  string m_text;

  // True between openParagraph() and closeParagraph().
  bool m_openedParagraph;
};

// Bind the action to the attribute list it appends to and to the two
// strings it reads the current attribute name and value from. Only
// references are stored; nothing is copied here.
TDTextParser::SaveAttribute::SaveAttribute(Attributes_t &attributes, const string &attributeName, const string &attributeValue)
  : m_attributes(attributes)
  , m_attributeName(attributeName)
  , m_attributeValue(attributeValue)
{
}

// Append the current attribute to the list as (lower-cased name, raw value).
//
// The matched range passed by the grammar is not used; name and value are
// read from the strings the grammar has assigned beforehand.
//
// The value is deliberately stored unchanged: parseAttribute() lower-cases
// it itself where it is a keyword (align, style) and uses it verbatim for
// the "text" attribute.
void TDTextParser::SaveAttribute::operator()(const char *, const char *) const
{
  string attributeName(m_attributeName);
  to_lower(attributeName);
  m_attributes.push_back(std::make_pair(attributeName, m_attributeValue));
}

// Create a parser that writes to document. The parser starts with no
// pending text and no open paragraph.
TDTextParser::TDTextParser(WPXDocumentInterface *const document)
  : m_document(document)
  , m_text()
  , m_openedParagraph(false)
{
}

// Read input byte by byte until it is exhausted.
//
// A newline ends the current paragraph. A '<' is handed to parseTag(); if
// that does not recognize a tag, the '<' is kept as ordinary text. Every
// other byte is appended to the pending text.
//
// If last is set, the text still pending at the end is flushed as a
// paragraph of its own.
void TDTextParser::parse(WPXInputStream *const input, const bool last)
{
  while (!input->atEOS())
  {
    const unsigned char ch = readU8(input);

    if ('\n' == ch)
      finishParagraph();
    else if ('<' != ch)
      m_text.push_back(ch);
    else if (!parseTag(input))
      m_text.push_back('<');
  }

  if (last)
    finishParagraph();
}

// Try to parse a tag whose opening '<' has just been read from input.
//
// The text up to and including the next '>' is collected and matched
// against a small grammar: a tag name, whitespace and an optional
// whitespace-separated list of name=value attributes (value either quoted
// or a bare alphanumeric word).
//
// Returns true if the text is a syntactically valid tag; recognized tags
// are processed, unrecognized ones are silently dropped. Returns false if
// it is not a tag (including the case where the stream ends before a '>'
// is found); input is then repositioned right after the '<' so the caller
// can treat the content as plain text.
bool TDTextParser::parseTag(WPXInputStream *const input)
{
  const unsigned long pos = input->tell();
  string tag("<");

  // read tag into string
  // I suppose it would be possible to create an iterator adaptor for
  // WPXInputStream, but this is much simpler
  unsigned char c = 0;
  do
  {
    // An unterminated '<' is not a tag. Never read past the end of the
    // stream looking for the closing '>'.
    if (input->atEOS())
    {
      input->seek(pos, WPX_SEEK_SET);
      return false;
    }

    c = readU8(input);
    tag.push_back(c);
  }
  while ('>' != c);

  // parse tag
  bool success = false;
  string tagName;
  Attributes_t attributes;

  {
    namespace spirit = boost::spirit::classic;
    using namespace spirit;

    // Filled by the grammar; SaveAttribute reads them by reference, so they
    // have to stay alive for the whole parse.
    string attributeName;
    string attributeValue;

    rule<> name_r = as_lower_d[alpha_p && *alnum_p];
    rule<> tag_r = confix_p('<',
                            name_r[assign_a(tagName)] && +space_p
                            && !list_p(
                              name_r[assign_a(attributeName)] && '='
                              && (confix_p('"', (*anychar_p)[assign_a(attributeValue)], '"')
                                  | confix_p('\'', (*anychar_p)[assign_a(attributeValue)], '\'')
                                  | (+alnum_p)[assign_a(attributeValue)]
                                 )[SaveAttribute(attributes, attributeName, attributeValue)],
                              +space_p
                            ),
                            '>')
                   ;

    const parse_info<> result = spirit::parse(tag.c_str(), tag_r);
    // Only a match that consumed the whole string counts as a tag.
    success = result.hit && result.full;
  }

  // process tag
  if (success)
  {
    const TokenMap_t tagMap = boost::assign::map_list_of
                              (string("bookmark"), TOKEN_BOOKMARK)
                              (string("header"), TOKEN_HEADER)
                              (string("hrule"), TOKEN_HRULE)
                              (string("label"), TOKEN_LABEL)
                              (string("link"), TOKEN_LINK)
                              (string("tealpaint"), TOKEN_TEALPAINT)
                              ;

    to_lower(tagName);
    const TokenMap_t::const_iterator it = tagMap.find(tagName);

    if (it != tagMap.end())
    {
      switch (it->second)
      {
      case TOKEN_BOOKMARK :
      case TOKEN_HRULE :
      case TOKEN_LABEL :
      case TOKEN_LINK :
        // ignore
        break;

      case TOKEN_HEADER :
        // A header is a paragraph of its own: end the current one first.
        finishParagraph();
        parseHeaderTag(attributes);
        break;
      case TOKEN_TEALPAINT :
        // TODO: handle
        break;

      default :
        break;
      }
    }
  }
  else
    input->seek(pos, WPX_SEEK_SET);

  return success;
}

bool TDTextParser::parseHeaderTag(const Attributes_t &attributeList)
{
  TDAttributes attributes;

  for (Attributes_t::const_iterator it = attributeList.begin(); it != attributeList.end(); ++it)
    parseAttribute(it->first, it->second, attributes);

  openParagraph(attributes);
  flushText(attributes);
  closeParagraph();

  return true;
}

void TDTextParser::parseAttribute(const string &name, const string &rawValue, TDAttributes &attributes)
{
  const TokenMap_t attributeMap = boost::assign::map_list_of
                                  // attributes
                                  (string("align"), TOKEN_ALIGN)
                                  (string("font"), TOKEN_FONT)
                                  (string("style"), TOKEN_STYLE)
                                  (string("text"), TOKEN_TEXT)
                                  // values
                                  (string("center"), TOKEN_CENTER)
                                  (string("invert"), TOKEN_INVERT)
                                  (string("left"), TOKEN_LEFT)
                                  (string("normal"), TOKEN_NORMAL)
                                  (string("right"), TOKEN_RIGHT)
                                  (string("underline"), TOKEN_UNDERLINE)
                                  ;

  const TokenMap_t::const_iterator attributeToken = attributeMap.find(name);

  if (attributeMap.end() != attributeToken)
  {
    string value(rawValue);

    switch (attributeToken->second)
    {
    case TOKEN_ALIGN :
    {
      to_lower(value);
      const TokenMap_t::const_iterator valueToken = attributeMap.find(value);
      if (attributeMap.end() != valueToken)
      {
        switch (valueToken->second)
        {
        case TOKEN_CENTER :
          attributes.align = ALIGN_CENTER;
          break;
        case TOKEN_LEFT :
          attributes.align = ALIGN_LEFT;
          break;
        case TOKEN_RIGHT :
          attributes.align = ALIGN_RIGHT;
          break;
        default :
          EBOOK_DEBUG_MSG(("unknown alignment %s\n", value.c_str()));
          break;
        }
      }
      break;
    }
    case TOKEN_FONT :
    {
      const unsigned font = lexical_cast<unsigned>(value);
      switch (font)
      {
      case 0 :
        attributes.font = FONT_NORMAL;
        break;
      case 1 :
        attributes.font = FONT_BOLD;
        break;
      case 2 :
        attributes.font = FONT_LARGE;
        break;
      default :
        EBOOK_DEBUG_MSG(("unknown font type %d\n", font));
      }
      break;
    }
    case TOKEN_STYLE :
    {
      to_lower(value);
      const TokenMap_t::const_iterator valueToken = attributeMap.find(value);
      if (attributeMap.end() != valueToken)
      {
        switch (valueToken->second)
        {
        case TOKEN_INVERT :
          attributes.style = STYLE_INVERT;
          break;
        case TOKEN_NORMAL :
          attributes.style = STYLE_NORMAL;
          break;
        case TOKEN_UNDERLINE :
          attributes.style = STYLE_UNDERLINE;
          break;
        default :
          EBOOK_DEBUG_MSG(("unknown style %s\n", value.c_str()));
          break;
        }
      }
      break;
    }
    case TOKEN_TEXT :
      m_text = value;
      break;
    default :
      break;
    }
  }
}

void TDTextParser::openParagraph(const TDAttributes &attributes)
{
  WPXPropertyList props;

  if (attributes.align)
  {
    switch (get(attributes.align))
    {
    case ALIGN_LEFT :
      props.insert("fo:text-align", "left");
      break;
    case ALIGN_RIGHT :
      props.insert("fo:text-align", "end");
      break;
    case ALIGN_CENTER :
      props.insert("fo:text-align", "center");
      break;
    default :
      break;
    }
  }

  m_document->openParagraph(props, WPXPropertyListVector());
  m_openedParagraph = true;
}

// Close the current paragraph in the document, if one is open.
// Calling this without an open paragraph does nothing.
void TDTextParser::closeParagraph()
{
  if (!m_openedParagraph)
    return;

  m_document->closeParagraph();
  m_openedParagraph = false;
}

// End the current line of text: emit whatever text is pending (with
// default attributes) and close the paragraph. Because flushText() opens a
// paragraph when none is open, an empty line still produces an (empty)
// paragraph.
void TDTextParser::finishParagraph()
{
  flushText();
  closeParagraph();
}

void TDTextParser::flushText(const TDAttributes &attributes)
{
  if (!m_openedParagraph)
    openParagraph(attributes);

  if (!m_text.empty())
  {
    WPXPropertyList props;

    if (attributes.font)
    {
      switch (get(attributes.font))
      {
      case FONT_BOLD :
        props.insert("fo:font-weight", "bold");
        break;
      case FONT_LARGE :
        // TODO: handle
        break;
      case FONT_NORMAL :
      // fall through
      default :
        break;
      }
    }

    if (attributes.style)
    {
      switch (get(attributes.style))
      {
      case STYLE_UNDERLINE :
        props.insert("style:text-underline-type", "single");
        break;
      case STYLE_INVERT :
        props.insert("fo:color", "#FFFFFF");
        props.insert("fo:background-color", "#000000");
        break;
      case STYLE_NORMAL :
      // fall through
      default :
        break;
      }
    }

    m_document->openSpan(props);
    m_document->insertText(WPXString(m_text.c_str()));
    m_document->closeSpan();

    m_text.clear();
  }
}

// Create a TealDoc parser reading from input and writing to document.
//
// Both arguments are forwarded to PDXParser. The index information starts
// zeroed (it is filled in by readIndexRecord()), there is no charset
// converter yet (see createConverter()), and a new TDTextParser writing to
// document is allocated; the destructor deletes it.
TDParser::TDParser(WPXInputStream *input, WPXDocumentInterface *document)
  : PDXParser(input, document)
  , m_compressed(false)
  , m_textLength(0)
  , m_recordCount(0)
  , m_recordSize(0)
  , m_read(0)
  , m_openedDocument(false)
  , m_converter(0)
  , m_textParser(new TDTextParser(document))
{
}

// Release the objects allocated by this parser: the charset converter
// (may still be null if no data record has been read) and the text parser.
TDParser::~TDParser()
{
  delete m_converter;
  delete m_textParser;
}

// Tell whether a database with the given type and creator codes is a
// TealDoc document, i.e. both codes match TD_TYPE and TD_CREATOR.
bool TDParser::isFormatSupported(const unsigned type, const unsigned creator)
{
  if (TD_TYPE != type)
    return false;

  return TD_CREATOR == creator;
}

// Intentionally empty; the stream argument is ignored.
void TDParser::readAppInfoRecord(WPXInputStream *)
{
  // there is no appInfo in TealDoc
}

// Intentionally empty; the stream argument is ignored.
void TDParser::readSortInfoRecord(WPXInputStream *)
{
  // there is no sortInfo in TealDoc
}

// Read the index record, which describes the text stored in the data
// records: compression flag, total text length, number of records and
// record size. The fields are read in the order they appear in the stream.
//
// NOTE(review): all validation here is done with assert(), so it is
// compiled out when NDEBUG is defined; in such builds any compression
// value other than 2 is treated as "not compressed" and inconsistent
// counts/sizes are accepted silently. Consider reporting malformed input
// through the parser's normal error path instead.
void TDParser::readIndexRecord(WPXInputStream *const input)
{
  // 1 and 2 are the only expected values; 2 selects the compressed path
  // in readDataRecord()
  const uint16_t compression = readU16(input, true);
  assert(1 == compression || 2 == compression);
  m_compressed = 2 == compression;
  // two bytes that are not used by this parser
  skip(input, 2);
  m_textLength = readU32(input, true);
  m_recordCount = readU16(input, true);
  m_recordSize = readU16(input, true);

  // check consistency
  assert(m_recordCount == getDataRecordCount());
  assert(TD_BLOCK_SIZE == m_recordSize);
}

// Read one data record and feed its text to the text parser.
//
// input - stream positioned at the record's data; wrapped in an LZ77
//         decompressing stream if the index record declared compression
// last  - true for the final data record; pending text is then flushed and
//         the document is closed
//
// The first record processed also creates the charset converter (the
// encoding is guessed from this record's bytes) and opens the document.
void TDParser::readDataRecord(WPXInputStream *input, const bool last)
{
  vector<char> uncompressed;
  uncompressed.reserve(m_recordSize);

  // Owns the decompressing wrapper, if one is needed, for the duration of
  // this call.
  boost::scoped_ptr<WPXInputStream> compressedInput;

  if (m_compressed)
  {
    compressedInput.reset(new PDXLZ77Stream(input));
    input = compressedInput.get();
  }

  // Slurp the whole (decompressed) record and account for its size.
  const long origPos = input->tell();
  while (!input->atEOS())
    uncompressed.push_back(readU8(input));
  m_read += (input->tell() - origPos);

  // The records together must add up to the length given in the index.
  assert(m_read <= m_textLength);
  assert(!last || (m_read == m_textLength));

  if (!m_openedDocument)
  {
    createConverter(uncompressed);
    openDocument();
  }

  // Taking &uncompressed[0] of an empty vector is undefined behavior, so an
  // empty record is represented by a valid pointer with length 0 instead.
  unsigned char placeholder = 0;
  unsigned char *const data = uncompressed.empty()
                              ? &placeholder
                              : reinterpret_cast<unsigned char *>(&uncompressed[0]);

  EBOOKMemoryStream uncompressedStrm(data, uncompressed.size());
  EBOOKUTF8Stream utf8Strm(&uncompressedStrm);

  m_textParser->parse(&utf8Strm, last);

  if (last)
    closeDocument();
}

void TDParser::createConverter(const std::vector<char> &text)
{
  if (text.empty())
    return;

  EBOOKCharsetConverter *const converter = new EBOOKCharsetConverter();
  if (converter->guessEncoding(&text[0], text.size()))
    m_converter = converter;
  else
  {
    delete converter;
    throw GenericException();
  }
}

// Start the output document: emit the document start, the metadata and
// open the page span. Does nothing if the document is already open.
//
// The document name, converted to UTF-8, is used as "dc:title". The title
// is omitted if there is no converter, the conversion fails or the result
// is empty.
void TDParser::openDocument()
{
  if (m_openedDocument)
    return;

  getDocument()->startDocument();

  WPXPropertyList metadata;

  // createConverter() leaves m_converter null when the first record
  // contains no text, so it must not be dereferenced unconditionally.
  if (m_converter)
  {
    vector<char> nameUtf8;
    if (m_converter->convertBytes(getName(), std::strlen(getName()), nameUtf8) && !nameUtf8.empty())
    {
      // WPXString is constructed from a C string. NOTE(review): whether
      // convertBytes() terminates its output is not visible here; adding a
      // terminator is harmless if it already does.
      nameUtf8.push_back('\0');
      metadata.insert("dc:title", WPXString(&nameUtf8[0]));
    }
  }

  getDocument()->setDocumentMetaData(metadata);
  getDocument()->openPageSpan(WPXPropertyList());

  m_openedDocument = true;
}

// Finish the output document: close the page span, end the document and
// mark it as no longer open.
// NOTE(review): unlike openDocument(), this does not check
// m_openedDocument first; callers are expected to call it only after the
// document has been opened.
void TDParser::closeDocument()
{
  getDocument()->closePageSpan();
  getDocument()->endDocument();
  m_openedDocument = false;
}

}

/* vim:set shiftwidth=2 softtabstop=2 expandtab: */
