///////
   //    HtmlParser.h
   //    HtmlParser Class declaration
   //
   //    Class for parsing an HTML document and for storing
   //    info into the DB.
   //
   //    Copyright (c) 1999-2004 Comune di Prato - Prato - Italy
   //    Some Portions Copyright (c) 2008 Devise.IT srl <http://www.devise.it/>
   //    Author: Gabriele Bartolini - Prato - Italy <angusgb@users.sourceforge.net>
   //
   //    For copyright details, see the file COPYING in your distribution
   //    or the GNU General Public License version 2 or later 
   //    <http://www.gnu.org/copyleft/gpl.html>
   //
   //    $Id: HtmlParser.h,v 1.30 2008-12-23 09:52:11 angusgb Exp $
   //
   //    G.Bartolini
   //    started: 30.01.2000
///////

#ifndef _HTMLPARSER_H
#define _HTMLPARSER_H

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#ifdef HAVE_STD
#include <iostream>
#include <string>
#ifdef HAVE_NAMESPACES
using namespace std;
#endif
#else
#include <iostream.h>
#include <string.h>
#endif /* HAVE_STD */

#include "Scheduler.h"
#include "HtmlStatement.h"
#include "HtmlAttribute.h"
#include "Link.h"
#include "_Url.h"
#include "AccessibilityCheck.h"

// Character type used by HtmlParser for its tag buffer (text) and for the
// cursors it keeps while scanning (position, linebeginning, ppos, ptext).
#define HTCHECK_CHAR char

///////
   //    HtmlParser
   //
   //    Parses the contents of a HTML document and stores the information
   //    it finds (statements, attributes, links, accessibility checks)
   //    through a Scheduler object.
   //
   //    NOTE(review): the class keeps raw pointers (CurrentScheduler,
   //    BaseUrl) and declares a destructor but no copy operations; whether
   //    copying an instance is safe depends on the ownership rules in the
   //    implementation file - confirm before copying HtmlParser objects.
///////
class HtmlParser
{
   public:

      HtmlParser();
      ~HtmlParser();

      // Enumeration of the parser codes returned by functions.
      // The last enumerator deliberately has no trailing comma: that is
      // only valid from C++11 on, and this header also targets older
      // compilers (see the HAVE_STD fallback at the top of the file).
      enum HtmlParser_Codes
      {
         HtmlParser_NullTag,
         HtmlParser_TagNotStored,
         HtmlParser_MalformedTag,
         HtmlParser_StatementFailed,
         HtmlParser_AttributeFailed,
         HtmlParser_AccessibilityCheckFailed,
         HtmlParser_NoLink,
         HtmlParser_NormalLink,
         HtmlParser_DirectLink,
         HtmlParser_Anchor,
         HtmlParser_LinkFailed,
         HtmlParser_OK
      };

      // Function-call operator: runs the parser using the given Scheduler
      // and returns one of the HtmlParser_Codes values.
      HtmlParser_Codes operator() ( Scheduler &scheduler );

      // Static method for managing the debug level: stores 'd' into the
      // class-wide 'debug' attribute.
      static void SetDebugLevel (int d) { debug=d;}

   protected:

   ///////
      // Protected Functions
   ///////

      HtmlParser_Codes ParseTag();           // Parse a HTML statement
      int CheckTag(const HtmlStatement& tag);   // Check if a tag has to be stored
      HtmlParser_Codes FindLink();           // Find a link
      /*
      const std::string encodeSGML(const std::string &str);
      const std::string decodeSGML(const std::string &str);
      */

   ///////
      // Protected Attributes
   ///////

      // Scheduler Object for getting/putting info from/into
      // memory and DB
      Scheduler *CurrentScheduler;

      // Base Url used for resolving relative paths
      _Url *BaseUrl;

      // Temporary buffer for tags storage
      HTCHECK_CHAR text[8192];

      // position is set to the beginning of the retrieved document contents
      HTCHECK_CHAR *position;

      // linebeginning is set to the beginning of the current line
      // (newRow() assigns it the current value of position)
      HTCHECK_CHAR *linebeginning;

      // Temporary cursor for source string (contents)
      HTCHECK_CHAR *ppos;

      // Temporary cursor for destination string (text -> tags)
      HTCHECK_CHAR *ptext;

      // Counter of document tags
      unsigned int TagPosition;

      // Row number (incremented by newRow())
      unsigned int row;

      // Col number
      unsigned int col;

      // Last tag with a link
      unsigned int LastLinkTagPosition;

      // Temporary Object for HtmlStatement storing
      HtmlStatement htmlstatement;

      // Temporary Object for HtmlAttribute storing
      HtmlAttribute htmlattribute;

      // Temporary Object for Link storing
      Link link;

      // Temporary std::string for Charset specification
      std::string Charset;

      // Temporary std::string for DocType specification
      std::string DocType;

      // HTML Description of a link (<A href="uri">description</a>)
      std::string LinkDescription;

      // Temporary std::string for Description
      std::string Description;

      // Temporary std::string for Keywords
      std::string Keywords;

      // HTML document language (HTML lang="xx(x)" according to ISO 639)
      std::string DocLanguage;

#ifdef HTDIG_NOTIFICATION
      // Temporary std::string for htdig-email directive
      std::string HtDigEmail;

      // Temporary std::string for htdig-email-subject directive
      std::string HtDigEmailSubject;

      // Temporary std::string for htdig-notification-date directive
      std::string HtDigNotificationDate;
#endif

      // Current header level
      int CurrentHx;

      // Previous header level
      int PreviousHx;

      // Current header level step
      int HxStep;

      // Current alternative text
      std::string CurrentAltText;

      // Current resource reference
      std::string CurrentResourceRef;

      // Previous ALT attribute position
      unsigned int AltAttrPosition;

///////
   //    Internal flags
///////

   bool ignore;      // if true we ignore the tags
   bool memo;        // Has the tag to be stored? true=yes
   int location;    // location in the document (script, title, link, etc.)
   int doc_acheck; // accessibility check info (document level)
   bool store_statement; // should we store the statement?
   HtmlStatement::ElementLabel CurrentTag;   // current tag


///////
   //    Static attributes
///////

      static int debug;    // Run-time debugging level

///////
   //    Helper methods
///////

      // Encode an URL (static: operates only on its arguments; 'str' is
      // passed by non-const reference so it can be modified in place)
      static void encodeURL(std::string &str, const std::string& reserved_chars);

      // Insert an accessibility check record into the database
      bool InsertAccessibilityCheck(unsigned int idurl, unsigned int tagposition,
         unsigned int attrposition, unsigned int code);

      // Returns the length of a string (skipping consecutive spaces)
      unsigned CountSGMLStringLength(const char* str);

      // Returns an integer with results of a check regarding an ALT text
      unsigned CheckAlt();

#ifdef HTDIG_NOTIFICATION
      // Properly set the htDig notification date
      bool parseDate(const std::string& date);

      // Test whether a date is correct
      bool testDate(const int dd, const int mm, const int yy) const;

      // Set the ht://Dig notification date
      void setHtDigNotificationDate(const int dd, const int mm, const int yy);
#endif

      // Start a new row: sets linebeginning to position and increments row.
      // Declared inline here so the out-of-class definition in this header
      // is an inline function.
      inline void newRow();

};


// Begin a new row of the document: count it and remember where it starts
// (the current value of position). The function is inline through its
// in-class declaration.
void HtmlParser::newRow()
{
	row += 1;
	linebeginning = position;
}

#endif
