{- This module was generated from data in the Kate syntax highlighting file html.xml, version 1.99,
   by  Wilbert Berendsen (wilbert@kde.nl) -}

module Text.Highlighting.Kate.Syntax.Html ( highlight, parseExpression, syntaxName, syntaxExtensions ) where
import Text.Highlighting.Kate.Definitions
import Text.Highlighting.Kate.Common
import qualified Text.Highlighting.Kate.Syntax.Alert
import qualified Text.Highlighting.Kate.Syntax.Css
import qualified Text.Highlighting.Kate.Syntax.Javascript
import Text.ParserCombinators.Parsec
import Data.List (nub)
import qualified Data.Set as Set
import Data.Map (fromList)
import Data.Maybe (fromMaybe)

-- | Full name of language.
syntaxName :: String
syntaxName = "HTML"

-- | Filename extensions for this language.
syntaxExtensions :: String
syntaxExtensions = "*.htm;*.html;*.shtml;*.shtm"

-- | Highlight source code using this syntax definition.
highlight :: String -> Either String [SourceLine]
highlight input =
  case runParser parseSource startingState "source" input of
    Left err     -> Left $ show err
    Right result -> Right result

-- | Parse an expression using appropriate local context.
parseExpression :: GenParser Char SyntaxState LabeledSource
parseExpression = do
  st <- getState
  let oldLang = synStLanguage st
  setState $ st { synStLanguage = "HTML" }
  context <- currentContext <|> (pushContext "Start" >> currentContext)
  result <- parseRules context
  updateState $ \st -> st { synStLanguage = oldLang }
  return result

parseSource = do 
  lineContents <- lookAhead wholeLine
  updateState $ \st -> st { synStCurrentLine = lineContents }
  result <- manyTill parseSourceLine eof
  return $ map normalizeHighlighting result

startingState = SyntaxState {synStContexts = fromList [("HTML",["Start"])], synStLanguage = "HTML", synStCurrentLine = "", synStCharsParsedInLine = 0, synStPrevChar = '\n', synStCaseSensitive = True, synStKeywordCaseSensitive = True, synStCaptures = []}

parseSourceLine = manyTill parseExpressionInternal pEndLine

pEndLine = do
  newline <|> (eof >> return '\n')
  context <- currentContext
  case context of
    "Start" -> return ()
    "FindHTML" -> return ()
    "FindEntityRefs" -> return ()
    "FindPEntityRefs" -> return ()
    "FindAttributes" -> return ()
    "FindDTDRules" -> return ()
    "Comment" -> return ()
    "CDATA" -> return ()
    "PI" -> return ()
    "Doctype" -> return ()
    "Doctype Internal Subset" -> return ()
    "Doctype Markupdecl" -> return ()
    "Doctype Markupdecl DQ" -> return ()
    "Doctype Markupdecl SQ" -> return ()
    "El Open" -> return ()
    "El Close" -> return ()
    "El Close 2" -> return ()
    "El Close 3" -> return ()
    "CSS" -> return ()
    "CSS content" -> return ()
    "JS" -> return ()
    "JS content" -> return ()
    "JS comment close" -> (popContext >> return ())
    "Value" -> return ()
    "Value NQ" -> (popContext >> popContext >> return ())
    "Value DQ" -> return ()
    "Value SQ" -> return ()
    _ -> return ()
  lineContents <- lookAhead wholeLine
  updateState $ \st -> st { synStCurrentLine = lineContents, synStCharsParsedInLine = 0, synStPrevChar = '\n' }

withAttribute attr txt = do
  if null txt
     then fail "Parser matched no text"
     else return ()
  let style = fromMaybe "" $ lookup attr styles
  st <- getState
  let oldCharsParsed = synStCharsParsedInLine st
  let prevchar = if null txt then '\n' else last txt
  updateState $ \st -> st { synStCharsParsedInLine = oldCharsParsed + length txt, synStPrevChar = prevchar } 
  return (nub [style, attr], txt)

styles = [("Normal Text","Normal"),("Comment","Comment"),("CDATA","BaseN"),("Processing Instruction","Keyword"),("Doctype","DataType"),("Element","Keyword"),("Attribute","Others"),("Value","String"),("EntityRef","DecVal"),("PEntityRef","DecVal"),("Error","Error")]

parseExpressionInternal = do
  context <- currentContext
  parseRules context <|> (pDefault >>= withAttribute (fromMaybe "" $ lookup context defaultAttributes))


regex_'3c'21DOCTYPE'5cs'2b = compileRegex "<!DOCTYPE\\s+"
regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a = compileRegex "<\\?[\\w:-]*"
regex_'3cstyle'5cb = compileRegex "<style\\b"
regex_'3cscript'5cb = compileRegex "<script\\b"
regex_'3cpre'5cb = compileRegex "<pre\\b"
regex_'3cdiv'5cb = compileRegex "<div\\b"
regex_'3ctable'5cb = compileRegex "<table\\b"
regex_'3cul'5cb = compileRegex "<ul\\b"
regex_'3col'5cb = compileRegex "<ol\\b"
regex_'3cdl'5cb = compileRegex "<dl\\b"
regex_'3c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "<[A-Za-z_:][\\w.:_-]*"
regex_'3c'2fpre'5cb = compileRegex "</pre\\b"
regex_'3c'2fdiv'5cb = compileRegex "</div\\b"
regex_'3c'2ftable'5cb = compileRegex "</table\\b"
regex_'3c'2ful'5cb = compileRegex "</ul\\b"
regex_'3c'2fol'5cb = compileRegex "</ol\\b"
regex_'3c'2fdl'5cb = compileRegex "</dl\\b"
regex_'3c'2f'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "</[A-Za-z_:][\\w.:_-]*"
regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b = compileRegex "&(#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z_:][\\w.:_-]*);"
regex_'25'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b = compileRegex "%[A-Za-z_:][\\w.:_-]*;"
regex_'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "[A-Za-z_:][\\w.:_-]*"
regex_'5cs'2b'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "\\s+[A-Za-z_:][\\w.:_-]*"
regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb = compileRegex "<!(ELEMENT|ENTITY|ATTLIST|NOTATION)\\b"
regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b = compileRegex "-(-(?!->))+"
regex_'5cS = compileRegex "\\S"
regex_'3c'2fstyle'5cb = compileRegex "</style\\b"
regex_'3c'2fscript'5cb = compileRegex "</script\\b"
regex_'2f'2f'28'3f'3d'2e'2a'3c'2fscript'5cb'29 = compileRegex "//(?=.*</script\\b)"
regex_'2f'28'3f'21'3e'29 = compileRegex "/(?!>)"
regex_'5b'5e'2f'3e'3c'22'27'5cs'5d = compileRegex "[^/><\"'\\s]"

defaultAttributes = [("Start","Normal Text"),("FindHTML","Normal Text"),("FindEntityRefs","Normal Text"),("FindPEntityRefs","Normal Text"),("FindAttributes","Normal Text"),("FindDTDRules","Normal Text"),("Comment","Comment"),("CDATA","Normal Text"),("PI","Normal Text"),("Doctype","Normal Text"),("Doctype Internal Subset","Normal Text"),("Doctype Markupdecl","Normal Text"),("Doctype Markupdecl DQ","Value"),("Doctype Markupdecl SQ","Value"),("El Open","Normal Text"),("El Close","Normal Text"),("El Close 2","Normal Text"),("El Close 3","Normal Text"),("CSS","Normal Text"),("CSS content","Normal Text"),("JS","Normal Text"),("JS content","Normal Text"),("JS comment close","Comment"),("Value","Normal Text"),("Value NQ","Normal Text"),("Value DQ","Value"),("Value SQ","Value")]

parseRules "Start" = 
  do (attr, result) <- ((parseRules "FindHTML"))
     return (attr, result)

parseRules "FindHTML" = 
  do (attr, result) <- (((pDetectSpaces >>= withAttribute "Normal Text"))
                        <|>
                        ((pDetectIdentifier >>= withAttribute "Normal Text"))
                        <|>
                        ((pString False "<!--" >>= withAttribute "Comment") >>~ pushContext "Comment")
                        <|>
                        ((pString False "<![CDATA[" >>= withAttribute "CDATA") >>~ pushContext "CDATA")
                        <|>
                        ((pRegExpr regex_'3c'21DOCTYPE'5cs'2b >>= withAttribute "Doctype") >>~ pushContext "Doctype")
                        <|>
                        ((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a >>= withAttribute "Processing Instruction") >>~ pushContext "PI")
                        <|>
                        ((pRegExpr regex_'3cstyle'5cb >>= withAttribute "Element") >>~ pushContext "CSS")
                        <|>
                        ((pRegExpr regex_'3cscript'5cb >>= withAttribute "Element") >>~ pushContext "JS")
                        <|>
                        ((pRegExpr regex_'3cpre'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
                        <|>
                        ((pRegExpr regex_'3cdiv'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
                        <|>
                        ((pRegExpr regex_'3ctable'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
                        <|>
                        ((pRegExpr regex_'3cul'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
                        <|>
                        ((pRegExpr regex_'3col'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
                        <|>
                        ((pRegExpr regex_'3cdl'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
                        <|>
                        ((pRegExpr regex_'3c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Element") >>~ pushContext "El Open")
                        <|>
                        ((pRegExpr regex_'3c'2fpre'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
                        <|>
                        ((pRegExpr regex_'3c'2fdiv'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
                        <|>
                        ((pRegExpr regex_'3c'2ftable'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
                        <|>
                        ((pRegExpr regex_'3c'2ful'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
                        <|>
                        ((pRegExpr regex_'3c'2fol'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
                        <|>
                        ((pRegExpr regex_'3c'2fdl'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
                        <|>
                        ((pRegExpr regex_'3c'2f'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Element") >>~ pushContext "El Close")
                        <|>
                        ((parseRules "FindDTDRules"))
                        <|>
                        ((parseRules "FindEntityRefs")))
     return (attr, result)

parseRules "FindEntityRefs" = 
  do (attr, result) <- (((pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute "EntityRef"))
                        <|>
                        ((pAnyChar "&<" >>= withAttribute "Error")))
     return (attr, result)

parseRules "FindPEntityRefs" = 
  do (attr, result) <- (((pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute "EntityRef"))
                        <|>
                        ((pRegExpr regex_'25'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b >>= withAttribute "PEntityRef"))
                        <|>
                        ((pAnyChar "&%" >>= withAttribute "Error")))
     return (attr, result)

parseRules "FindAttributes" = 
  do (attr, result) <- (((pColumn 0 >> pRegExpr regex_'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Attribute"))
                        <|>
                        ((pRegExpr regex_'5cs'2b'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Attribute"))
                        <|>
                        ((pDetectChar False '=' >>= withAttribute "Attribute") >>~ pushContext "Value"))
     return (attr, result)

parseRules "FindDTDRules" = 
  do (attr, result) <- ((pRegExpr regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb >>= withAttribute "Doctype") >>~ pushContext "Doctype Markupdecl")
     return (attr, result)

parseRules "Comment" = 
  do (attr, result) <- (((pDetectSpaces >>= withAttribute "Comment"))
                        <|>
                        ((Text.Highlighting.Kate.Syntax.Alert.parseExpression >>= ((withAttribute "") . snd)))
                        <|>
                        ((pDetectIdentifier >>= withAttribute "Comment"))
                        <|>
                        ((pString False "-->" >>= withAttribute "Comment") >>~ (popContext >> return ()))
                        <|>
                        ((pRegExpr regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b >>= withAttribute "Error")))
     return (attr, result)

parseRules "CDATA" = 
  do (attr, result) <- (((pDetectSpaces >>= withAttribute "Normal Text"))
                        <|>
                        ((pDetectIdentifier >>= withAttribute "Normal Text"))
                        <|>
                        ((pString False "]]>" >>= withAttribute "CDATA") >>~ (popContext >> return ()))
                        <|>
                        ((pString False "]]&gt;" >>= withAttribute "EntityRef")))
     return (attr, result)

parseRules "PI" = 
  do (attr, result) <- ((pDetect2Chars False '?' '>' >>= withAttribute "Processing Instruction") >>~ (popContext >> return ()))
     return (attr, result)

parseRules "Doctype" = 
  do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Doctype") >>~ (popContext >> return ()))
                        <|>
                        ((pDetectChar False '[' >>= withAttribute "Doctype") >>~ pushContext "Doctype Internal Subset"))
     return (attr, result)

parseRules "Doctype Internal Subset" = 
  do (attr, result) <- (((pDetectChar False ']' >>= withAttribute "Doctype") >>~ (popContext >> return ()))
                        <|>
                        ((parseRules "FindDTDRules"))
                        <|>
                        ((pString False "<!--" >>= withAttribute "Comment") >>~ pushContext "Comment")
                        <|>
                        ((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a >>= withAttribute "Processing Instruction") >>~ pushContext "PI")
                        <|>
                        ((parseRules "FindPEntityRefs")))
     return (attr, result)

parseRules "Doctype Markupdecl" = 
  do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Doctype") >>~ (popContext >> return ()))
                        <|>
                        ((pDetectChar False '"' >>= withAttribute "Value") >>~ pushContext "Doctype Markupdecl DQ")
                        <|>
                        ((pDetectChar False '\'' >>= withAttribute "Value") >>~ pushContext "Doctype Markupdecl SQ"))
     return (attr, result)

parseRules "Doctype Markupdecl DQ" = 
  do (attr, result) <- (((pDetectChar False '"' >>= withAttribute "Value") >>~ (popContext >> return ()))
                        <|>
                        ((parseRules "FindPEntityRefs")))
     return (attr, result)

parseRules "Doctype Markupdecl SQ" = 
  do (attr, result) <- (((pDetectChar False '\'' >>= withAttribute "Value") >>~ (popContext >> return ()))
                        <|>
                        ((parseRules "FindPEntityRefs")))
     return (attr, result)

parseRules "El Open" = 
  do (attr, result) <- (((pDetect2Chars False '/' '>' >>= withAttribute "Element") >>~ (popContext >> return ()))
                        <|>
                        ((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext >> return ()))
                        <|>
                        ((parseRules "FindAttributes"))
                        <|>
                        ((pRegExpr regex_'5cS >>= withAttribute "Error")))
     return (attr, result)

parseRules "El Close" = 
  do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext >> return ()))
                        <|>
                        ((pRegExpr regex_'5cS >>= withAttribute "Error")))
     return (attr, result)

parseRules "El Close 2" = 
  do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext >> popContext >> popContext >> return ()))
                        <|>
                        ((pRegExpr regex_'5cS >>= withAttribute "Error")))
     return (attr, result)

parseRules "El Close 3" = 
  do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext >> popContext >> popContext >> popContext >> return ()))
                        <|>
                        ((pRegExpr regex_'5cS >>= withAttribute "Error")))
     return (attr, result)

parseRules "CSS" = 
  do (attr, result) <- (((pDetect2Chars False '/' '>' >>= withAttribute "Element") >>~ (popContext >> return ()))
                        <|>
                        ((pDetectChar False '>' >>= withAttribute "Element") >>~ pushContext "CSS content")
                        <|>
                        ((parseRules "FindAttributes"))
                        <|>
                        ((pRegExpr regex_'5cS >>= withAttribute "Error")))
     return (attr, result)

parseRules "CSS content" = 
  do (attr, result) <- (((pRegExpr regex_'3c'2fstyle'5cb >>= withAttribute "Element") >>~ pushContext "El Close 2")
                        <|>
                        ((Text.Highlighting.Kate.Syntax.Css.parseExpression)))
     return (attr, result)

parseRules "JS" = 
  do (attr, result) <- (((pDetect2Chars False '/' '>' >>= withAttribute "Element") >>~ (popContext >> return ()))
                        <|>
                        ((pDetectChar False '>' >>= withAttribute "Element") >>~ pushContext "JS content")
                        <|>
                        ((parseRules "FindAttributes"))
                        <|>
                        ((pRegExpr regex_'5cS >>= withAttribute "Error")))
     return (attr, result)

parseRules "JS content" = 
  do (attr, result) <- (((pRegExpr regex_'3c'2fscript'5cb >>= withAttribute "Element") >>~ pushContext "El Close 2")
                        <|>
                        ((pRegExpr regex_'2f'2f'28'3f'3d'2e'2a'3c'2fscript'5cb'29 >>= withAttribute "Comment") >>~ pushContext "JS comment close")
                        <|>
                        ((Text.Highlighting.Kate.Syntax.Javascript.parseExpression)))
     return (attr, result)

parseRules "JS comment close" = 
  do (attr, result) <- (((pRegExpr regex_'3c'2fscript'5cb >>= withAttribute "Element") >>~ pushContext "El Close 3")
                        <|>
                        ((Text.Highlighting.Kate.Syntax.Alert.parseExpression >>= ((withAttribute "") . snd))))
     return (attr, result)

parseRules "Value" = 
  do (attr, result) <- (((pDetectChar False '"' >>= withAttribute "Value") >>~ pushContext "Value DQ")
                        <|>
                        ((pDetectChar False '\'' >>= withAttribute "Value") >>~ pushContext "Value SQ")
                        <|>
                        ((pDetectSpaces >>= withAttribute "Normal Text"))
                        <|>
                        (pushContext "Value NQ" >> return ([], "")))
     return (attr, result)

parseRules "Value NQ" = 
  do (attr, result) <- (((parseRules "FindEntityRefs"))
                        <|>
                        ((pRegExpr regex_'2f'28'3f'21'3e'29 >>= withAttribute "Value"))
                        <|>
                        ((pRegExpr regex_'5b'5e'2f'3e'3c'22'27'5cs'5d >>= withAttribute "Value"))
                        <|>
                        ((popContext >> popContext >> return ()) >> return ([], "")))
     return (attr, result)

parseRules "Value DQ" = 
  do (attr, result) <- (((pDetectChar False '"' >>= withAttribute "Value") >>~ (popContext >> popContext >> return ()))
                        <|>
                        ((parseRules "FindEntityRefs")))
     return (attr, result)

parseRules "Value SQ" = 
  do (attr, result) <- (((pDetectChar False '\'' >>= withAttribute "Value") >>~ (popContext >> popContext >> return ()))
                        <|>
                        ((parseRules "FindEntityRefs")))
     return (attr, result)

parseRules x = fail $ "Unknown context" ++ x
