19%option never-interactive
22%option extra-type="struct xmlYY_state *"
35#define YY_NEVER_INTERACTIVE 1
37#define YY_NO_UNISTD_H 1
75#define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size);
std::unordered_map< std::string, std::string > Attributes
static const char * stateToString(int state)
void msg(const char *fmt,...)
XMLHandlers::Attributes attrs
std::vector< std::string > xpath
int inputPosition
read offset during parsing
const char * inputString
the code fragment as text
std::function< XMLParser::Transcode > transcodeFunc
static int yyread(yyscan_t yyscanner, char *buf, int max_size)
static void addCharacters(yyscan_t yyscanner)
static void reportError(yyscan_t yyscanner, const std::string &msg)
static void addAttribute(yyscan_t yyscanner)
static void addElement(yyscan_t yyscanner)
static void countLines(yyscan_t yyscanner, const char *txt, yy_size_t len)
static std::string processData(yyscan_t yyscanner, const char *txt, yy_size_t len)
static void initElement(yyscan_t yyscanner)
85NAMESTART [:A-Za-z\200-\377_]
86NAMECHAR [:A-Za-z\200-\377_0-9.-]
87NAME {NAMESTART}{NAMECHAR}*
88ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
93STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\'
94DOCTYPE {SP}?"<!DOCTYPE"{SP}
static int countLines(yyscan_t yyscanner)
114 {DOCTYPE} {
countLines(yyscanner,yytext,yyleng); }
115 {OPENSPECIAL} {
countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); }
119 {COMMENT} { yyextra->commentContext = YY_START;
124 {CDATA} {
countLines(yyscanner,yytext,yyleng);
125 yyextra->cdataContext = YY_START;
128 {PCDATA} { yyextra->data +=
processData(yyscanner,yytext,yyleng); }
134 {COMMENT} { yyextra->commentContext = YY_START;
140 "/" { yyextra->isEnd =
true; }
141 {NAME} { yyextra->name = yytext;
151 "/" { yyextra->selfClose =
true; }
152 {NAME} { yyextra->attrName = yytext; }
153 "=" { BEGIN(AttributeValue); }
163 ['"] { yyextra->stringChar = *yytext;
164 yyextra->attrValue =
"";
167 . { std::string
msg = std::string(
"Missing attribute value. Unexpected character `")+yytext+
"` found";
174 [^'"\n]+ { yyextra->attrValue +=
processData(yyscanner,yytext,yyleng); }
175 ['"] {
if (*yytext==yyextra->stringChar)
182 yyextra->attrValue +=
processData(yyscanner,yytext,yyleng);
185 \n { yyextra->lineNr++; yyextra->attrValue+=
' '; }
188 {ENDCDATA} { BEGIN(yyextra->cdataContext); }
189 [^]\n]+ { yyextra->data += yytext; }
190 \n { yyextra->data += yytext;
193 . { yyextra->data += yytext; }
196 "encoding"\s*=\s*\"[^\"]*\" {
197 std::string encoding=yytext;
198 size_t i=encoding.find(
'"');
199 encoding=encoding.substr(i+1,yyleng-i-2);
200 if (encoding!=
"UTF-8")
202 yyextra->encoding=encoding;
205 {CLOSESPECIAL} {
countLines(yyscanner,yytext,yyleng);
208 \n { yyextra->lineNr++; }
212 {COMMENTEND} {
countLines(yyscanner,yytext,yyleng);
213 BEGIN(yyextra->commentContext);
216 \n { yyextra->lineNr++; }
219\n { yyextra->lineNr++; }
220. { std::string
msg =
"Unexpected character `";
232 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
233 int inputPosition = yyextra->inputPosition;
234 const char *s = yyextra->inputString + inputPosition;
236 while( c < max_size && *s)
241 yyextra->inputPosition += c;
247 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
248 for (yy_size_t i=0;i<len;i++)
250 if (txt[i]==
'\n') yyextra->lineNr++;
256 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
257 yyextra->isEnd =
false;
258 yyextra->selfClose =
false;
260 yyextra->attrs.clear();
265 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
266 if (yyextra->xpath.empty())
268 std::string
msg =
"found closing tag '"+yyextra->name+
"' without matching opening tag";
273 std::string expectedTagName = yyextra->xpath.back();
274 if (expectedTagName!=yyextra->name)
276 std::string
msg =
"Found closing tag '"+yyextra->name+
"' that does not match the opening tag '"+expectedTagName+
"' at the same level";
281 yyextra->xpath.pop_back();
288 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
291 yyextra->xpath.push_back(yyextra->name);
292 if (yyextra->handlers.startElement)
294 yyextra->handlers.startElement(yyextra->name,yyextra->attrs);
298 fprintf(stderr,
"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data());
299 for (
auto attr : yyextra->attrs)
301 fprintf(stderr,
"%s='%s' ",attr.first.c_str(),attr.second.c_str());
303 fprintf(stderr,
"])\n");
306 if (yyextra->isEnd || yyextra->selfClose)
310 fprintf(stderr,
"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data());
313 if (yyextra->handlers.endElement)
315 yyextra->handlers.endElement(yyextra->name);
322 const int l =
static_cast<int>(str.length());
324 while (s<l && isspace(str.at(s))) s++;
325 while (e>s && isspace(str.at(e))) e--;
326 return str.substr(s,1+e-s);
331 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
333 if (!yyextra->encoding.empty() && !yyextra->transcodeFunc(data,yyextra->encoding.c_str()))
335 reportError(yyscanner,
"failed to transcode string '"+data+
"' from encoding '"+yyextra->encoding+
"' to UTF-8");
337 if (yyextra->handlers.characters)
339 yyextra->handlers.characters(data);
345 fprintf(stderr,
"characters(%s)\n",data.c_str());
352 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
353 std::string val = yyextra->attrValue;
354 if (!yyextra->encoding.empty() && !yyextra->transcodeFunc(val,yyextra->encoding.c_str()))
356 reportError(yyscanner,
"failed to transcode string '"+val+
"' from encoding '"+yyextra->encoding+
"' to UTF-8");
358 yyextra->attrs.insert(std::make_pair(yyextra->attrName,val));
363 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
366 fprintf(stderr,
"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,
msg.c_str());
368 if (yyextra->handlers.error)
370 yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,
msg);
374static const char *
entities_enc[] = {
"amp",
"quot",
"gt",
"lt",
"apos" };
384 for (yy_size_t i=0; i<len; i++)
389 const int maxEntityLen = 10;
390 char entity[maxEntityLen+1];
391 entity[maxEntityLen]=
'\0';
392 for (yy_size_t j=0; j<maxEntityLen && i+j+1<len; j++)
396 entity[j]=txt[i+j+1];
416 std::string
msg = std::string(
"Invalid character entity '&") + entity +
";' found\n";
438 xmlYYlex_init_extra(&
p->xmlYY_extra,&
p->yyscanner);
439 p->xmlYY_extra.handlers = handlers;
444 xmlYYlex_destroy(
p->yyscanner);
448 const char *inputStr,
450 std::function<
void()> debugStart,
451 std::function<
void()> debugEnd,
455 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
458 xmlYYset_debug(debugEnabled?1:0,
p->yyscanner);
461 if (inputStr==
nullptr || inputStr[0]==
'\0')
return;
468 yyextra->inputString = inputStr;
469 yyextra->inputPosition = 0;
470 yyextra->transcodeFunc = transcodeFunc;
472 if (
static_cast<unsigned char>(inputStr[0])==0xEF &&
473 static_cast<unsigned char>(inputStr[1])==0xBB &&
474 static_cast<unsigned char>(inputStr[2])==0xBF)
476 yyextra->inputPosition = 3;
479 xmlYYrestart( 0, yyscanner );
481 if (yyextra->handlers.startDocument)
483 yyextra->handlers.startDocument();
486 if (yyextra->handlers.endDocument)
488 yyextra->handlers.endDocument();
491 if (!yyextra->xpath.empty())
493 std::string tagName = yyextra->xpath.back();
494 std::string
msg =
"End of file reached while expecting closing tag '"+tagName+
"'";
503 struct yyguts_t *yyg = (
struct yyguts_t*)
p->yyscanner;
504 return yyextra->lineNr;
509 struct yyguts_t *yyg = (
struct yyguts_t*)
p->yyscanner;
510 return yyextra->fileName;
Event handlers that can be installed by the client and are called while parsing an XML document.
int lineNr() const override
std::unique_ptr< Private > p
std::string fileName() const override
XMLParser(const XMLHandlers &handlers)
void parse(const char *fileName, const char *inputString, bool debugEnabled, std::function< void()> debugStart, std::function< void()> debugEnd, std::function< Transcode > transcoder=[](std::string &s, const char *){ return true;})
static int yyread(yyscan_t yyscanner, char *buf, int max_size)
struct xmlYY_state xmlYY_extra
static std::string trimSpaces(const std::string &str)
static const int num_entities
static const char entities_dec[]
static void checkAndUpdatePath(yyscan_t yyscanner)
static const char * entities_enc[]