17%option never-interactive
20%option extra-type="struct xmlYY_state *"
33#define YY_NEVER_INTERACTIVE 1
35#define YY_NO_UNISTD_H 1
73#define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size);
Event handlers that can be installed by the client and are called while parsing an XML document.
std::unordered_map< std::string, std::string > Attributes
static const char * stateToString(int state)
XMLHandlers::Attributes attrs
std::vector< std::string > xpath
int inputPosition
read offset during parsing
const char * inputString
the code fragment as text
std::function< XMLParser::Transcode > transcodeFunc
static int yyread(yyscan_t yyscanner, char *buf, int max_size)
static void addCharacters(yyscan_t yyscanner)
static void reportError(yyscan_t yyscanner, const std::string &msg)
static void addAttribute(yyscan_t yyscanner)
static void addElement(yyscan_t yyscanner)
static void countLines(yyscan_t yyscanner, const char *txt, yy_size_t len)
static std::string processData(yyscan_t yyscanner, const char *txt, yy_size_t len)
static void initElement(yyscan_t yyscanner)
83NAMESTART [:A-Za-z\200-\377_]
84NAMECHAR [:A-Za-z\200-\377_0-9.-]
85NAME {NAMESTART}{NAMECHAR}*
86ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
91STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\'
92DOCTYPE {SP}?"<!DOCTYPE"{SP}
static int countLines(yyscan_t yyscanner)
112 {DOCTYPE} {
countLines(yyscanner,yytext,yyleng); }
113 {OPENSPECIAL} {
countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); }
117 {COMMENT} { yyextra->commentContext = YY_START;
122 {CDATA} {
countLines(yyscanner,yytext,yyleng);
123 yyextra->cdataContext = YY_START;
126 {PCDATA} { yyextra->data +=
processData(yyscanner,yytext,yyleng); }
132 {COMMENT} { yyextra->commentContext = YY_START;
138 "/" { yyextra->isEnd =
true; }
139 {NAME} { yyextra->name = yytext;
149 "/" { yyextra->selfClose =
true; }
150 {NAME} { yyextra->attrName = yytext; }
151 "=" { BEGIN(AttributeValue); }
161 ['"] { yyextra->stringChar = *yytext;
162 yyextra->attrValue =
"";
165 . { std::string
msg = std::string(
"Missing attribute value. Unexpected character `")+yytext+
"` found";
172 [^'"\n]+ { yyextra->attrValue +=
processData(yyscanner,yytext,yyleng); }
173 ['"] {
if (*yytext==yyextra->stringChar)
180 yyextra->attrValue +=
processData(yyscanner,yytext,yyleng);
183 \n { yyextra->lineNr++; yyextra->attrValue+=
' '; }
186 {ENDCDATA} { BEGIN(yyextra->cdataContext); }
187 [^]\n]+ { yyextra->data += yytext; }
188 \n { yyextra->data += yytext;
191 . { yyextra->data += yytext; }
194 "encoding"\s*=\s*\"[^\"]*\" {
195 std::string encoding=yytext;
196 size_t i=encoding.find(
'"');
197 encoding=encoding.substr(i+1,yyleng-i-2);
198 if (encoding!=
"UTF-8")
200 yyextra->encoding=encoding;
203 {CLOSESPECIAL} {
countLines(yyscanner,yytext,yyleng);
206 \n { yyextra->lineNr++; }
210 {COMMENTEND} {
countLines(yyscanner,yytext,yyleng);
211 BEGIN(yyextra->commentContext);
214 \n { yyextra->lineNr++; }
217\n { yyextra->lineNr++; }
218. { std::string
msg =
"Unexpected character `";
230 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
231 int inputPosition = yyextra->inputPosition;
232 const char *s = yyextra->inputString + inputPosition;
234 while( c < max_size && *s)
239 yyextra->inputPosition += c;
245 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
246 for (yy_size_t i=0;i<len;i++)
248 if (txt[i]==
'\n') yyextra->lineNr++;
254 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
255 yyextra->isEnd =
false;
256 yyextra->selfClose =
false;
258 yyextra->attrs.clear();
263 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
264 if (yyextra->xpath.empty())
266 std::string
msg =
"found closing tag '"+yyextra->name+
"' without matching opening tag";
271 std::string expectedTagName = yyextra->xpath.back();
272 if (expectedTagName!=yyextra->name)
274 std::string
msg =
"Found closing tag '"+yyextra->name+
"' that does not match the opening tag '"+expectedTagName+
"' at the same level";
279 yyextra->xpath.pop_back();
286 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
289 yyextra->xpath.push_back(yyextra->name);
290 if (yyextra->handlers.startElement)
292 yyextra->handlers.startElement(yyextra->name,yyextra->attrs);
296 fprintf(stderr,
"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data());
297 for (
auto attr : yyextra->attrs)
299 fprintf(stderr,
"%s='%s' ",attr.first.c_str(),attr.second.c_str());
301 fprintf(stderr,
"])\n");
304 if (yyextra->isEnd || yyextra->selfClose)
308 fprintf(stderr,
"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data());
311 if (yyextra->handlers.endElement)
313 yyextra->handlers.endElement(yyextra->name);
318static std::string
trimSpaces(
const std::string &str)
320 const int l =
static_cast<int>(str.length());
322 while (s<l && isspace(str.at(s))) s++;
323 while (e>s && isspace(str.at(e))) e--;
324 return str.substr(s,1+e-s);
329 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
331 if (!yyextra->encoding.empty() && !yyextra->transcodeFunc(data,yyextra->encoding.c_str()))
333 reportError(yyscanner,
"failed to transcode string '"+data+
"' from encoding '"+yyextra->encoding+
"' to UTF-8");
335 if (yyextra->handlers.characters)
337 yyextra->handlers.characters(data);
343 fprintf(stderr,
"characters(%s)\n",data.c_str());
350 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
351 std::string val = yyextra->attrValue;
352 if (!yyextra->encoding.empty() && !yyextra->transcodeFunc(val,yyextra->encoding.c_str()))
354 reportError(yyscanner,
"failed to transcode string '"+val+
"' from encoding '"+yyextra->encoding+
"' to UTF-8");
356 yyextra->attrs.insert(std::make_pair(yyextra->attrName,val));
361 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
364 fprintf(stderr,
"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,
msg.c_str());
366 if (yyextra->handlers.error)
368 yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,
msg);
372static const char *
entities_enc[] = {
"amp",
"quot",
"gt",
"lt",
"apos" };
373static const char entities_dec[] = {
'&',
'"',
'>',
'<',
'\'' };
382 for (yy_size_t i=0; i<len; i++)
387 const int maxEntityLen = 10;
388 char entity[maxEntityLen+1];
389 entity[maxEntityLen]=
'\0';
390 for (yy_size_t j=0; j<maxEntityLen && i+j+1<len; j++)
394 entity[j]=txt[i+j+1];
414 std::string
msg = std::string(
"Invalid character entity '&") + entity +
";' found\n";
436 xmlYYlex_init_extra(&
p->xmlYY_extra,&
p->yyscanner);
437 p->xmlYY_extra.handlers = handlers;
442 xmlYYlex_destroy(
p->yyscanner);
446 const char *inputStr,
448 std::function<
void()> debugStart,
449 std::function<
void()> debugEnd,
450 std::function<Transcode> transcodeFunc)
453 struct yyguts_t *yyg = (
struct yyguts_t*)yyscanner;
456 xmlYYset_debug(debugEnabled?1:0,
p->yyscanner);
459 if (inputStr==
nullptr || inputStr[0]==
'\0')
return;
466 yyextra->inputString = inputStr;
467 yyextra->inputPosition = 0;
468 yyextra->transcodeFunc = transcodeFunc;
470 if (
static_cast<unsigned char>(inputStr[0])==0xEF &&
471 static_cast<unsigned char>(inputStr[1])==0xBB &&
472 static_cast<unsigned char>(inputStr[2])==0xBF)
474 yyextra->inputPosition = 3;
477 xmlYYrestart( 0, yyscanner );
479 if (yyextra->handlers.startDocument)
481 yyextra->handlers.startDocument();
484 if (yyextra->handlers.endDocument)
486 yyextra->handlers.endDocument();
489 if (!yyextra->xpath.empty())
491 std::string tagName = yyextra->xpath.back();
492 std::string
msg =
"End of file reached while expecting closing tag '"+tagName+
"'";
501 struct yyguts_t *yyg = (
struct yyguts_t*)
p->yyscanner;
502 return yyextra->lineNr;
507 struct yyguts_t *yyg = (
struct yyguts_t*)
p->yyscanner;
508 return yyextra->fileName;
int lineNr() const override
std::unique_ptr< Private > p
std::string fileName() const override
XMLParser(const XMLHandlers &handlers)
void parse(const char *fileName, const char *inputString, bool debugEnabled, std::function< void()> debugStart, std::function< void()> debugEnd, std::function< Transcode > transcoder=[](std::string &s, const char *){ return true;})
static int yyread(yyscan_t yyscanner, char *buf, int max_size)
struct xmlYY_state xmlYY_extra
static std::string trimSpaces(const std::string &str)
static const int num_entities
static const char entities_dec[]
static void checkAndUpdatePath(yyscan_t yyscanner)
static const char * entities_enc[]