Doxygen
Loading...
Searching...
No Matches
markdown.cpp
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * Copyright (C) 1997-2020 by Dimitri van Heesch.
4 *
5 * Permission to use, copy, modify, and distribute this software and its
6 * documentation under the terms of the GNU General Public License is hereby
7 * granted. No representations are made about the suitability of this software
8 * for any purpose. It is provided "as is" without express or implied warranty.
9 * See the GNU General Public License for more details.
10 *
11 * Documents produced by Doxygen are derivative works derived from the
12 * input used in their production; they are not affected by this license.
13 *
14 */
15
16/* Note: part of the code below is inspired by libupskirt written by
17 * Natacha Porté. Original copyright message follows:
18 *
19 * Copyright (c) 2008, Natacha Porté
20 *
21 * Permission to use, copy, modify, and distribute this software for any
22 * purpose with or without fee is hereby granted, provided that the above
23 * copyright notice and this permission notice appear in all copies.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
26 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
27 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
28 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
29 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
30 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
31 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
32 */
33
34#include <stdio.h>
35
36#include <unordered_map>
37#include <functional>
38#include <atomic>
39#include <array>
40#include <string_view>
41
42#include "markdown.h"
43#include "debug.h"
44#include "util.h"
45#include "doxygen.h"
46#include "commentscan.h"
47#include "entry.h"
48#include "config.h"
49#include "message.h"
50#include "portable.h"
51#include "regex.h"
52#include "fileinfo.h"
53#include "trace.h"
54#include "anchor.h"
55#include "stringutil.h"
56
57#if !ENABLE_MARKDOWN_TRACING
58#undef AUTO_TRACE
59#undef AUTO_TRACE_ADD
60#undef AUTO_TRACE_EXIT
61#define AUTO_TRACE(...) (void)0
62#define AUTO_TRACE_ADD(...) (void)0
63#define AUTO_TRACE_EXIT(...) (void)0
64#endif
65
67{
68 explicitPage, /**< docs start with a page command */
69 explicitMainPage, /**< docs start with a mainpage command */
70 explicitDirPage, /**< docs start with a dir command */
71 notExplicit /**< docs doesn't start with either page or mainpage */
72};
73
74//-----------
75
// Is character c part of an identifier?
// Matches ASCII letters, ASCII digits and any byte >= 0x80 (a byte of a
// multi-byte UTF-8 sequence). The underscore is not matched.
// The argument is parenthesized so compound expressions expand correctly; it is
// still evaluated several times, so it must not have side effects.
#define isIdChar(c) \
  (((c)>='a' && (c)<='z') || \
   ((c)>='A' && (c)<='Z') || \
   ((c)>='0' && (c)<='9') || \
   (static_cast<unsigned char>(c)>=0x80)) // unicode characters
82
// Is character c (besides identifier characters) allowed right at the
// beginning of an emphasis section?
// The argument is parenthesized so compound expressions expand correctly; it is
// still evaluated several times, so it must not have side effects.
#define extraChar(c) \
  ((c)=='-' || (c)=='+' || (c)=='!' || \
   (c)=='?' || (c)=='$' || (c)=='@' || \
   (c)=='&' || (c)=='*' || (c)=='%' || \
   (c)=='[' || (c)=='(' || (c)=='.' || \
   (c)=='>' || (c)==':' || (c)==',' || \
   (c)==';' || (c)=='\'' || (c)=='"' || (c)=='`')
91
// Is character c allowed directly before the start of an emphasis section?
// The argument is parenthesized so compound expressions expand correctly; it is
// still evaluated several times, so it must not have side effects.
#define isOpenEmphChar(c) \
  ((c)=='\n' || (c)==' ' || (c)=='\'' || (c)=='<' || \
   (c)=='>'  || (c)=='{' || (c)=='('  || (c)=='[' || \
   (c)==','  || (c)==':' || (c)==';')
97
// Is character c (followed by character cn) an escape that prevents cn from
// ending an emphasis section? So for example *bla (*.txt) is cool*
// A '<' only counts when it is not the start of an HTML end tag (cn!='/').
// Both arguments are parenthesized so compound expressions expand correctly;
// c is still evaluated several times, so it must not have side effects.
#define ignoreCloseEmphChar(c,cn) \
  ((c)=='(' || (c)=='{' || (c)=='[' || ((c)=='<' && (cn)!='/') || \
   (c)=='\\' || \
   (c)=='@')
104//----------
105
106struct TableCell
107{
108 TableCell() : colSpan(false) {}
109 QCString cellText;
110 bool colSpan;
111};
112
/** Private implementation of the Markdown class: holds the processing state
 *  and declares the helper methods that do the actual conversion.
 *
 *  NOTE(review): the constructor initializes a member `fileName`, LinkRef's
 *  constructor initializes `link` and `title`, and other code in this file
 *  appends to a member `out`, but the declarations of these members are not
 *  visible in this listing - confirm against the complete source.
 */
113struct Markdown::Private
114{
 // Stores the file name, line number and indent level, and fills the
 // `actions` dispatch table.
115 Private(const QCString &fn,int line,int indent)
116 : fileName(fn), lineNr(line), indentLevel(indent)
117 {
118 // setup callback table for special characters
 // The table is indexed by the character's code; each entry forwards to the
 // member function handling that character. The lambdas capture `this`.
119 actions[static_cast<unsigned int>('_')] = [this](std::string_view data,size_t offset) { return processEmphasis (data,offset); };
120 actions[static_cast<unsigned int>('*')] = [this](std::string_view data,size_t offset) { return processEmphasis (data,offset); };
121 actions[static_cast<unsigned int>('~')] = [this](std::string_view data,size_t offset) { return processEmphasis (data,offset); };
122 actions[static_cast<unsigned int>('`')] = [this](std::string_view data,size_t offset) { return processCodeSpan (data,offset); };
123 actions[static_cast<unsigned int>('\\')]= [this](std::string_view data,size_t offset) { return processSpecialCommand(data,offset); };
124 actions[static_cast<unsigned int>('@')] = [this](std::string_view data,size_t offset) { return processSpecialCommand(data,offset); };
125 actions[static_cast<unsigned int>('[')] = [this](std::string_view data,size_t offset) { return processLink (data,offset); };
126 actions[static_cast<unsigned int>('!')] = [this](std::string_view data,size_t offset) { return processLink (data,offset); };
127 actions[static_cast<unsigned int>('<')] = [this](std::string_view data,size_t offset) { return processHtmlTag (data,offset); };
128 actions[static_cast<unsigned int>('-')] = [this](std::string_view data,size_t offset) { return processNmdash (data,offset); };
129 actions[static_cast<unsigned int>('"')] = [this](std::string_view data,size_t offset) { return processQuoted (data,offset); };
130 }
131
 // Helper methods; those whose definitions are visible in this file are
 // documented at their definition.
132 QCString processQuotations(std::string_view data,size_t refIndent);
133 QCString processBlocks(std::string_view data,size_t indent);
134 QCString isBlockCommand(std::string_view data,size_t offset);
135 size_t isSpecialCommand(std::string_view data,size_t offset);
136 size_t findEndOfLine(std::string_view data,size_t offset);
137 int processHtmlTagWrite(std::string_view data,size_t offset,bool doWrite);
138 int processHtmlTag(std::string_view data,size_t offset);
139 int processEmphasis(std::string_view data,size_t offset);
140 int processEmphasis1(std::string_view data,char c);
141 int processEmphasis2(std::string_view data,char c);
142 int processEmphasis3(std::string_view data,char c);
143 int processNmdash(std::string_view data,size_t offset);
144 int processQuoted(std::string_view data,size_t offset);
145 int processCodeSpan(std::string_view data,size_t offset);
146 int processSpecialCommand(std::string_view data,size_t offset);
147 int processLink(std::string_view data,size_t offset);
148 size_t findEmphasisChar(std::string_view, char c, size_t c_size);
149 void addStrEscapeUtf8Nbsp(std::string_view data);
150 void processInline(std::string_view data);
151 void writeMarkdownImage(std::string_view fmt, bool inline_img, bool explicitTitle,
152 const QCString &title, const QCString &content,
153 const QCString &link, const QCString &attributes,
154 const FileDef *fd);
155 int isHeaderline(std::string_view data, bool allowAdjustLevel);
156 int isAtxHeader(std::string_view data, QCString &header,QCString &id,bool allowAdjustLevel,
157 bool *pIsIdGenerated=nullptr);
158 void writeOneLineHeaderOrRuler(std::string_view data);
159 void writeFencedCodeBlock(std::string_view data, std::string_view lang,
160 size_t blockStart,size_t blockEnd);
161 size_t writeBlockQuote(std::string_view data);
162 size_t writeCodeBlock(std::string_view,size_t refIndent);
163 size_t writeTableBlock(std::string_view data);
164 QCString extractTitleId(QCString &title, int level,bool *pIsIdGenerated=nullptr);
165
 // A link reference, constructed from a link and a title.
166 struct LinkRef
167 {
168 LinkRef(const QCString &l,const QCString &t) : link(l), title(t) {}
171 };
 // Signature shared by all entries of the `actions` table: takes the data
 // starting at the special character plus an offset, and returns an int.
172 using Action_t = std::function<int(std::string_view,size_t)>;
173
 // Link references, keyed by a string.
174 std::unordered_map<std::string,LinkRef> linkRefs;
 // Line number passed to the constructor.
176 int lineNr = 0;
177 int indentLevel=0; // 0 is outside markdown, -1=page level
 // Per-character dispatch table, one slot per possible byte value; only the
 // slots assigned in the constructor hold a callable.
179 std::array<Action_t,256> actions;
180};
181
182Markdown::Markdown(const QCString &fileName,int lineNr,int indentLevel)
183 : prv(std::make_unique<Private>(fileName,lineNr,indentLevel))
184{
185 using namespace std::placeholders;
186 (void)lineNr; // not used yet
187}
188
189Markdown::~Markdown() = default;
190
191void Markdown::setIndentLevel(int level) { prv->indentLevel = level; }
192
193
195
196
197//---------- constants -------
198//
// NOTE(review): the two string constants below are declared as non-const
// pointers to const characters at namespace scope, so the pointers themselves
// could be reassigned; whether other translation units refer to them is not
// visible here.
199const char *g_utf8_nbsp = "\xc2\xa0"; // UTF-8 nbsp
200const char *g_doxy_nbsp = "&_doxy_nbsp;"; // doxygen escape command for UTF-8 nbsp
// presumably the indentation (in characters) that marks an indented code
// block - its use is not visible in this part of the file, confirm at the caller
201const size_t codeBlockIndent = 4;
202
203//---------- helpers -------
204
// Test if the first characters of data represent a new line, which is either the
// character \n or the string \ilinebr (optionally followed by a single space).
// Returns 0 if data does not start with a new line (this includes empty data),
// or the number of characters that make up the new line otherwise.
// Only characters inside the view are inspected.
inline size_t isNewline(std::string_view data)
{
  if (data.empty()) return 0;
  // normal newline
  if (data[0]=='\n') return 1;
  // artificial new line from ^^ in ALIASES
  constexpr std::string_view marker = "\\ilinebr";
  if (data.compare(0,marker.size(),marker)==0)
  {
    // also count the space directly after \ilinebr, if there is one
    const bool spaceFollows = data.size()>marker.size() && data[marker.size()]==' ';
    return spaceFollows ? marker.size()+1 : marker.size();
  }
  return 0;
}
215
216// escape double quotes in string
218{
219 AUTO_TRACE("s={}",Trace::trunc(s));
220 if (s.isEmpty()) return s;
221 QCString result;
222 const char *p=s.data();
223 char c=0, pc='\0';
224 while ((c=*p++))
225 {
226 if (c=='"' && pc!='\\') result+='\\';
227 result+=c;
228 pc=c;
229 }
230 AUTO_TRACE_EXIT("result={}",result);
231 return result;
232}
233
234// escape characters that have a special meaning later on.
236{
237 AUTO_TRACE("s={}",Trace::trunc(s));
238 if (s.isEmpty()) return s;
239 bool insideQuote=FALSE;
240 QCString result;
241 const char *p=s.data();
242 char c=0, pc='\0';
243 while ((c=*p++))
244 {
245 switch (c)
246 {
247 case '"':
248 if (pc!='\\') { insideQuote=!insideQuote; }
249 result+=c;
250 break;
251 case '<':
252 // fall through
253 case '>':
254 if (!insideQuote)
255 {
256 result+='\\';
257 result+=c;
258 if ((p[0]==':') && (p[1]==':'))
259 {
260 result+='\\';
261 result+=':';
262 p++;
263 }
264 }
265 else
266 {
267 result+=c;
268 }
269 break;
270 case '\\': if (!insideQuote) { result+='\\'; } result+='\\'; break;
271 case '@': if (!insideQuote) { result+='\\'; } result+='@'; break;
272 // commented out next line due to regression when using % to suppress a link
273 //case '%': if (!insideQuote) { result+='\\'; } result+='%'; break;
274 case '#': if (!insideQuote) { result+='\\'; } result+='#'; break;
275 case '$': if (!insideQuote) { result+='\\'; } result+='$'; break;
276 case '&': if (!insideQuote) { result+='\\'; } result+='&'; break;
277 default:
278 result+=c; break;
279 }
280 pc=c;
281 }
282 AUTO_TRACE_EXIT("result={}",result);
283 return result;
284}
285
286/** helper function to convert presence of left and/or right alignment markers
287 * to an alignment value
288 */
289static Alignment markersToAlignment(bool leftMarker,bool rightMarker)
290{
291 if (leftMarker && rightMarker)
292 {
293 return AlignCenter;
294 }
295 else if (leftMarker)
296 {
297 return AlignLeft;
298 }
299 else if (rightMarker)
300 {
301 return AlignRight;
302 }
303 else
304 {
305 return AlignNone;
306 }
307}
308
309/** parse the image attributes and return attributes for given format */
310static QCString getFilteredImageAttributes(std::string_view fmt, const QCString &attrs)
311{
312 AUTO_TRACE("fmt={} attrs={}",fmt,attrs);
313 StringVector attrList = split(attrs.str(),",");
314 for (const auto &attr_ : attrList)
315 {
316 QCString attr = QCString(attr_).stripWhiteSpace();
317 int i = attr.find(':');
318 if (i>0) // has format
319 {
320 QCString format = attr.left(i).stripWhiteSpace().lower();
321 if (format == fmt) // matching format
322 {
323 AUTO_TRACE_EXIT("result={}",attr.mid(i+1));
324 return attr.mid(i+1); // keep part after :
325 }
326 }
327 else // option that applies to all formats
328 {
329 AUTO_TRACE_EXIT("result={}",attr);
330 return attr;
331 }
332 }
333 return QCString();
334}
335
336// Check if data contains a block command. If so returned the command
337// that ends the block. If not an empty string is returned.
338// Note When offset>0 character position -1 will be inspected.
339//
340// Checks for and skip the following block commands:
341// {@code .. { .. } .. }
342// \dot .. \enddot
343// \code .. \endcode
344// \msc .. \endmsc
345// \f$..\f$
346// \f(..\f)
347// \f[..\f]
348// \f{..\f}
349// \verbatim..\endverbatim
350// \iliteral..\endiliteral
351// \latexonly..\endlatexonly
352// \htmlonly..\endhtmlonly
353// \xmlonly..\endxmlonly
354// \rtfonly..\endrtfonly
355// \manonly..\endmanonly
356// \startuml..\enduml
357QCString Markdown::Private::isBlockCommand(std::string_view data,size_t offset)
358{
359 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
360
361 using EndBlockFunc = QCString (*)(const std::string &,bool,char);
362
363 static const auto getEndBlock = [](const std::string &blockName,bool,char) -> QCString
364 {
365 return "end"+blockName;
366 };
367 static const auto getEndCode = [](const std::string &blockName,bool openBracket,char) -> QCString
368 {
369 return openBracket ? QCString("}") : "end"+blockName;
370 };
371 static const auto getEndUml = [](const std::string &/* blockName */,bool,char) -> QCString
372 {
373 return "enduml";
374 };
375 static const auto getEndFormula = [](const std::string &/* blockName */,bool,char nextChar) -> QCString
376 {
377 switch (nextChar)
378 {
379 case '$': return "f$";
380 case '(': return "f)";
381 case '[': return "f]";
382 case '{': return "f}";
383 }
384 return "";
385 };
386
387 // table mapping a block start command to a function that can return the matching end block string
388 static const std::unordered_map<std::string,EndBlockFunc> blockNames =
389 {
390 { "dot", getEndBlock },
391 { "code", getEndCode },
392 { "icode", getEndBlock },
393 { "msc", getEndBlock },
394 { "verbatim", getEndBlock },
395 { "iverbatim", getEndBlock },
396 { "iliteral", getEndBlock },
397 { "latexonly", getEndBlock },
398 { "htmlonly", getEndBlock },
399 { "xmlonly", getEndBlock },
400 { "rtfonly", getEndBlock },
401 { "manonly", getEndBlock },
402 { "docbookonly", getEndBlock },
403 { "startuml", getEndUml },
404 { "f", getEndFormula }
405 };
406
407 const size_t size = data.size();
408 bool openBracket = offset>0 && data.data()[-1]=='{';
409 bool isEscaped = offset>0 && (data.data()[-1]=='\\' || data.data()[-1]=='@');
410 if (isEscaped) return QCString();
411
412 size_t end=1;
413 while (end<size && (data[end]>='a' && data[end]<='z')) end++;
414 if (end==1) return QCString();
415 std::string blockName(data.substr(1,end-1));
416 auto it = blockNames.find(blockName);
417 QCString result;
418 if (it!=blockNames.end()) // there is a function assigned
419 {
420 result = it->second(blockName, openBracket, end<size ? data[end] : 0);
421 }
422 AUTO_TRACE_EXIT("result={}",result);
423 return result;
424}
425
426size_t Markdown::Private::isSpecialCommand(std::string_view data,size_t offset)
427{
428 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
429
430 using EndCmdFunc = size_t (*)(std::string_view,size_t);
431
432 static const auto endOfLine = [](std::string_view data_,size_t offset_) -> size_t
433 {
434 // skip until the end of line (allowing line continuation characters)
435 char lc = 0;
436 char c = 0;
437 while (offset_<data_.size() && ((c=data_[offset_])!='\n' || lc=='\\'))
438 {
439 if (c=='\\') lc='\\'; // last character was a line continuation
440 else if (c!=' ') lc=0; // rest line continuation
441 offset_++;
442 }
443 return offset_;
444 };
445
446 static const auto endOfLabels = [](std::string_view data_,size_t offset_,bool multi_) -> size_t
447 {
448 if (offset_<data_.size() && data_[offset_]==' ') // we expect a space before the label
449 {
450 char c = 0;
451 offset_++;
452 bool done=false;
453 while (!done)
454 {
455 // skip over spaces
456 while (offset_<data_.size() && data_[offset_]==' ')
457 {
458 offset_++;
459 }
460 // skip over label
461 while (offset_<data_.size() && (c=data_[offset_])!=' ' && c!=',' && c!='\\' && c!='@' && c!='\n')
462 {
463 offset_++;
464 }
465 // optionally skip over a comma separated list of labels
466 if (multi_ && offset_<data_.size() && (data_[offset_]==',' || data_[offset_]==' '))
467 {
468 size_t off = offset_;
469 while (off<data_.size() && data_[off]==' ')
470 {
471 off++;
472 }
473 if (off<data_.size() && data_[off]==',')
474 {
475 offset_ = ++off;
476 }
477 else // no next label found
478 {
479 done=true;
480 }
481 }
482 else
483 {
484 done=true;
485 }
486 }
487 return offset_;
488 }
489 return 0;
490 };
491
492 static const auto endOfLabel = [](std::string_view data_,size_t offset_) -> size_t
493 {
494 return endOfLabels(data_,offset_,false);
495 };
496
497 static const auto endOfLabelOpt = [](std::string_view data_,size_t offset_) -> size_t
498 {
499 size_t index=offset_;
500 if (index<data_.size() && data_[index]==' ') // skip over optional spaces
501 {
502 index++;
503 while (index<data_.size() && data_[index]==' ') index++;
504 }
505 if (index<data_.size() && data_[index]=='{') // find matching '}'
506 {
507 index++;
508 char c = 0;
509 while (index<data_.size() && (c=data_[index])!='}' && c!='\\' && c!='@' && c!='\n') index++;
510 if (index==data_.size() || data_[index]!='}') return 0; // invalid option
511 offset_=index+1; // part after {...} is the option
512 }
513 return endOfLabel(data_,offset_);
514 };
515
516 static const auto endOfParam = [](std::string_view data_,size_t offset_) -> size_t
517 {
518 size_t index=offset_;
519 if (index<data_.size() && data_[index]==' ') // skip over optional spaces
520 {
521 index++;
522 while (index<data_.size() && data_[index]==' ') index++;
523 }
524 if (index<data_.size() && data_[index]=='[') // find matching ']'
525 {
526 index++;
527 char c = 0;
528 while (index<data_.size() && (c=data_[index])!=']' && c!='\n') index++;
529 if (index==data_.size() || data_[index]!=']') return 0; // invalid parameter
530 offset_=index+1; // part after [...] is the parameter name
531 }
532 return endOfLabels(data_,offset_,true);
533 };
534
535 static const auto endOfRetVal = [](std::string_view data_,size_t offset_) -> size_t
536 {
537 return endOfLabels(data_,offset_,true);
538 };
539
540 static const auto endOfFuncLike = [](std::string_view data_,size_t offset_,bool allowSpaces) -> size_t
541 {
542 if (offset_<data_.size() && data_[offset_]==' ') // we expect a space before the name
543 {
544 char c=0;
545 offset_++;
546 // skip over spaces
547 while (offset_<data_.size() && data_[offset_]==' ')
548 {
549 offset_++;
550 }
551 // skip over name (and optionally type)
552 while (offset_<data_.size() && (c=data_[offset_])!='\n' && (allowSpaces || c!=' ') && c!='(')
553 {
554 offset_++;
555 }
556 if (c=='(') // find the end of the function
557 {
558 int count=1;
559 offset_++;
560 while (offset_<data_.size() && (c=data_[offset_++]))
561 {
562 if (c=='(') count++;
563 else if (c==')') count--;
564 if (count==0) return offset_;
565 }
566 }
567 return offset_;
568 }
569 return 0;
570 };
571
572 static const auto endOfFunc = [](std::string_view data_,size_t offset_) -> size_t
573 {
574 return endOfFuncLike(data_,offset_,true);
575 };
576
577 static const auto endOfGuard = [](std::string_view data_,size_t offset_) -> size_t
578 {
579 return endOfFuncLike(data_,offset_,false);
580 };
581
582 static const std::unordered_map<std::string,EndCmdFunc> cmdNames =
583 {
584 { "a", endOfLabel },
585 { "addindex", endOfLine },
586 { "addtogroup", endOfLabel },
587 { "anchor", endOfLabel },
588 { "b", endOfLabel },
589 { "c", endOfLabel },
590 { "category", endOfLine },
591 { "cite", endOfLabel },
592 { "class", endOfLine },
593 { "concept", endOfLine },
594 { "copybrief", endOfFunc },
595 { "copydetails", endOfFunc },
596 { "copydoc", endOfFunc },
597 { "def", endOfFunc },
598 { "defgroup", endOfLabel },
599 { "diafile", endOfLine },
600 { "dir", endOfLine },
601 { "dockbookinclude",endOfLine },
602 { "dontinclude", endOfLine },
603 { "dotfile", endOfLine },
604 { "e", endOfLabel },
605 { "elseif", endOfGuard },
606 { "em", endOfLabel },
607 { "emoji", endOfLabel },
608 { "enum", endOfLabel },
609 { "example", endOfLine },
610 { "exception", endOfLine },
611 { "extends", endOfLabel },
612 { "file", endOfLine },
613 { "fn", endOfFunc },
614 { "headerfile", endOfLine },
615 { "htmlinclude", endOfLine },
616 { "ianchor", endOfLabelOpt },
617 { "idlexcept", endOfLine },
618 { "if", endOfGuard },
619 { "ifnot", endOfGuard },
620 { "image", endOfLine },
621 { "implements", endOfLine },
622 { "include", endOfLine },
623 { "includedoc", endOfLine },
624 { "includelineno", endOfLine },
625 { "ingroup", endOfLabel },
626 { "interface", endOfLine },
627 { "latexinclude", endOfLine },
628 { "maninclude", endOfLine },
629 { "memberof", endOfLabel },
630 { "mscfile", endOfLine },
631 { "namespace", endOfLabel },
632 { "noop", endOfLine },
633 { "overload", endOfLine },
634 { "p", endOfLabel },
635 { "package", endOfLabel },
636 { "page", endOfLabel },
637 { "paragraph", endOfLabel },
638 { "param", endOfParam },
639 { "property", endOfLine },
640 { "protocol", endOfLine },
641 { "qualifier", endOfLine },
642 { "ref", endOfLabel },
643 { "refitem", endOfLine },
644 { "related", endOfLabel },
645 { "relatedalso", endOfLabel },
646 { "relates", endOfLabel },
647 { "relatesalso", endOfLabel },
648 { "retval", endOfRetVal},
649 { "rtfinclude", endOfLine },
650 { "section", endOfLabel },
651 { "skip", endOfLine },
652 { "skipline", endOfLine },
653 { "snippet", endOfLine },
654 { "snippetdoc", endOfLine },
655 { "snippetlineno", endOfLine },
656 { "struct", endOfLine },
657 { "subpage", endOfLabel },
658 { "subparagraph", endOfLabel },
659 { "subsubparagraph",endOfLabel },
660 { "subsection", endOfLabel },
661 { "subsubsection", endOfLabel },
662 { "throw", endOfLabel },
663 { "throws", endOfLabel },
664 { "tparam", endOfLabel },
665 { "typedef", endOfLine },
666 { "plantumlfile", endOfLine },
667 { "union", endOfLine },
668 { "until", endOfLine },
669 { "var", endOfLine },
670 { "verbinclude", endOfLine },
671 { "weakgroup", endOfLabel },
672 { "xmlinclude", endOfLine },
673 { "xrefitem", endOfLabel }
674 };
675
676 bool isEscaped = offset>0 && (data.data()[-1]=='\\' || data.data()[-1]=='@');
677 if (isEscaped) return 0;
678
679 const size_t size = data.size();
680 size_t end=1;
681 while (end<size && (data[end]>='a' && data[end]<='z')) end++;
682 if (end==1) return 0;
683 std::string cmdName(data.substr(1,end-1));
684 size_t result=0;
685 auto it = cmdNames.find(cmdName);
686 if (it!=cmdNames.end()) // command with parameters that should be ignored by markdown
687 {
688 // find the end of the parameters
689 result = it->second(data,end);
690 }
691 AUTO_TRACE_EXIT("result={}",result);
692 return result;
693}
694
695/** looks for the next emph char, skipping other constructs, and
696 * stopping when either it is found, or we are at the end of a paragraph.
697 */
698size_t Markdown::Private::findEmphasisChar(std::string_view data, char c, size_t c_size)
699{
700 AUTO_TRACE("data='{}' c={} c_size={}",Trace::trunc(data),c,c_size);
701 size_t i = 1;
702 const size_t size = data.size();
703
704 while (i<size)
705 {
706 while (i<size && data[i]!=c &&
707 data[i]!='\\' && data[i]!='@' &&
708 !(data[i]=='/' && data[i-1]=='<') && // html end tag also ends emphasis
709 data[i]!='\n') i++;
710 // avoid overflow (unclosed emph token)
711 if (i==size)
712 {
713 return 0;
714 }
715 //printf("findEmphasisChar: data=[%s] i=%d c=%c\n",data,i,data[i]);
716
717 // not counting escaped chars or characters that are unlikely
718 // to appear as the end of the emphasis char
719 if (ignoreCloseEmphChar(data[i-1],data[i]))
720 {
721 i++;
722 continue;
723 }
724 else
725 {
726 // get length of emphasis token
727 size_t len = 0;
728 while (i+len<size && data[i+len]==c)
729 {
730 len++;
731 }
732
733 if (len>0)
734 {
735 if (len!=c_size || (i+len<size && isIdChar(data[i+len]))) // to prevent touching some_underscore_identifier
736 {
737 i+=len;
738 continue;
739 }
740 AUTO_TRACE_EXIT("result={}",i);
741 return static_cast<int>(i); // found it
742 }
743 }
744
745 // skipping a code span
746 if (data[i]=='`')
747 {
748 int snb=0;
749 while (i<size && data[i]=='`') snb++,i++;
750
751 // find same pattern to end the span
752 int enb=0;
753 while (i<size && enb<snb)
754 {
755 if (data[i]=='`') enb++;
756 if (snb==1 && data[i]=='\'') break; // ` ended by '
757 i++;
758 }
759 }
760 else if (data[i]=='@' || data[i]=='\\')
761 { // skip over blocks that should not be processed
762 QCString endBlockName = isBlockCommand(data.substr(i),i);
763 if (!endBlockName.isEmpty())
764 {
765 i++;
766 size_t l = endBlockName.length();
767 while (i+l<size)
768 {
769 if ((data[i]=='\\' || data[i]=='@') && // command
770 data[i-1]!='\\' && data[i-1]!='@') // not escaped
771 {
772 if (qstrncmp(&data[i+1],endBlockName.data(),l)==0)
773 {
774 break;
775 }
776 }
777 i++;
778 }
779 }
780 else if (i+1<size && isIdChar(data[i+1])) // @cmd, stop processing, see bug 690385
781 {
782 return 0;
783 }
784 else
785 {
786 i++;
787 }
788 }
789 else if (data[i-1]=='<' && data[i]=='/') // html end tag invalidates emphasis
790 {
791 return 0;
792 }
793 else if (data[i]=='\n') // end * or _ at paragraph boundary
794 {
795 i++;
796 while (i<size && data[i]==' ') i++;
797 if (i>=size || data[i]=='\n')
798 {
799 return 0;
800 } // empty line -> paragraph
801 }
802 else // should not get here!
803 {
804 i++;
805 }
806 }
807 return 0;
808}
809
810/** process single emphasis */
811int Markdown::Private::processEmphasis1(std::string_view data, char c)
812{
813 AUTO_TRACE("data='{}' c={}",Trace::trunc(data),c);
814 size_t i = 0;
815 const size_t size = data.size();
816
817 /* skipping one symbol if coming from emph3 */
818 if (size>1 && data[0]==c && data[1]==c) { i=1; }
819
820 while (i<size)
821 {
822 size_t len = findEmphasisChar(data.substr(i), c, 1);
823 if (len==0) { return 0; }
824 i+=len;
825 if (i>=size) { return 0; }
826
827 if (i+1<size && data[i+1]==c)
828 {
829 i++;
830 continue;
831 }
832 if (data[i]==c && data[i-1]!=' ' && data[i-1]!='\n')
833 {
834 out+="<em>";
835 processInline(data.substr(0,i));
836 out+="</em>";
837 AUTO_TRACE_EXIT("result={}",i+1);
838 return static_cast<int>(i+1);
839 }
840 }
841 return 0;
842}
843
844/** process double emphasis */
845int Markdown::Private::processEmphasis2(std::string_view data, char c)
846{
847 AUTO_TRACE("data='{}' c={}",Trace::trunc(data),c);
848 size_t i = 0;
849 const size_t size = data.size();
850
851 while (i<size)
852 {
853 size_t len = findEmphasisChar(data.substr(i), c, 2);
854 if (len==0)
855 {
856 return 0;
857 }
858 i += len;
859 if (i+1<size && data[i]==c && data[i+1]==c && i && data[i-1]!=' ' && data[i-1]!='\n')
860 {
861 if (c == '~') out+="<strike>";
862 else out+="<strong>";
863 processInline(data.substr(0,i));
864 if (c == '~') out+="</strike>";
865 else out+="</strong>";
866 AUTO_TRACE_EXIT("result={}",i+2);
867 return static_cast<int>(i+2);
868 }
869 i++;
870 }
871 return 0;
872}
873
874/** Parsing triple emphasis.
875 * Finds the first closing tag, and delegates to the other emph
876 */
877int Markdown::Private::processEmphasis3(std::string_view data,char c)
878{
879 AUTO_TRACE("data='{}' c={}",Trace::trunc(data),c);
880 size_t i = 0;
881 const size_t size = data.size();
882
883 while (i<size)
884 {
885 size_t len = findEmphasisChar(data.substr(i), c, 3);
886 if (len==0)
887 {
888 return 0;
889 }
890 i+=len;
891
892 /* skip whitespace preceded symbols */
893 if (data[i]!=c || data[i-1]==' ' || data[i-1]=='\n')
894 {
895 continue;
896 }
897
898 if (i+2<size && data[i+1]==c && data[i+2]==c)
899 {
900 out+="<em><strong>";
901 processInline(data.substr(0,i));
902 out+="</strong></em>";
903 AUTO_TRACE_EXIT("result={}",i+3);
904 return static_cast<int>(i+3);
905 }
906 else if (i+1<size && data[i+1]==c)
907 {
908 // double symbol found, handing over to emph1
909 len = processEmphasis1(std::string_view(data.data()-2, size+2), c);
910 if (len==0)
911 {
912 return 0;
913 }
914 else
915 {
916 AUTO_TRACE_EXIT("result={}",len-2);
917 return static_cast<int>(len - 2);
918 }
919 }
920 else
921 {
922 // single symbol found, handing over to emph2
923 len = processEmphasis2(std::string_view(data.data()-1, size+1), c);
924 if (len==0)
925 {
926 return 0;
927 }
928 else
929 {
930 AUTO_TRACE_EXIT("result={}",len-1);
931 return static_cast<int>(len - 1);
932 }
933 }
934 }
935 return 0;
936}
937
938/** Process ndash and mdashes */
939int Markdown::Private::processNmdash(std::string_view data,size_t offset)
940{
941 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
942 const size_t size = data.size();
943 // precondition: data[0]=='-'
944 size_t i=1;
945 int count=1;
946 if (i<size && data[i]=='-') // found --
947 {
948 count++,i++;
949 }
950 if (i<size && data[i]=='-') // found ---
951 {
952 count++,i++;
953 }
954 if (i<size && data[i]=='-') // found ----
955 {
956 count++;
957 }
958 if (count>=2 && offset>=2 && qstrncmp(data.data()-2,"<!",2)==0)
959 { AUTO_TRACE_EXIT("result={}",1-count); return 1-count; } // start HTML comment
960 if (count==2 && size > 2 && data[2]=='>')
961 { return 0; } // end HTML comment
962 if (count==3 && size > 3 && data[3]=='>')
963 { return 0; } // end HTML comment
964 if (count==2 && (offset<8 || qstrncmp(data.data()-8,"operator",8)!=0)) // -- => ndash
965 {
966 out+="&ndash;";
967 AUTO_TRACE_EXIT("result=2");
968 return 2;
969 }
970 else if (count==3) // --- => ndash
971 {
972 out+="&mdash;";
973 AUTO_TRACE_EXIT("result=3");
974 return 3;
975 }
976 // not an ndash or mdash
977 return 0;
978}
979
980/** Process quoted section "...", can contain one embedded newline */
981int Markdown::Private::processQuoted(std::string_view data,size_t)
982{
983 AUTO_TRACE("data='{}'",Trace::trunc(data));
984 const size_t size = data.size();
985 size_t i=1;
986 int nl=0;
987 while (i<size && data[i]!='"' && nl<2)
988 {
989 if (data[i]=='\n') nl++;
990 i++;
991 }
992 if (i<size && data[i]=='"' && nl<2)
993 {
994 out+=data.substr(0,i+1);
995 AUTO_TRACE_EXIT("result={}",i+2);
996 return static_cast<int>(i+1);
997 }
998 // not a quoted section
999 return 0;
1000}
1001
1002/** Process a HTML tag. Note that <pre>..</pre> are treated specially, in
1003 * the sense that all code inside is written unprocessed
1004 */
1005int Markdown::Private::processHtmlTagWrite(std::string_view data,size_t offset,bool doWrite)
1006{
1007 AUTO_TRACE("data='{}' offset={} doWrite={}",Trace::trunc(data),offset,doWrite);
1008 if (offset>0 && data.data()[-1]=='\\') { return 0; } // escaped <
1009
1010 const size_t size = data.size();
1011
1012 // find the end of the html tag
1013 size_t i=1;
1014 size_t l=0;
1015 // compute length of the tag name
1016 while (i<size && isIdChar(data[i])) i++,l++;
1017 QCString tagName(data.substr(1,i-1));
1018 if (tagName.lower()=="pre") // found <pre> tag
1019 {
1020 bool insideStr=FALSE;
1021 while (i+6<size)
1022 {
1023 char c=data[i];
1024 if (!insideStr && c=='<') // potential start of html tag
1025 {
1026 if (data[i+1]=='/' &&
1027 tolower(data[i+2])=='p' && tolower(data[i+3])=='r' &&
1028 tolower(data[i+4])=='e' && tolower(data[i+5])=='>')
1029 { // found </pre> tag, copy from start to end of tag
1030 if (doWrite) out+=data.substr(0,i+6);
1031 //printf("found <pre>..</pre> [%d..%d]\n",0,i+6);
1032 AUTO_TRACE_EXIT("result={}",i+6);
1033 return static_cast<int>(i+6);
1034 }
1035 }
1036 else if (insideStr && c=='"')
1037 {
1038 if (data[i-1]!='\\') insideStr=FALSE;
1039 }
1040 else if (c=='"')
1041 {
1042 insideStr=TRUE;
1043 }
1044 i++;
1045 }
1046 }
1047 else // some other html tag
1048 {
1049 if (l>0 && i<size)
1050 {
1051 if (data[i]=='/' && i+1<size && data[i+1]=='>') // <bla/>
1052 {
1053 //printf("Found htmlTag={%s}\n",qPrint(QCString(data).left(i+2)));
1054 if (doWrite) out+=data.substr(0,i+2);
1055 AUTO_TRACE_EXIT("result={}",i+2);
1056 return static_cast<int>(i+2);
1057 }
1058 else if (data[i]=='>') // <bla>
1059 {
1060 //printf("Found htmlTag={%s}\n",qPrint(QCString(data).left(i+1)));
1061 if (doWrite) out+=data.substr(0,i+1);
1062 AUTO_TRACE_EXIT("result={}",i+1);
1063 return static_cast<int>(i+1);
1064 }
1065 else if (data[i]==' ') // <bla attr=...
1066 {
1067 i++;
1068 bool insideAttr=FALSE;
1069 while (i<size)
1070 {
1071 if (!insideAttr && data[i]=='"')
1072 {
1073 insideAttr=TRUE;
1074 }
1075 else if (data[i]=='"' && data[i-1]!='\\')
1076 {
1077 insideAttr=FALSE;
1078 }
1079 else if (!insideAttr && data[i]=='>') // found end of tag
1080 {
1081 //printf("Found htmlTag={%s}\n",qPrint(QCString(data).left(i+1)));
1082 if (doWrite) out+=data.substr(0,i+1);
1083 AUTO_TRACE_EXIT("result={}",i+1);
1084 return static_cast<int>(i+1);
1085 }
1086 i++;
1087 }
1088 }
1089 }
1090 }
1091 AUTO_TRACE_EXIT("not a valid html tag");
1092 return 0;
1093}
1094
1095int Markdown::Private::processHtmlTag(std::string_view data,size_t offset)
1096{
1097 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
1098 return processHtmlTagWrite(data,offset,true);
1099}
1100
1101int Markdown::Private::processEmphasis(std::string_view data,size_t offset)
1102{
1103 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
1104 const size_t size = data.size();
1105
1106 if ((offset>0 && !isOpenEmphChar(data.data()[-1])) || // invalid char before * or _
1107 (size>1 && data[0]!=data[1] && !(isIdChar(data[1]) || extraChar(data[1]))) || // invalid char after * or _
1108 (size>2 && data[0]==data[1] && !(isIdChar(data[2]) || extraChar(data[2])))) // invalid char after ** or __
1109 {
1110 AUTO_TRACE_EXIT("invalid surrounding characters");
1111 return 0;
1112 }
1113
1114 char c = data[0];
1115 int ret = 0;
1116 if (size>2 && c!='~' && data[1]!=c) // _bla or *bla
1117 {
1118 // whitespace cannot follow an opening emphasis
1119 if (data[1]==' ' || data[1]=='\n' ||
1120 (ret = processEmphasis1(data.substr(1), c)) == 0)
1121 {
1122 return 0;
1123 }
1124 AUTO_TRACE_EXIT("result={}",ret+1);
1125 return ret+1;
1126 }
1127 if (size>3 && data[1]==c && data[2]!=c) // __bla or **bla
1128 {
1129 if (data[2]==' ' || data[2]=='\n' ||
1130 (ret = processEmphasis2(data.substr(2), c)) == 0)
1131 {
1132 return 0;
1133 }
1134 AUTO_TRACE_EXIT("result={}",ret+2);
1135 return ret+2;
1136 }
1137 if (size>4 && c!='~' && data[1]==c && data[2]==c && data[3]!=c) // ___bla or ***bla
1138 {
1139 if (data[3]==' ' || data[3]=='\n' ||
1140 (ret = processEmphasis3(data.substr(3), c)) == 0)
1141 {
1142 return 0;
1143 }
1144 AUTO_TRACE_EXIT("result={}",ret+3);
1145 return ret+3;
1146 }
1147 return 0;
1148}
1149
1151 std::string_view fmt, bool inline_img, bool explicitTitle,
1152 const QCString &title, const QCString &content,
1153 const QCString &link, const QCString &attrs,
1154 const FileDef *fd)
1155{
1156 AUTO_TRACE("fmt={} inline_img={} explicitTitle={} title={} content={} link={} attrs={}",
1157 fmt,inline_img,explicitTitle,Trace::trunc(title),Trace::trunc(content),link,attrs);
1158 QCString attributes = getFilteredImageAttributes(fmt, attrs);
1159 out+="@image";
1160 if (inline_img)
1161 {
1162 out+="{inline}";
1163 }
1164 out+=" ";
1165 out+=fmt;
1166 out+=" ";
1167 out+=link.mid(fd ? 0 : 5);
1168 if (!explicitTitle && !content.isEmpty())
1169 {
1170 out+=" \"";
1171 out+=escapeDoubleQuotes(content);
1172 out+="\"";
1173 }
1174 else if ((content.isEmpty() || explicitTitle) && !title.isEmpty())
1175 {
1176 out+=" \"";
1177 out+=escapeDoubleQuotes(title);
1178 out+="\"";
1179 }
1180 else
1181 {
1182 out+=" ";// so the line break will not be part of the image name
1183 }
1184 if (!attributes.isEmpty())
1185 {
1186 out+=" ";
1187 out+=attributes;
1188 out+=" ";
1189 }
1190 out+="\\ilinebr ";
1191}
1192
1193int Markdown::Private::processLink(const std::string_view data,size_t offset)
1194{
1195 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
1196 const size_t size = data.size();
1197 QCString content;
1198 QCString link;
1199 QCString title;
1200 bool isImageLink = FALSE;
1201 bool isImageInline = FALSE;
1202 bool isToc = FALSE;
1203 size_t i=1;
1204 if (data[0]=='!')
1205 {
1206 isImageLink = TRUE;
1207 if (size<2 || data[1]!='[')
1208 {
1209 return 0;
1210 }
1211
1212 // if there is non-whitespace before the ![ within the scope of two new lines, the image
1213 // is considered inlined, i.e. the image is not preceded by an empty line
1214 int numNLsNeeded=2;
1215 int pos = -1;
1216 while (pos>=-static_cast<int>(offset) && numNLsNeeded>0)
1217 {
1218 if (data.data()[pos]=='\n') numNLsNeeded--;
1219 else if (data.data()[pos]!=' ') // found non-whitespace, stop searching
1220 {
1221 isImageInline=true;
1222 break;
1223 }
1224 pos--;
1225 }
1226 // skip '!['
1227 i++;
1228 }
1229 size_t contentStart=i;
1230 int level=1;
1231 int nlTotal=0;
1232 int nl=0;
1233 // find the matching ]
1234 while (i<size)
1235 {
1236 if (data[i-1]=='\\') // skip escaped characters
1237 {
1238 }
1239 else if (data[i]=='[')
1240 {
1241 level++;
1242 }
1243 else if (data[i]==']')
1244 {
1245 level--;
1246 if (level<=0) break;
1247 }
1248 else if (data[i]=='\n')
1249 {
1250 nl++;
1251 if (nl>1) { return 0; } // only allow one newline in the content
1252 }
1253 i++;
1254 }
1255 nlTotal += nl;
1256 nl = 0;
1257 if (i>=size) return 0; // premature end of comment -> no link
1258 size_t contentEnd=i;
1259 content = data.substr(contentStart,contentEnd-contentStart);
1260 //printf("processLink: content={%s}\n",qPrint(content));
1261 if (!isImageLink && content.isEmpty()) { return 0; } // no link text
1262 i++; // skip over ]
1263
1264 bool whiteSpace = false;
1265 // skip whitespace
1266 while (i<size && data[i]==' ') { whiteSpace = true; i++; }
1267 if (i<size && data[i]=='\n') // one newline allowed here
1268 {
1269 whiteSpace = true;
1270 i++;
1271 // skip more whitespace
1272 while (i<size && data[i]==' ') i++;
1273 }
1274 if (whiteSpace && i<size && (data[i]=='(' || data[i]=='[')) return 0;
1275
1276 bool explicitTitle=FALSE;
1277 if (i<size && data[i]=='(') // inline link
1278 {
1279 i++;
1280 while (i<size && data[i]==' ') i++;
1281 bool uriFormat=false;
1282 if (i<size && data[i]=='<') { i++; uriFormat=true; }
1283 size_t linkStart=i;
1284 int braceCount=1;
1285 while (i<size && data[i]!='\'' && data[i]!='"' && braceCount>0)
1286 {
1287 if (data[i]=='\n') // unexpected EOL
1288 {
1289 nl++;
1290 if (nl>1) { return 0; }
1291 }
1292 else if (data[i]=='(')
1293 {
1294 braceCount++;
1295 }
1296 else if (data[i]==')')
1297 {
1298 braceCount--;
1299 }
1300 if (braceCount>0)
1301 {
1302 i++;
1303 }
1304 }
1305 nlTotal += nl;
1306 nl = 0;
1307 if (i>=size || data[i]=='\n') { return 0; }
1308 link = data.substr(linkStart,i-linkStart);
1309 link = link.stripWhiteSpace();
1310 //printf("processLink: link={%s}\n",qPrint(link));
1311 if (link.isEmpty()) { return 0; }
1312 if (uriFormat && link.at(link.length()-1)=='>') link=link.left(link.length()-1);
1313
1314 // optional title
1315 if (data[i]=='\'' || data[i]=='"')
1316 {
1317 char c = data[i];
1318 i++;
1319 size_t titleStart=i;
1320 nl=0;
1321 while (i<size)
1322 {
1323 if (data[i]=='\n')
1324 {
1325 if (nl>1) { return 0; }
1326 nl++;
1327 }
1328 else if (data[i]=='\\') // escaped char in string
1329 {
1330 i++;
1331 }
1332 else if (data[i]==c)
1333 {
1334 i++;
1335 break;
1336 }
1337 i++;
1338 }
1339 if (i>=size)
1340 {
1341 return 0;
1342 }
1343 size_t titleEnd = i-1;
1344 // search back for closing marker
1345 while (titleEnd>titleStart && data[titleEnd]==' ') titleEnd--;
1346 if (data[titleEnd]==c) // found it
1347 {
1348 title = data.substr(titleStart,titleEnd-titleStart);
1349 explicitTitle=TRUE;
1350 while (i<size)
1351 {
1352 if (data[i]==' ')i++; // remove space after the closing quote and the closing bracket
1353 else if (data[i] == ')') break; // the end bracket
1354 else // illegal
1355 {
1356 return 0;
1357 }
1358 }
1359 }
1360 else
1361 {
1362 return 0;
1363 }
1364 }
1365 i++;
1366 }
1367 else if (i<size && data[i]=='[') // reference link
1368 {
1369 i++;
1370 size_t linkStart=i;
1371 nl=0;
1372 // find matching ]
1373 while (i<size && data[i]!=']')
1374 {
1375 if (data[i]=='\n')
1376 {
1377 nl++;
1378 if (nl>1) { return 0; }
1379 }
1380 i++;
1381 }
1382 if (i>=size) { return 0; }
1383 // extract link
1384 link = data.substr(linkStart,i-linkStart);
1385 //printf("processLink: link={%s}\n",qPrint(link));
1386 link = link.stripWhiteSpace();
1387 if (link.isEmpty()) // shortcut link
1388 {
1389 link=content;
1390 }
1391 // lookup reference
1392 QCString link_lower = link.lower();
1393 auto lr_it=linkRefs.find(link_lower.str());
1394 if (lr_it!=linkRefs.end()) // found it
1395 {
1396 link = lr_it->second.link;
1397 title = lr_it->second.title;
1398 //printf("processLink: ref: link={%s} title={%s}\n",qPrint(link),qPrint(title));
1399 }
1400 else // reference not found!
1401 {
1402 //printf("processLink: ref {%s} do not exist\n",link.qPrint(lower()));
1403 return 0;
1404 }
1405 i++;
1406 }
1407 else if (i<size && data[i]!=':' && !content.isEmpty()) // minimal link ref notation [some id]
1408 {
1409 QCString content_lower = content.lower();
1410 auto lr_it = linkRefs.find(content_lower.str());
1411 //printf("processLink: minimal link {%s} lr=%p",qPrint(content),lr);
1412 if (lr_it!=linkRefs.end()) // found it
1413 {
1414 link = lr_it->second.link;
1415 title = lr_it->second.title;
1416 explicitTitle=TRUE;
1417 i=contentEnd;
1418 }
1419 else if (content=="TOC")
1420 {
1421 isToc=TRUE;
1422 i=contentEnd;
1423 }
1424 else
1425 {
1426 return 0;
1427 }
1428 i++;
1429 }
1430 else
1431 {
1432 return 0;
1433 }
1434 nlTotal += nl;
1435
1436 // search for optional image attributes
1437 QCString attributes;
1438 if (isImageLink)
1439 {
1440 size_t j = i;
1441 // skip over whitespace
1442 while (j<size && data[j]==' ') { j++; }
1443 if (j<size && data[j]=='{') // we have attributes
1444 {
1445 i = j;
1446 // skip over '{'
1447 i++;
1448 size_t attributesStart=i;
1449 nl=0;
1450 // find the matching '}'
1451 while (i<size)
1452 {
1453 if (data[i-1]=='\\') // skip escaped characters
1454 {
1455 }
1456 else if (data[i]=='{')
1457 {
1458 level++;
1459 }
1460 else if (data[i]=='}')
1461 {
1462 level--;
1463 if (level<=0) break;
1464 }
1465 else if (data[i]=='\n')
1466 {
1467 nl++;
1468 if (nl>1) { return 0; } // only allow one newline in the content
1469 }
1470 i++;
1471 }
1472 nlTotal += nl;
1473 if (i>=size) return 0; // premature end of comment -> no attributes
1474 size_t attributesEnd=i;
1475 attributes = data.substr(attributesStart,attributesEnd-attributesStart);
1476 i++; // skip over '}'
1477 }
1478 if (!isImageInline)
1479 {
1480 // if there is non-whitespace after the image within the scope of two new lines, the image
1481 // is considered inlined, i.e. the image is not followed by an empty line
1482 int numNLsNeeded=2;
1483 size_t pos = i;
1484 while (pos<size && numNLsNeeded>0)
1485 {
1486 if (data[pos]=='\n') numNLsNeeded--;
1487 else if (data[pos]!=' ') // found non-whitespace, stop searching
1488 {
1489 isImageInline=true;
1490 break;
1491 }
1492 pos++;
1493 }
1494 }
1495 }
1496
1497 if (isToc) // special case for [TOC]
1498 {
1499 int toc_level = Config_getInt(TOC_INCLUDE_HEADINGS);
1500 if (toc_level>=SectionType::MinLevel && toc_level<=SectionType::MaxLevel)
1501 {
1502 out+="@tableofcontents{html:";
1503 out+=QCString().setNum(toc_level);
1504 out+="}";
1505 }
1506 }
1507 else if (isImageLink)
1508 {
1509 bool ambig = false;
1510 FileDef *fd=nullptr;
1511 if (link.find("@ref ")!=-1 || link.find("\\ref ")!=-1 ||
1513 // assume doxygen symbol link or local image link
1514 {
1515 // check if different handling is needed per format
1516 writeMarkdownImage("html", isImageInline, explicitTitle, title, content, link, attributes, fd);
1517 writeMarkdownImage("latex", isImageInline, explicitTitle, title, content, link, attributes, fd);
1518 writeMarkdownImage("rtf", isImageInline, explicitTitle, title, content, link, attributes, fd);
1519 writeMarkdownImage("docbook", isImageInline, explicitTitle, title, content, link, attributes, fd);
1520 writeMarkdownImage("xml", isImageInline, explicitTitle, title, content, link, attributes, fd);
1521 }
1522 else
1523 {
1524 out+="<img src=\"";
1525 out+=link;
1526 out+="\" alt=\"";
1527 out+=content;
1528 out+="\"";
1529 if (!title.isEmpty())
1530 {
1531 out+=" title=\"";
1532 out+=substitute(title.simplifyWhiteSpace(),"\"","&quot;");
1533 out+="\"";
1534 }
1535 out+="/>";
1536 }
1537 }
1538 else
1539 {
1541 int lp=-1;
1542 if ((lp=link.find("@ref "))!=-1 || (lp=link.find("\\ref "))!=-1 || (lang==SrcLangExt::Markdown && !isURL(link)))
1543 // assume doxygen symbol link
1544 {
1545 if (lp==-1) // link to markdown page
1546 {
1547 out+="@ref \"";
1548 if (!(Portable::isAbsolutePath(link) || isURL(link)))
1549 {
1550 FileInfo forg(link.str());
1551 if (forg.exists() && forg.isReadable())
1552 {
1553 link = forg.absFilePath();
1554 }
1555 else if (!(forg.exists() && forg.isReadable()))
1556 {
1557 FileInfo fi(fileName.str());
1558 QCString mdFile = fileName.left(fileName.length()-fi.fileName().length()) + link;
1559 FileInfo fmd(mdFile.str());
1560 if (fmd.exists() && fmd.isReadable())
1561 {
1562 link = fmd.absFilePath().data();
1563 }
1564 }
1565 }
1566 out+=link;
1567 out+="\"";
1568 }
1569 else
1570 {
1571 out+=link;
1572 }
1573 out+=" \"";
1574 if (explicitTitle && !title.isEmpty())
1575 {
1576 out+=substitute(title,"\"","&quot;");
1577 }
1578 else
1579 {
1580 processInline(std::string_view(substitute(content,"\"","&quot;").str()));
1581 }
1582 out+="\"";
1583 }
1584 else if ((lp=link.find('#'))!=-1 || link.find('/')!=-1 || link.find('.')!=-1)
1585 { // file/url link
1586 if (lp==0 || (lp>0 && !isURL(link) && Config_getEnum(MARKDOWN_ID_STYLE)==MARKDOWN_ID_STYLE_t::GITHUB))
1587 {
1588 out+="@ref \"";
1590 out+="\" \"";
1591 out+=substitute(content.simplifyWhiteSpace(),"\"","&quot;");
1592 out+="\"";
1593 }
1594 else
1595 {
1596 out+="<a href=\"";
1597 out+=link;
1598 out+="\"";
1599 for (int ii = 0; ii < nlTotal; ii++) out+="\n";
1600 if (!title.isEmpty())
1601 {
1602 out+=" title=\"";
1603 out+=substitute(title.simplifyWhiteSpace(),"\"","&quot;");
1604 out+="\"";
1605 }
1606 out+=" ";
1608 out+=">";
1609 content = content.simplifyWhiteSpace();
1610 processInline(std::string_view(content.str()));
1611 out+="</a>";
1612 }
1613 }
1614 else // avoid link to e.g. F[x](y)
1615 {
1616 //printf("no link for '%s'\n",qPrint(link));
1617 return 0;
1618 }
1619 }
1620 AUTO_TRACE_EXIT("result={}",i);
1621 return static_cast<int>(i);
1622}
1623
1624/** `` ` `` parsing a code span (assuming codespan != 0) */
1625int Markdown::Private::processCodeSpan(std::string_view data,size_t)
1626{
1627 AUTO_TRACE("data='{}'",Trace::trunc(data));
1628 const size_t size = data.size();
1629
1630 /* counting the number of backticks in the delimiter */
1631 size_t nb=0, end=0;
1632 while (nb<size && data[nb]=='`')
1633 {
1634 nb++;
1635 }
1636
1637 /* finding the next delimiter */
1638 size_t i = 0;
1639 char pc = '`';
1640 for (end=nb; end<size && i<nb; end++)
1641 {
1642 if (data[end]=='`')
1643 {
1644 i++;
1645 }
1646 else if (data[end]=='\n')
1647 {
1648 // consecutive newlines
1649 if (pc == '\n') return 0;
1650 pc = '\n';
1651 i = 0;
1652 }
1653 else if (data[end]=='\'' && nb==1 && (end==size-1 || (end+1<size && !isIdChar(data[end+1]))))
1654 { // look for quoted strings like 'some word', but skip strings like `it's cool`
1655 out+="&lsquo;";
1656 out+=data.substr(nb,end-nb);
1657 out+="&rsquo;";
1658 return static_cast<int>(end+1);
1659 }
1660 else
1661 {
1662 if (data[end]!=' ') pc = data[end];
1663 i=0;
1664 }
1665 }
1666 if (i < nb && end >= size)
1667 {
1668 return 0; // no matching delimiter
1669 }
1670
1671 // trimming outside whitespaces
1672 size_t f_begin = nb;
1673 while (f_begin < end && data[f_begin]==' ')
1674 {
1675 f_begin++;
1676 }
1677 size_t f_end = end - nb;
1678 while (f_end > nb && data[f_end-1]==' ')
1679 {
1680 f_end--;
1681 }
1682
1683 //printf("found code span '%s'\n",qPrint(QCString(data+f_begin).left(f_end-f_begin)));
1684
1685 /* real code span */
1686 if (f_begin < f_end)
1687 {
1688 QCString codeFragment = data.substr(f_begin, f_end-f_begin);
1689 out+="<tt>";
1690 out+=escapeSpecialChars(codeFragment);
1691 out+="</tt>";
1692 }
1693 AUTO_TRACE_EXIT("result={}",end);
1694 return static_cast<int>(end);
1695}
1696
1698{
1699 AUTO_TRACE("{}",Trace::trunc(data));
1700 if (Portable::strnstr(data.data(),g_doxy_nbsp,data.size())==nullptr) // no escape needed -> fast
1701 {
1702 out+=data;
1703 }
1704 else // escape needed -> slow
1705 {
1707 }
1708}
1709
1710int Markdown::Private::processSpecialCommand(std::string_view data, size_t offset)
1711{
1712 AUTO_TRACE("{}",Trace::trunc(data));
1713 const size_t size = data.size();
1714 size_t i=1;
1715 QCString endBlockName = isBlockCommand(data,offset);
1716 if (!endBlockName.isEmpty())
1717 {
1718 AUTO_TRACE_ADD("endBlockName={}",endBlockName);
1719 size_t l = endBlockName.length();
1720 while (i+l<size)
1721 {
1722 if ((data[i]=='\\' || data[i]=='@') && // command
1723 data[i-1]!='\\' && data[i-1]!='@') // not escaped
1724 {
1725 if (qstrncmp(&data[i+1],endBlockName.data(),l)==0)
1726 {
1727 //printf("found end at %d\n",i);
1728 addStrEscapeUtf8Nbsp(data.substr(0,i+1+l));
1729 AUTO_TRACE_EXIT("result={}",i+1+l);
1730 return static_cast<int>(i+1+l);
1731 }
1732 }
1733 i++;
1734 }
1735 }
1736 size_t endPos = isSpecialCommand(data,offset);
1737 if (endPos>0)
1738 {
1739 out+=data.substr(0,endPos);
1740 return static_cast<int>(endPos);
1741 }
1742 if (size>1 && data[0]=='\\') // escaped characters
1743 {
1744 char c=data[1];
1745 if (c=='[' || c==']' || c=='*' || c=='(' || c==')' || c=='`' || c=='_')
1746 {
1747 out+=data[1];
1748 AUTO_TRACE_EXIT("2");
1749 return 2;
1750 }
1751 else if (c=='\\' || c=='@')
1752 {
1753 out+=data.substr(0,2);
1754 AUTO_TRACE_EXIT("2");
1755 return 2;
1756 }
1757 else if (c=='-' && size>3 && data[2]=='-' && data[3]=='-') // \---
1758 {
1759 out+=data.substr(1,3);
1760 AUTO_TRACE_EXIT("2");
1761 return 4;
1762 }
1763 else if (c=='-' && size>2 && data[2]=='-') // \--
1764 {
1765 out+=data.substr(1,2);
1766 AUTO_TRACE_EXIT("3");
1767 return 3;
1768 }
1769 }
1770 else if (size>1 && data[0]=='@') // escaped characters
1771 {
1772 char c=data[1];
1773 if (c=='\\' || c=='@')
1774 {
1775 out+=data.substr(0,2);
1776 AUTO_TRACE_EXIT("2");
1777 return 2;
1778 }
1779 }
1780 return 0;
1781}
1782
1783void Markdown::Private::processInline(std::string_view data)
1784{
1785 AUTO_TRACE("data='{}'",Trace::trunc(data));
1786 size_t i=0;
1787 size_t end=0;
1788 Action_t action;
1789 const size_t size = data.size();
1790 while (i<size)
1791 {
1792 // skip over characters that do not trigger a specific action
1793 while (end<size && ((action=actions[static_cast<uint8_t>(data[end])])==nullptr)) end++;
1794 // and add them to the output
1795 out+=data.substr(i,end-i);
1796 if (end>=size) break;
1797 i=end;
1798 // do the action matching a special character at i
1799 int iend = action(data.substr(i),i);
1800 if (iend<=0) // update end
1801 {
1802 end=i+1-iend;
1803 }
1804 else // skip until end
1805 {
1806 i+=iend;
1807 end=i;
1808 }
1809 }
1810}
1811
1812/** returns whether the line is a setext-style hdr underline */
1813int Markdown::Private::isHeaderline(std::string_view data, bool allowAdjustLevel)
1814{
1815 AUTO_TRACE("data='{}' allowAdjustLevel",Trace::trunc(data),allowAdjustLevel);
1816 size_t i=0, c=0;
1817 const size_t size = data.size();
1818 while (i<size && data[i]==' ') i++;
1819 if (i==size) return 0;
1820
1821 // test of level 1 header
1822 if (data[i]=='=')
1823 {
1824 while (i<size && data[i]=='=') i++,c++;
1825 while (i<size && data[i]==' ') i++;
1826 int level = (c>1 && (i>=size || data[i]=='\n')) ? 1 : 0;
1827 if (allowAdjustLevel && level==1 && indentLevel==-1)
1828 {
1829 // In case a page starts with a header line we use it as title, promoting it to @page.
1830 // We set g_indentLevel to -1 to promoting the other sections if they have a deeper
1831 // nesting level than the page header, i.e. @section..@subsection becomes @page..@section.
1832 // In case a section at the same level is found (@section..@section) however we need
1833 // to undo this (and the result will be @page..@section).
1834 indentLevel=0;
1835 }
1836 AUTO_TRACE_EXIT("result={}",indentLevel+level);
1837 return indentLevel+level;
1838 }
1839 // test of level 2 header
1840 if (data[i]=='-')
1841 {
1842 while (i<size && data[i]=='-') i++,c++;
1843 while (i<size && data[i]==' ') i++;
1844 return (c>1 && (i>=size || data[i]=='\n')) ? indentLevel+2 : 0;
1845 }
1846 return 0;
1847}
1848
1849/** returns true if this line starts a block quote */
1850static bool isBlockQuote(std::string_view data,size_t indent)
1851{
1852 AUTO_TRACE("data='{}' indent={}",Trace::trunc(data),indent);
1853 size_t i = 0;
1854 const size_t size = data.size();
1855 while (i<size && data[i]==' ') i++;
1856 if (i<indent+codeBlockIndent) // could be a quotation
1857 {
1858 // count >'s and skip spaces
1859 int level=0;
1860 while (i<size && (data[i]=='>' || data[i]==' '))
1861 {
1862 if (data[i]=='>') level++;
1863 i++;
1864 }
1865 // last characters should be a space or newline,
1866 // so a line starting with >= does not match, but only when level equals 1
1867 bool res = (level>0 && i<size && ((data[i-1]==' ') || data[i]=='\n')) || (level > 1);
1868 AUTO_TRACE_EXIT("result={}",res);
1869 return res;
1870 }
1871 else // too much indentation -> code block
1872 {
1873 AUTO_TRACE_EXIT("result=false: too much indentation");
1874 return false;
1875 }
1876}
1877
1878/** returns end of the link ref if this is indeed a link reference. */
1879static size_t isLinkRef(std::string_view data, QCString &refid, QCString &link, QCString &title)
1880{
1881 AUTO_TRACE("data='{}'",Trace::trunc(data));
1882 const size_t size = data.size();
1883 // format: start with [some text]:
1884 size_t i = 0;
1885 while (i<size && data[i]==' ') i++;
1886 if (i>=size || data[i]!='[') { return 0; }
1887 i++;
1888 size_t refIdStart=i;
1889 while (i<size && data[i]!='\n' && data[i]!=']') i++;
1890 if (i>=size || data[i]!=']') { return 0; }
1891 refid = data.substr(refIdStart,i-refIdStart);
1892 if (refid.isEmpty()) { return 0; }
1893 AUTO_TRACE_ADD("refid found {}",refid);
1894 //printf(" isLinkRef: found refid='%s'\n",qPrint(refid));
1895 i++;
1896 if (i>=size || data[i]!=':') { return 0; }
1897 i++;
1898
1899 // format: whitespace* \n? whitespace* (<url> | url)
1900 while (i<size && data[i]==' ') i++;
1901 if (i<size && data[i]=='\n')
1902 {
1903 i++;
1904 while (i<size && data[i]==' ') i++;
1905 }
1906 if (i>=size) { return 0; }
1907
1908 if (i<size && data[i]=='<') i++;
1909 size_t linkStart=i;
1910 while (i<size && data[i]!=' ' && data[i]!='\n') i++;
1911 size_t linkEnd=i;
1912 if (i<size && data[i]=='>') i++;
1913 if (linkStart==linkEnd) { return 0; } // empty link
1914 link = data.substr(linkStart,linkEnd-linkStart);
1915 AUTO_TRACE_ADD("link found {}",Trace::trunc(link));
1916 if (link=="@ref" || link=="\\ref")
1917 {
1918 size_t argStart=i;
1919 while (i<size && data[i]!='\n' && data[i]!='"') i++;
1920 link+=data.substr(argStart,i-argStart);
1921 }
1922
1923 title.clear();
1924
1925 // format: (whitespace* \n? whitespace* ( 'title' | "title" | (title) ))?
1926 size_t eol=0;
1927 while (i<size && data[i]==' ') i++;
1928 if (i<size && data[i]=='\n')
1929 {
1930 eol=i;
1931 i++;
1932 while (i<size && data[i]==' ') i++;
1933 }
1934 if (i>=size)
1935 {
1936 AUTO_TRACE_EXIT("result={}: end of isLinkRef while looking for title",i);
1937 return i; // end of buffer while looking for the optional title
1938 }
1939
1940 char c = data[i];
1941 if (c=='\'' || c=='"' || c=='(') // optional title present?
1942 {
1943 //printf(" start of title found! char='%c'\n",c);
1944 i++;
1945 if (c=='(') c=')'; // replace c by end character
1946 size_t titleStart=i;
1947 // search for end of the line
1948 while (i<size && data[i]!='\n') i++;
1949 eol = i;
1950
1951 // search back to matching character
1952 size_t end=i-1;
1953 while (end>titleStart && data[end]!=c) end--;
1954 if (end>titleStart)
1955 {
1956 title = data.substr(titleStart,end-titleStart);
1957 }
1958 AUTO_TRACE_ADD("title found {}",Trace::trunc(title));
1959 }
1960 while (i<size && data[i]==' ') i++;
1961 //printf("end of isLinkRef: i=%d size=%d data[i]='%c' eol=%d\n",
1962 // i,size,data[i],eol);
1963 if (i>=size) { AUTO_TRACE_EXIT("result={}",i); return i; } // end of buffer while ref id was found
1964 else if (eol>0) { AUTO_TRACE_EXIT("result={}",eol); return eol; } // end of line while ref id was found
1965 return 0; // invalid link ref
1966}
1967
1968static bool isHRuler(std::string_view data)
1969{
1970 AUTO_TRACE("data='{}'",Trace::trunc(data));
1971 size_t i=0;
1972 size_t size = data.size();
1973 if (size>0 && data[size-1]=='\n') size--; // ignore newline character
1974 while (i<size && data[i]==' ') i++;
1975 if (i>=size) { AUTO_TRACE_EXIT("result=false: empty line"); return false; } // empty line
1976 char c=data[i];
1977 if (c!='*' && c!='-' && c!='_')
1978 {
1979 AUTO_TRACE_EXIT("result=false: {} is not a hrule character",c);
1980 return false; // not a hrule character
1981 }
1982 int n=0;
1983 while (i<size)
1984 {
1985 if (data[i]==c)
1986 {
1987 n++; // count rule character
1988 }
1989 else if (data[i]!=' ')
1990 {
1991 AUTO_TRACE_EXIT("result=false: line contains non hruler characters");
1992 return false; // line contains non hruler characters
1993 }
1994 i++;
1995 }
1996 AUTO_TRACE_EXIT("result={}",n>=3);
1997 return n>=3; // at least 3 characters needed for a hruler
1998}
1999
2000QCString Markdown::Private::extractTitleId(QCString &title, int level, bool *pIsIdGenerated)
2001{
2002 AUTO_TRACE("title={} level={}",Trace::trunc(title),level);
2003 // match e.g. '{#id-b11} ' and capture 'id-b11'
2004 static const reg::Ex r2(R"({#(\a[\w-]*)}\s*$)");
2005 reg::Match match;
2006 std::string ti = title.str();
2007 if (reg::search(ti,match,r2))
2008 {
2009 std::string id = match[1].str();
2010 title = title.left(match.position());
2011 if (AnchorGenerator::instance().reserve(id)>0)
2012 {
2013 warn(fileName, lineNr, "An automatically generated id already has the name '{}'!", id);
2014 }
2015 //printf("found match id='%s' title=%s\n",id.c_str(),qPrint(title));
2016 AUTO_TRACE_EXIT("id={}",id);
2017 return id;
2018 }
2019 if (((level>0) && (level<=Config_getInt(TOC_INCLUDE_HEADINGS))) || (Config_getEnum(MARKDOWN_ID_STYLE)==MARKDOWN_ID_STYLE_t::GITHUB))
2020 {
2022 if (pIsIdGenerated) *pIsIdGenerated=true;
2023 //printf("auto-generated id='%s' title='%s'\n",qPrint(id),qPrint(title));
2024 AUTO_TRACE_EXIT("id={}",id);
2025 return id;
2026 }
2027 //printf("no id found in title '%s'\n",qPrint(title));
2028 return "";
2029}
2030
2031
2032int Markdown::Private::isAtxHeader(std::string_view data,
2033 QCString &header,QCString &id,bool allowAdjustLevel,bool *pIsIdGenerated)
2034{
2035 AUTO_TRACE("data='{}' header={} id={} allowAdjustLevel={}",Trace::trunc(data),Trace::trunc(header),id,allowAdjustLevel);
2036 size_t i = 0;
2037 int level = 0, blanks=0;
2038 const size_t size = data.size();
2039
2040 // find start of header text and determine heading level
2041 while (i<size && data[i]==' ') i++;
2042 if (i>=size || data[i]!='#')
2043 {
2044 return 0;
2045 }
2046 while (i<size && data[i]=='#') i++,level++;
2047 if (level>SectionType::MaxLevel) // too many #'s -> no section
2048 {
2049 return 0;
2050 }
2051 while (i<size && data[i]==' ') i++,blanks++;
2052 if (level==1 && blanks==0)
2053 {
2054 return 0; // special case to prevent #someid seen as a header (see bug 671395)
2055 }
2056
2057 // find end of header text
2058 size_t end=i;
2059 while (end<size && data[end]!='\n') end++;
2060 while (end>i && (data[end-1]=='#' || data[end-1]==' ')) end--;
2061
2062 // store result
2063 header = data.substr(i,end-i);
2064 id = extractTitleId(header, level, pIsIdGenerated);
2065 if (!id.isEmpty()) // strip #'s between title and id
2066 {
2067 int idx=static_cast<int>(header.length())-1;
2068 while (idx>=0 && (header.at(idx)=='#' || header.at(idx)==' ')) idx--;
2069 header=header.left(idx+1);
2070 }
2071
2072 if (allowAdjustLevel && level==1 && indentLevel==-1)
2073 {
2074 // in case we find a `# Section` on a markdown page that started with the same level
2075 // header, we no longer need to artificially decrease the paragraph level.
2076 // So both
2077 // -------------------
2078 // # heading 1 <-- here we set g_indentLevel to -1
2079 // # heading 2 <-- here we set g_indentLevel back to 0 such that this will be a @section
2080 // -------------------
2081 // and
2082 // -------------------
2083 // # heading 1 <-- here we set g_indentLevel to -1
2084 // ## heading 2 <-- here we keep g_indentLevel at -1 such that @subsection will be @section
2085 // -------------------
2086 // will convert to
2087 // -------------------
2088 // @page md_page Heading 1
2089 // @section autotoc_md1 Heading 2
2090 // -------------------
2091
2092 indentLevel=0;
2093 }
2094 int res = level+indentLevel;
2095 AUTO_TRACE_EXIT("result={}",res);
2096 return res;
2097}
2098
2099static bool isEmptyLine(std::string_view data)
2100{
2101 AUTO_TRACE("data='{}'",Trace::trunc(data));
2102 size_t i=0;
2103 while (i<data.size())
2104 {
2105 if (data[i]=='\n') { AUTO_TRACE_EXIT("true"); return true; }
2106 if (data[i]!=' ') { AUTO_TRACE_EXIT("false"); return false; }
2107 i++;
2108 }
2109 AUTO_TRACE_EXIT("true");
2110 return true;
2111}
2112
2113#define isLiTag(i) \
2114 (data[(i)]=='<' && \
2115 (data[(i)+1]=='l' || data[(i)+1]=='L') && \
2116 (data[(i)+2]=='i' || data[(i)+2]=='I') && \
2117 (data[(i)+3]=='>'))
2118
2119// compute the indent from the start of the input, excluding list markers
2120// such as -, -#, *, +, 1., and <li>
2121static size_t computeIndentExcludingListMarkers(std::string_view data)
2122{
2123 AUTO_TRACE("data='{}'",Trace::trunc(data));
2124 size_t i=0;
2125 const size_t size=data.size();
2126 size_t indent=0;
2127 bool isDigit=FALSE;
2128 bool isLi=FALSE;
2129 bool listMarkerSkipped=FALSE;
2130 while (i<size &&
2131 (data[i]==' ' || // space
2132 (!listMarkerSkipped && // first list marker
2133 (data[i]=='+' || data[i]=='-' || data[i]=='*' || // unordered list char
2134 (data[i]=='#' && i>0 && data[i-1]=='-') || // -# item
2135 (isDigit=(data[i]>='1' && data[i]<='9')) || // ordered list marker?
2136 (isLi=(size>=3 && i+3<size && isLiTag(i))) // <li> tag
2137 )
2138 )
2139 )
2140 )
2141 {
2142 if (isDigit) // skip over ordered list marker '10. '
2143 {
2144 size_t j=i+1;
2145 while (j<size && ((data[j]>='0' && data[j]<='9') || data[j]=='.'))
2146 {
2147 if (data[j]=='.') // should be end of the list marker
2148 {
2149 if (j+1<size && data[j+1]==' ') // valid list marker
2150 {
2151 listMarkerSkipped=TRUE;
2152 indent+=j+1-i;
2153 i=j+1;
2154 break;
2155 }
2156 else // not a list marker
2157 {
2158 break;
2159 }
2160 }
2161 j++;
2162 }
2163 }
2164 else if (isLi)
2165 {
2166 i+=3; // skip over <li>
2167 indent+=3;
2168 listMarkerSkipped=TRUE;
2169 }
2170 else if (data[i]=='-' && size>=2 && i+2<size && data[i+1]=='#' && data[i+2]==' ')
2171 { // case "-# "
2172 listMarkerSkipped=TRUE; // only a single list marker is accepted
2173 i++; // skip over #
2174 indent++;
2175 }
2176 else if (data[i]!=' ' && i+1<size && data[i+1]==' ')
2177 { // case "- " or "+ " or "* "
2178 listMarkerSkipped=TRUE; // only a single list marker is accepted
2179 }
2180 if (data[i]!=' ' && !listMarkerSkipped)
2181 { // end of indent
2182 break;
2183 }
2184 indent++,i++;
2185 }
2186 AUTO_TRACE_EXIT("result={}",indent);
2187 return indent;
2188}
2189
2190static size_t isListMarker(std::string_view data)
2191{
2192 AUTO_TRACE("data='{}'",Trace::trunc(data));
2193 size_t normalIndent = 0;
2194 while (normalIndent<data.size() && data[normalIndent]==' ') normalIndent++;
2195 size_t listIndent = computeIndentExcludingListMarkers(data);
2196 size_t result = listIndent>normalIndent ? listIndent : 0;
2197 AUTO_TRACE_EXIT("result={}",result);
2198 return result;
2199}
2200
2201static bool isEndOfList(std::string_view data)
2202{
2203 AUTO_TRACE("data='{}'",Trace::trunc(data));
2204 int dots=0;
2205 size_t i=0;
2206 // end of list marker is an otherwise empty line with a dot.
2207 while (i<data.size())
2208 {
2209 if (data[i]=='.')
2210 {
2211 dots++;
2212 }
2213 else if (data[i]=='\n')
2214 {
2215 break;
2216 }
2217 else if (data[i]!=' ' && data[i]!='\t') // bail out if the line is not empty
2218 {
2219 AUTO_TRACE_EXIT("result=false");
2220 return false;
2221 }
2222 i++;
2223 }
2224 AUTO_TRACE_EXIT("result={}",dots==1);
2225 return dots==1;
2226}
2227
2228static bool isFencedCodeBlock(std::string_view data,size_t refIndent,
2229 QCString &lang,size_t &start,size_t &end,size_t &offset)
2230{
2231 AUTO_TRACE("data='{}' refIndent={}",Trace::trunc(data),refIndent);
2232 const char dot = '.';
2233 auto isAlphaChar = [ ](char c) { return (c>='A' && c<='Z') || (c>='a' && c<='z'); };
2234 auto isAlphaNChar = [ ](char c) { return (c>='A' && c<='Z') || (c>='a' && c<='z') || (c>='0' && c<='9') || (c=='+'); };
2235 auto isLangChar = [&](char c) { return c==dot || isAlphaChar(c); };
2236 // rules: at least 3 ~~~, end of the block same amount of ~~~'s, otherwise
2237 // return FALSE
2238 size_t i=0;
2239 size_t indent=0;
2240 int startTildes=0;
2241 const size_t size = data.size();
2242 while (i<size && data[i]==' ') indent++,i++;
2243 if (indent>=refIndent+4)
2244 {
2245 AUTO_TRACE_EXIT("result=false: content is part of code block indent={} refIndent={}",indent,refIndent);
2246 return FALSE;
2247 } // part of code block
2248 char tildaChar='~';
2249 if (i<size && data[i]=='`') tildaChar='`';
2250 while (i<size && data[i]==tildaChar) startTildes++,i++;
2251 if (startTildes<3)
2252 {
2253 AUTO_TRACE_EXIT("result=false: no fence marker found #tildes={}",startTildes);
2254 return FALSE;
2255 } // not enough tildes
2256 if (i<size && data[i]=='{') // extract .py from ```{.py} ... ```
2257 {
2258 i++; // skip over {
2259 if (data[i] == dot) i++; // skip over initial dot
2260 size_t startLang=i;
2261 while (i<size && (data[i]!='\n' && data[i]!='}')) i++; // find matching }
2262 if (i<size && data[i]=='}')
2263 {
2264 lang = data.substr(startLang,i-startLang);
2265 i++;
2266 }
2267 else // missing closing bracket, treat `{` as part of the content
2268 {
2269 i=startLang-1;
2270 lang="";
2271 }
2272 }
2273 else if (i<size && isLangChar(data[i])) /// extract python or .py from ```python...``` or ```.py...```
2274 {
2275 if (data[i] == dot) i++; // skip over initial dot
2276 size_t startLang=i;
2277 if (i<size && isAlphaChar(data[i])) //check first character of language specifier
2278 {
2279 i++;
2280 while (i<size && isAlphaNChar(data[i])) i++; // find end of language specifier
2281 }
2282 lang = data.substr(startLang,i-startLang);
2283 }
2284 else // no language specified
2285 {
2286 lang="";
2287 }
2288
2289 start=i;
2290 while (i<size)
2291 {
2292 if (data[i]==tildaChar)
2293 {
2294 end=i;
2295 int endTildes=0;
2296 while (i<size && data[i]==tildaChar) endTildes++,i++;
2297 while (i<size && data[i]==' ') i++;
2298 {
2299 if (endTildes==startTildes)
2300 {
2301 offset=i;
2302 AUTO_TRACE_EXIT("result=true: found end marker at offset {} lang='{}'",offset,lang);
2303 return true;
2304 }
2305 }
2306 }
2307 i++;
2308 }
2309 AUTO_TRACE_EXIT("result=false: no end marker found lang={}'",lang);
2310 return false;
2311}
2312
/** Determines whether the line at the start of \a data begins an indented code block.
 *
 *  Besides the line itself, the function inspects the text located \e before
 *  data.data(): it walks back at most \a offset characters to locate the two preceding lines.
 *
 *  @param[in]     data   view starting at the line to test.
 *  @param[in]     offset number of characters preceding the start of \a data that are read
 *                        (addresses data.data()-offset up to data.data()-1).
 *  @param[in,out] indent reference indent. When two preceding lines are found it is raised
 *                        to the indent of line -2 (list markers excluded) if that is larger.
 *  @returns true if the line has at least codeBlockIndent leading spaces, is not empty,
 *           the preceding line (if it was located) is empty, and the line's indent is at
 *           least \a indent + codeBlockIndent.
 */
static bool isCodeBlock(std::string_view data, size_t offset,size_t &indent)
{
  AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
  //printf("<isCodeBlock(offset=%d,size=%d,indent=%d)\n",offset,size,indent);
  // determine the indent of this line
  size_t i=0;
  size_t indent0=0;
  const size_t size = data.size();
  while (i<size && data[i]==' ') indent0++,i++;

  if (indent0<codeBlockIndent)
  {
    AUTO_TRACE_EXIT("result={}: line is not indented enough {}<4",false,indent0);
    return false;
  }
  if (indent0>=size || data[indent0]=='\n') // empty line does not start a code block
  {
    AUTO_TRACE_EXIT("result={}: only spaces at the end of a comment block",false);
    return false;
  }

  i=offset;
  int nl=0;          // number of line starts found while searching backwards
  int nl_pos[3];     // positions (relative to data.data(), so <=0) just after each newline found;
                     // only the first nl entries are initialized
  int offset_i = static_cast<int>(offset);
  // search back 3 lines and remember the start of lines -1 and -2
  while (i>0 && nl<3) // i counts down from offset to 1
  {
    int j = static_cast<int>(i)-offset_i-1; // j counts from -1 to -offset
    // since j can be negative we need to rewrap data in a std::string_view
    // (data.size()-j grows the length by |j| so the new view still ends where data ends)
    size_t nl_size = isNewline(std::string_view(data.data()+j,data.size()-j));
    if (nl_size>0)
    {
      nl_pos[nl++]=j+static_cast<int>(nl_size);
    }
    i--;
  }

  // if there are only 2 preceding lines, then line -2 starts at -offset
  if (i==0 && nl==2) nl_pos[nl++]=-offset_i;

  if (nl==3) // we have at least 2 preceding lines
  {
    //printf("  positions: nl_pos=[%d,%d,%d] line[-2]='%s' line[-1]='%s'\n",
    //    nl_pos[0],nl_pos[1],nl_pos[2],
    //    qPrint(QCString(data+nl_pos[1]).left(nl_pos[0]-nl_pos[1]-1)),
    //    qPrint(QCString(data+nl_pos[2]).left(nl_pos[1]-nl_pos[2]-1)));

    // check that line -1 is empty
    // Note that the offset is negative so we need to rewrap the string view
    if (!isEmptyLine(std::string_view(data.data()+nl_pos[1],nl_pos[0]-nl_pos[1]-1)))
    {
      AUTO_TRACE_EXIT("result={}",FALSE);
      return FALSE;
    }

    // determine the indent of line -2
    // Note that the offset is negative so we need to rewrap the string view
    indent=std::max(indent,computeIndentExcludingListMarkers(
                      std::string_view(data.data()+nl_pos[2],nl_pos[1]-nl_pos[2])));

    //printf(">isCodeBlock local_indent %d>=%d+%d=%d\n",
    //    indent0,indent,codeBlockIndent,indent0>=indent+codeBlockIndent);
    // the line is a code block if it is indented at least codeBlockIndent beyond line -2
    bool res = indent0>=indent+codeBlockIndent;
    AUTO_TRACE_EXIT("result={}: code block if indent difference >4 spaces",res);
    return res;
  }
  else // not enough lines to determine the relative indent, use global indent
  {
    // check that line -1 is empty
    // Note that the offset is negative so we need to rewrap the string view
    if (nl==1 && !isEmptyLine(std::string_view(data.data()-offset,offset-1)))
    {
      AUTO_TRACE_EXIT("result=false");
      return FALSE;
    }
    //printf(">isCodeBlock global indent %d>=%d+4=%d nl=%d\n",
    //    indent0,indent,indent0>=indent+4,nl);
    // compare against the indent passed in by the caller
    bool res = indent0>=indent+codeBlockIndent;
    AUTO_TRACE_EXIT("result={}: code block if indent difference >4 spaces",res);
    return res;
  }
}
2397
2398/** Finds the location of the table's contains in the string \a data.
2399 * Only one line will be inspected.
2400 * @param[in] data pointer to the string buffer.
2401 * @param[out] start offset of the first character of the table content
2402 * @param[out] end offset of the last character of the table content
2403 * @param[out] columns number of table columns found
2404 * @returns The offset until the next line in the buffer.
2405 */
2406static size_t findTableColumns(std::string_view data,size_t &start,size_t &end,size_t &columns)
2407{
2408 AUTO_TRACE("data='{}'",Trace::trunc(data));
2409 const size_t size = data.size();
2410 size_t i=0,n=0;
2411 // find start character of the table line
2412 while (i<size && data[i]==' ') i++;
2413 if (i<size && data[i]=='|' && data[i]!='\n') i++,n++; // leading | does not count
2414 start = i;
2415
2416 // find end character of the table line
2417 size_t j = 0;
2418 while (i<size && (j = isNewline(data.substr(i)))==0) i++;
2419 size_t eol=i+j;
2420
2421 if (j>0 && i>0) i--; // move i to point before newline
2422 while (i>0 && data[i]==' ') i--;
2423 if (i>0 && data[i-1]!='\\' && data[i]=='|') i--,n++; // trailing or escaped | does not count
2424 end = i;
2425
2426 // count columns between start and end
2427 columns=0;
2428 if (end>start)
2429 {
2430 i=start;
2431 while (i<=end) // look for more column markers
2432 {
2433 if (data[i]=='|' && (i==0 || data[i-1]!='\\')) columns++;
2434 if (columns==1) columns++; // first | make a non-table into a two column table
2435 i++;
2436 }
2437 }
2438 if (n==2 && columns==0) // table row has | ... |
2439 {
2440 columns++;
2441 }
2442 AUTO_TRACE_EXIT("eol={} start={} end={} columns={}",eol,start,end,columns);
2443 return eol;
2444}
2445
2446/** Returns TRUE iff data points to the start of a table block */
2447static bool isTableBlock(std::string_view data)
2448{
2449 AUTO_TRACE("data='{}'",Trace::trunc(data));
2450 size_t cc0=0, start=0, end=0;
2451
2452 // the first line should have at least two columns separated by '|'
2453 size_t i = findTableColumns(data,start,end,cc0);
2454 if (i>=data.size() || cc0<1)
2455 {
2456 AUTO_TRACE_EXIT("result=false: no |'s in the header");
2457 return FALSE;
2458 }
2459
2460 size_t cc1 = 0;
2461 size_t ret = findTableColumns(data.substr(i),start,end,cc1);
2462 size_t j=i+start;
2463 // separator line should consist of |, - and : and spaces only
2464 while (j<=end+i)
2465 {
2466 if (data[j]!=':' && data[j]!='-' && data[j]!='|' && data[j]!=' ')
2467 {
2468 AUTO_TRACE_EXIT("result=false: invalid character '{}'",data[j]);
2469 return FALSE; // invalid characters in table separator
2470 }
2471 j++;
2472 }
2473 if (cc1!=cc0) // number of columns should be same as previous line
2474 {
2475 AUTO_TRACE_EXIT("result=false: different number of columns as previous line {}!={}",cc1,cc0);
2476 return FALSE;
2477 }
2478
2479 i+=ret; // goto next line
2480 size_t cc2 = 0;
2481 findTableColumns(data.substr(i),start,end,cc2);
2482
2483 AUTO_TRACE_EXIT("result={}",cc1==cc2);
2484 return cc1==cc2;
2485}
2486
/** Converts the Markdown table at the start of \a data into an HTML table appended to \c out.
 *
 *  The first line is taken as the header row and the second line as the separator row
 *  holding the column alignment markers. All following lines with the same number of
 *  columns as the header are body rows.
 *  A cell whose unstripped text is empty (i.e. "||") is flagged as a column span and a cell
 *  containing only "^" is treated as part of a row span.
 *
 *  @param[in] data view starting at the header line of the table.
 *  @returns the offset in \a data of the first line that is not part of the table.
 */
size_t Markdown::Private::writeTableBlock(std::string_view data)
{
  AUTO_TRACE("data='{}'",Trace::trunc(data));
  const size_t size = data.size();

  // locate the header row; i becomes the offset of the separator line
  size_t columns=0, start=0, end=0;
  size_t i = findTableColumns(data,start,end,columns);
  size_t headerStart = start;
  size_t headerEnd = end;

  // read cell alignments
  size_t cc = 0;
  size_t ret = findTableColumns(data.substr(i),start,end,cc);
  size_t k=0; // index of the column being processed
  std::vector<int> columnAlignment(columns);

  // leftMarker: a ':' was seen as the first marker character of the cell,
  // rightMarker: the last marker character seen in the cell was a ':'
  bool leftMarker=false, rightMarker=false, startFound=false;
  size_t j=start+i;
  while (j<=end+i)
  {
    if (!startFound)
    {
      if (data[j]==':') { leftMarker=TRUE; startFound=TRUE; }
      if (data[j]=='-') startFound=TRUE;
      //printf("  data[%d]=%c startFound=%d\n",j,data[j],startFound);
    }
    if      (data[j]=='-') rightMarker=FALSE;
    else if (data[j]==':') rightMarker=TRUE;
    if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\'))) // unescaped | ends the cell
    {
      if (k<columns)
      {
        columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
        //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
        leftMarker=FALSE;
        rightMarker=FALSE;
        startFound=FALSE;
      }
      k++;
    }
    j++;
  }
  // the last cell is not terminated by a | inside [start..end]
  if (k<columns)
  {
    columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
    //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
  }
  // proceed to next line
  i+=ret;

  // Store the table cell information by row then column.  This
  // allows us to handle row spanning.
  std::vector<std::vector<TableCell> > tableContents;

  // split the header line into cells
  size_t m = headerStart;
  std::vector<TableCell> headerContents(columns);
  for (k=0;k<columns;k++)
  {
    while (m<=headerEnd && (data[m]!='|' || (m>0 && data[m-1]=='\\')))
    {
      headerContents[k].cellText += data[m++];
    }
    m++; // skip over the |
    // do the column span test before stripping white space
    // || is spanning columns, | | is not
    headerContents[k].colSpan = headerContents[k].cellText.isEmpty();
    headerContents[k].cellText = headerContents[k].cellText.stripWhiteSpace();
  }
  tableContents.push_back(headerContents);

  // write table cells
  while (i<size)
  {
    ret = findTableColumns(data.substr(i),start,end,cc);
    if (cc!=columns) break; // end of table

    j=start+i;
    k=0;
    std::vector<TableCell> rowContents(columns);
    while (j<=end+i)
    {
      if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
      {
        // do the column span test before stripping white space
        // || is spanning columns, | | is not
        rowContents[k].colSpan = rowContents[k].cellText.isEmpty();
        rowContents[k].cellText = rowContents[k].cellText.stripWhiteSpace();
        k++;
      } // if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
      else
      {
        rowContents[k].cellText += data[j];
      } // else { if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\'))) }
      j++;
    } // while (j<=end+i)
    // do the column span test before stripping white space
    // || is spanning columns, | | is not
    rowContents[k].colSpan  = rowContents[k].cellText.isEmpty();
    rowContents[k].cellText = rowContents[k].cellText.stripWhiteSpace();
    tableContents.push_back(rowContents);

    // proceed to next line
    i+=ret;
  }

  // emit the collected rows; row 0 is the header row and uses <th>, all others use <td>
  out+="<table class=\"markdownTable\">";
  QCString cellTag("th"), cellClass("class=\"markdownTableHead");
  for (size_t row = 0; row < tableContents.size(); row++)
  {
    if (row)
    {
      if (row % 2)
      {
        out+="\n<tr class=\"markdownTableRowOdd\">";
      }
      else
      {
        out+="\n<tr class=\"markdownTableRowEven\">";
      }
    }
    else
    {
      out+="\n  <tr class=\"markdownTableHead\">";
    }
    for (size_t c = 0; c < columns; c++)
    {
      // save the cell text for use after column span computation
      QCString cellText(tableContents[row][c].cellText);

      // Row span handling.  Spanning rows will contain a caret ('^').
      // If the current cell contains just a caret, this is part of an
      // earlier row's span and the cell should not be added to the
      // output.
      if (tableContents[row][c].cellText == "^")
      {
        continue;
      }
      if (tableContents[row][c].colSpan)
      {
        // walk left to the first cell that is not a column span; if that cell is
        // a row span continuation then this cell is skipped as well
        int cr = static_cast<int>(c);
        while ( cr >= 0 && tableContents[row][cr].colSpan)
        {
          cr--;
        };
        if (cr >= 0 && tableContents[row][cr].cellText == "^") continue;
      }
      // count the directly following rows that continue this cell with a caret
      size_t rowSpan = 1, spanRow = row+1;
      while ((spanRow < tableContents.size()) &&
             (tableContents[spanRow][c].cellText == "^"))
      {
        spanRow++;
        rowSpan++;
      }

      out+="    <" + cellTag + " " + cellClass;
      // use appropriate alignment style
      switch (columnAlignment[c])
      {
        case AlignLeft:   out+="Left\""; break;
        case AlignRight:  out+="Right\""; break;
        case AlignCenter: out+="Center\""; break;
        case AlignNone:   out+="None\""; break;
      }

      if (rowSpan > 1)
      {
        QCString spanStr;
        spanStr.setNum(rowSpan);
        out+=" rowspan=\"" + spanStr + "\"";
      }
      // Column span handling, assumes that column spans will have
      // empty strings, which would indicate the sequence "||", used
      // to signify spanning columns.
      size_t colSpan = 1;
      while ((c+1 < columns) && tableContents[row][c+1].colSpan)
      {
        c++; // the spanned cells are consumed here and not visited by the outer loop
        colSpan++;
      }
      if (colSpan > 1)
      {
        QCString spanStr;
        spanStr.setNum(colSpan);
        out+=" colspan=\"" + spanStr + "\"";
      }
      // need at least one space on either side of the cell text in
      // order for doxygen to do other formatting
      out+="> " + cellText + " \\ilinebr </" + cellTag + ">";
    }
    // all rows after the first one are body rows
    cellTag = "td";
    cellClass = "class=\"markdownTableBody";
    out+="  </tr>";
  }
  out+="</table>\n";

  AUTO_TRACE_EXIT("i={}",i);
  return i;
}
2685
2686
2687static bool hasLineBreak(std::string_view data)
2688{
2689 AUTO_TRACE("data='{}'",Trace::trunc(data));
2690 size_t i=0;
2691 size_t j=0;
2692 // search for end of line and also check if it is not a completely blank
2693 while (i<data.size() && data[i]!='\n')
2694 {
2695 if (data[i]!=' ' && data[i]!='\t') j++; // some non whitespace
2696 i++;
2697 }
2698 if (i>=data.size()) { return 0; } // empty line
2699 if (i<2) { return 0; } // not long enough
2700 bool res = (j>0 && data[i-1]==' ' && data[i-2]==' '); // non blank line with at two spaces at the end
2701 AUTO_TRACE_EXIT("result={}",res);
2702 return res;
2703}
2704
2705
{
  // Writes the single line in data to out: as <hr> if it is a horizontal ruler, as a
  // section command or <hN> tag if it is an ATX style header, and unmodified otherwise.
  AUTO_TRACE("data='{}'",Trace::trunc(data));
  int level=0;
  QCString header;
  QCString id;
  if (isHRuler(data))
  {
    out+="<hr>\n";
  }
  else if ((level=isAtxHeader(data,header,id,TRUE)))
  {
    QCString hTag;
    if (!id.isEmpty()) // header with an id => emit a section command followed by id and title
    {
      switch (level)
      {
        case SectionType::Section:         out+="@section ";         break;
        case SectionType::Subsection:      out+="@subsection ";      break;
        case SectionType::Subsubsection:   out+="@subsubsection ";   break;
        case SectionType::Paragraph:       out+="@paragraph ";       break;
        case SectionType::Subparagraph:    out+="@subparagraph ";    break;
        case SectionType::Subsubparagraph: out+="@subsubparagraph "; break;
      }
      out+=id;
      out+=" ";
      out+=header;
      out+="\n";
    }
    else // header without id => emit a plain HTML header tag
    {
      hTag.sprintf("h%d",level);
      out+="<"+hTag+">";
      out+=header;
      out+="</"+hTag+">\n";
    }
  }
  else if (data.size()>0) // nothing interesting -> just output the line
  {
    // write the line without its trailing newline, so that a forced line break
    // can be inserted in front of the newline
    size_t tmpSize = data.size();
    if (data[data.size()-1] == '\n') tmpSize--;
    out+=data.substr(0,tmpSize);

    if (hasLineBreak(data))
    {
      out+="\\ilinebr<br>";
    }
    if (tmpSize != data.size()) out+='\n'; // restore the newline that was held back
  }
}
2756
/// Maps the (lower case) marker found on the first line of a GitHub style alert
/// block quote to the doxygen command that is written instead of the quote.
static const std::unordered_map<std::string,std::string> g_quotationHeaderMap = {
  // GitHub style   Doxygen command
  { "[!note]",      "\\note"      },
  { "[!warning]",   "\\warning"   },
  { "[!tip]",       "\\remark"    },
  { "[!caution]",   "\\attention" },
  { "[!important]", "\\important" }
};
2765
2766size_t Markdown::Private::writeBlockQuote(std::string_view data)
2767{
2768 AUTO_TRACE("data='{}'",Trace::trunc(data));
2769 size_t i=0;
2770 int curLevel=0;
2771 size_t end=0;
2772 const size_t size = data.size();
2773 std::string startCmd;
2774 int isGitHubAlert = false;
2775 int isGitHubFirst = false;
2776 while (i<size)
2777 {
2778 // find end of this line
2779 end=i+1;
2780 while (end<=size && data[end-1]!='\n') end++;
2781 size_t j=i;
2782 int level=0;
2783 size_t indent=i;
2784 // compute the quoting level
2785 while (j<end && (data[j]==' ' || data[j]=='>'))
2786 {
2787 if (data[j]=='>') { level++; indent=j+1; }
2788 else if (j>0 && data[j-1]=='>') indent=j+1;
2789 j++;
2790 }
2791 if (indent>0 && j>0 && data[j-1]=='>' &&
2792 !(j==size || data[j]=='\n')) // disqualify last > if not followed by space
2793 {
2794 indent--;
2795 level--;
2796 j--;
2797 }
2798 AUTO_TRACE_ADD("indent={} i={} j={} end={} level={} line={}",indent,i,j,end,level,Trace::trunc(&data[i]));
2799 if (level==0 && j<end-1 && !isListMarker(data.substr(j)) && !isHRuler(data.substr(j)))
2800 {
2801 level = curLevel; // lazy
2802 }
2803 if (level==1)
2804 {
2805 QCString txt = stripWhiteSpace(data.substr(indent,end-indent));
2806 auto it = g_quotationHeaderMap.find(txt.lower().str()); // TODO: in C++20 the std::string can be dropped
2807 if (it != g_quotationHeaderMap.end())
2808 {
2809 isGitHubAlert = true;
2810 isGitHubFirst = true;
2811 startCmd = it->second;
2812 }
2813 }
2814 if (level>curLevel) // quote level increased => add start markers
2815 {
2816 if (level!=1 || !isGitHubAlert) // normal block quote
2817 {
2818 for (int l=curLevel;l<level-1;l++)
2819 {
2820 out+="<blockquote>";
2821 }
2822 out += "<blockquote>&zwj;"; // empty blockquotes are also shown
2823 }
2824 else if (!startCmd.empty()) // GitHub style alert
2825 {
2826 out += startCmd + " ";
2827 }
2828 }
2829 else if (level<curLevel) // quote level decreased => add end markers
2830 {
2831 int decrLevel = curLevel;
2832 if (level==0 && isGitHubAlert)
2833 {
2834 decrLevel--;
2835 }
2836 for (int l=level;l<decrLevel;l++)
2837 {
2838 out += "</blockquote>\\ilinebr ";
2839 }
2840 }
2841 if (level==0)
2842 {
2843 curLevel=0;
2844 break; // end of quote block
2845 }
2846 // copy line without quotation marks
2847 if (curLevel!=0 || !isGitHubAlert)
2848 {
2849 std::string_view txt = data.substr(indent,end-indent);
2850 if (stripWhiteSpace(txt).empty() && !startCmd.empty())
2851 {
2852 if (!isGitHubFirst) out += "<br>";
2853 out += "<br>\n";
2854 }
2855 else
2856 {
2857 out += txt;
2858 }
2859 isGitHubFirst = false;
2860 }
2861 else // GitHub alert section
2862 {
2863 out+= "\n";
2864 }
2865 curLevel=level;
2866 // proceed with next line
2867 i=end;
2868 }
2869 // end of comment within blockquote => add end markers
2870 if (isGitHubAlert) // GitHub alert doesn't have a blockquote
2871 {
2872 curLevel--;
2873 }
2874 for (int l=0;l<curLevel;l++)
2875 {
2876 out+="</blockquote>";
2877 }
2878 AUTO_TRACE_EXIT("i={}",i);
2879 return i;
2880}
2881
// For code blocks that are outputted as part of an indented include or snippet command, we need to filter out
// the location string, i.e. '\ifile "..." \iline \ilinebr'.
//
// data     the text to inspect.
// indent   maximum number of spaces to skip after the location string.
// offset   in: position in data where the search starts; out (only when true is returned):
//          position just after the location string, the skipped spaces and an optional newline.
// location out (only when true is returned): the location string, including the space in
//          front of \ifile if there is one, followed by a newline.
// returns  true if, after optional spaces, the text at offset starts with '\ifile "' and a
//          '\ilinebr ' is found on the same line.
bool skipOverFileAndLineCommands(std::string_view data,size_t indent,size_t &offset,std::string &location)
{
  constexpr std::string_view fileCmd   = "\\ifile \"";
  constexpr std::string_view lineBrCmd = "\\ilinebr ";
  const size_t size = data.size();
  size_t i = offset;
  while (i<size && data[i]==' ') i++;
  // substr() clamps the length to the end of the view, so the comparison can only succeed
  // when the complete command fits inside data
  if (i>=size || data.substr(i,fileCmd.size())!=fileCmd)
  {
    return false;
  }
  size_t locStart = i;
  if (i>offset) locStart--; // include the space before \ifile
  i+=fileCmd.size();
  bool found=false;
  while (i+lineBrCmd.size()<size && data[i]!='\n')
  {
    if (data.substr(i,lineBrCmd.size())==lineBrCmd)
    {
      found=true;
      break;
    }
    i++;
  }
  if (!found)
  {
    return false;
  }
  i+=lineBrCmd.size();
  location=data.substr(locStart,i-locStart);
  location+='\n';
  while (indent>0 && i<size && data[i]==' ') i++,indent--;
  if (i<size && data[i]=='\n') i++;
  offset = i;
  return true;
}
2917
2918size_t Markdown::Private::writeCodeBlock(std::string_view data,size_t refIndent)
2919{
2920 AUTO_TRACE("data='{}' refIndent={}",Trace::trunc(data),refIndent);
2921 const size_t size = data.size();
2922 size_t i=0;
2923 // no need for \ilinebr here as the previous line was empty and was skipped
2924 out+="@iverbatim\n";
2925 int emptyLines=0;
2926 std::string location;
2927 while (i<size)
2928 {
2929 // find end of this line
2930 size_t end=i+1;
2931 while (end<=size && data[end-1]!='\n') end++;
2932 size_t j=i;
2933 size_t indent=0;
2934 while (j<end && data[j]==' ') j++,indent++;
2935 //printf("j=%d end=%d indent=%d refIndent=%d tabSize=%d data={%s}\n",
2936 // j,end,indent,refIndent,Config_getInt(TAB_SIZE),qPrint(QCString(data+i).left(end-i-1)));
2937 if (j==end-1) // empty line
2938 {
2939 emptyLines++;
2940 i=end;
2941 }
2942 else if (indent>=refIndent+codeBlockIndent) // enough indent to continue the code block
2943 {
2944 while (emptyLines>0) // write skipped empty lines
2945 {
2946 // add empty line
2947 out+="\n";
2948 emptyLines--;
2949 }
2950 // add code line minus the indent
2951 size_t offset = i+refIndent+codeBlockIndent;
2952 std::string lineLoc;
2953 if (skipOverFileAndLineCommands(data,codeBlockIndent,offset,lineLoc))
2954 {
2955 location = lineLoc;
2956 }
2957 out+=data.substr(offset,end-offset);
2958 i=end;
2959 }
2960 else // end of code block
2961 {
2962 break;
2963 }
2964 }
2965 out+="@endiverbatim";
2966 if (!location.empty())
2967 {
2968 out+=location;
2969 }
2970 else
2971 {
2972 out+="\\ilinebr ";
2973 }
2974 while (emptyLines>0) // write skipped empty lines
2975 {
2976 // add empty line
2977 out+="\n";
2978 emptyLines--;
2979 }
2980 AUTO_TRACE_EXIT("i={}",i);
2981 return i;
2982}
2983
2984// start searching for the end of the line start at offset \a i
2985// keeping track of possible blocks that need to be skipped.
2986size_t Markdown::Private::findEndOfLine(std::string_view data,size_t offset)
2987{
2988 AUTO_TRACE("data='{}'",Trace::trunc(data));
2989 // find end of the line
2990 const size_t size = data.size();
2991 size_t nb=0, end=offset+1, j=0;
2992 while (end<=size && (j=isNewline(data.data()+end-1))==0)
2993 {
2994 // while looking for the end of the line we might encounter a block
2995 // that needs to be passed unprocessed.
2996 if ((data[end-1]=='\\' || data[end-1]=='@') && // command
2997 (end<=1 || (data[end-2]!='\\' && data[end-2]!='@')) // not escaped
2998 )
2999 {
3000 QCString endBlockName = isBlockCommand(data.substr(end-1),end-1);
3001 end++;
3002 if (!endBlockName.isEmpty())
3003 {
3004 size_t l = endBlockName.length();
3005 for (;end+l+1<size;end++) // search for end of block marker
3006 {
3007 if ((data[end]=='\\' || data[end]=='@') &&
3008 data[end-1]!='\\' && data[end-1]!='@'
3009 )
3010 {
3011 if (qstrncmp(&data[end+1],endBlockName.data(),l)==0)
3012 {
3013 // found end marker, skip over this block
3014 //printf("feol.block out={%s}\n",qPrint(QCString(data+i).left(end+l+1-i)));
3015 end = end + l + 2;
3016 break;
3017 }
3018 }
3019 }
3020 }
3021 }
3022 else if (nb==0 && data[end-1]=='<' && size>=6 && end+6<size &&
3023 (end<=1 || (data[end-2]!='\\' && data[end-2]!='@'))
3024 )
3025 {
3026 if (tolower(data[end])=='p' && tolower(data[end+1])=='r' &&
3027 tolower(data[end+2])=='e' && (data[end+3]=='>' || data[end+3]==' ')) // <pre> tag
3028 {
3029 // skip part until including </pre>
3030 end = end + processHtmlTagWrite(data.substr(end-1),end-1,false);
3031 break;
3032 }
3033 else
3034 {
3035 end++;
3036 }
3037 }
3038 else if (nb==0 && data[end-1]=='`')
3039 {
3040 while (end<=size && data[end-1]=='`') end++,nb++;
3041 }
3042 else if (nb>0 && data[end-1]=='`')
3043 {
3044 size_t enb=0;
3045 while (end<=size && data[end-1]=='`') end++,enb++;
3046 if (enb==nb) nb=0;
3047 }
3048 else
3049 {
3050 end++;
3051 }
3052 }
3053 if (j>0) end+=j-1;
3054 AUTO_TRACE_EXIT("offset={} end={}",offset,end);
3055 return end;
3056}
3057
3058void Markdown::Private::writeFencedCodeBlock(std::string_view data,std::string_view lang,
3059 size_t blockStart,size_t blockEnd)
3060{
3061 AUTO_TRACE("data='{}' lang={} blockStart={} blockEnd={}",Trace::trunc(data),lang,blockStart,blockEnd);
3062 if (!lang.empty() && lang[0]=='.') lang=lang.substr(1);
3063 const size_t size=data.size();
3064 size_t i=0;
3065 while (i<size && (data[i]==' ' || data[i]=='\t'))
3066 {
3067 out+=data[i++];
3068 blockStart--;
3069 blockEnd--;
3070 }
3071 out+="@icode";
3072 if (!lang.empty())
3073 {
3074 out+="{"+lang+"}";
3075 }
3076 out+=" ";
3077 addStrEscapeUtf8Nbsp(data.substr(blockStart+i,blockEnd-blockStart));
3078 out+="@endicode ";
3079}
3080
/** Processes fenced code blocks and block quotes in \a data.
 *
 *  The input is scanned line by line. Handling of a line is delayed by one iteration:
 *  \c pi holds the start of the previous line, which is examined once the next line has
 *  been located. Fenced code blocks are written via writeFencedCodeBlock() or, for the
 *  languages plantuml, dot and msc, passed to processSpecialCommand(). Block quotes are
 *  written via writeBlockQuote(). All other lines are copied unmodified.
 *
 *  @param[in] data      the text to process.
 *  @param[in] refIndent the initial reference indent, also used again when a list ends.
 *  @returns the processed text (the content of \c out, which is cleared at the start).
 */
QCString Markdown::Private::processQuotations(std::string_view data,size_t refIndent)
{
  AUTO_TRACE("data='{}' refIndex='{}'",Trace::trunc(data),refIndent);
  out.clear();
  size_t i=0,end=0;
  size_t pi=std::string::npos;  // start of the previous line, npos if there is none pending
  bool newBlock = false;        // the previous line was empty
  bool insideList = false;
  size_t currentIndent = refIndent;
  size_t listIndent = refIndent;
  const size_t size = data.size();
  QCString lang;
  while (i<size)
  {
    end = findEndOfLine(data,i);
    // line is now found at [i..end)

    // NOTE(review): the bound compares lineIndent with the absolute position end rather
    // than with the line length end-i - confirm this is intended
    size_t lineIndent=0;
    while (lineIndent<end && data[i+lineIndent]==' ') lineIndent++;
    //printf("** lineIndent=%d line=(%s)\n",lineIndent,qPrint(QCString(data+i).left(end-i)));

    if (newBlock)
    {
      //printf("** end of block\n");
      if (insideList && lineIndent<currentIndent) // end of list
      {
        //printf("** end of list\n");
        currentIndent = refIndent;
        insideList = false;
      }
      newBlock = false;
    }

    if ((listIndent=isListMarker(data.substr(i,end-i)))) // see if we need to increase the indent level
    {
      if (listIndent<currentIndent+4)
      {
        //printf("** start of list\n");
        insideList = true;
        currentIndent = listIndent;
      }
    }
    else if (isEndOfList(data.substr(i,end-i)))
    {
      //printf("** end of list\n");
      insideList = false;
      currentIndent = listIndent; // listIndent is 0 here, as isListMarker() returned 0
    }
    else if (isEmptyLine(data.substr(i,end-i)))
    {
      //printf("** new block\n");
      newBlock = true;
    }
    //printf("currentIndent=%d listIndent=%d refIndent=%d\n",currentIndent,listIndent,refIndent);

    if (pi!=std::string::npos) // there is a previous line to handle
    {
      size_t blockStart=0, blockEnd=0, blockOffset=0;
      if (isFencedCodeBlock(data.substr(pi),currentIndent,lang,blockStart,blockEnd,blockOffset))
      {
        // Passes the content of the fenced block to processSpecialCommand(), wrapped in
        // @startCmd ... @endCmd unless the content already starts with the start command.
        auto addSpecialCommand = [&](const QCString &startCmd,const QCString &endCmd)
        {
          size_t cmdPos = pi+blockStart+1;
          QCString pl = data.substr(cmdPos,blockEnd-blockStart-1);
          size_t ii = 0;
          int nl = 1;
          // check for absence of start command, either @start<cmd>, or \\start<cmd>
          while (ii<pl.length() && qisspace(pl[ii]))
          {
            if (pl[ii]=='\n') nl++;
            ii++; // skip leading whitespace
          }
          bool addNewLines = false;
          if (ii+startCmd.length()>=pl.length() || // no room for start command
              (pl[ii]!='\\' && pl[ii]!='@') ||     // no @ or \ after whitespace
              qstrncmp(pl.data()+ii+1,startCmd.data(),startCmd.length())!=0) // no start command
          {
            // input:                     output:
            // ----------------------------------------------------
            // ```{plantuml}          =>   @startuml
            // A->B                        A->B
            // ```                         @enduml
            // ----------------------------------------------------
            pl = "@"+startCmd+"\n" + pl + "@"+endCmd;
            addNewLines = false;
          }
          else // we have a @start... command inside the code block
          {
            // input:                     output:
            // ----------------------------------------------------
            // ```{plantuml}               \n
            //                             \n
            // @startuml              =>   @startuml
            // A->B                        A->B
            // @enduml                     @enduml
            // ```                         \n
            // ----------------------------------------------------
            addNewLines = true;
          }
          if (addNewLines) for (int j=0;j<nl;j++) out+='\n';
          processSpecialCommand(pl.view().substr(ii),ii);
          if (addNewLines) out+='\n';
        };

        if (!Config_getString(PLANTUML_JAR_PATH).isEmpty() && lang=="plantuml")
        {
          addSpecialCommand("startuml","enduml");
        }
        else if (Config_getBool(HAVE_DOT) && lang=="dot")
        {
          addSpecialCommand("dot","enddot");
        }
        else if (lang=="msc") // msc is built-in
        {
          addSpecialCommand("msc","endmsc");
        }
        else // normal code block
        {
          writeFencedCodeBlock(data.substr(pi),lang.view(),blockStart,blockEnd);
        }
        // continue after the closing fence, without a pending previous line
        i=pi+blockOffset;
        pi=std::string::npos;
        end=i+1;
        continue;
      }
      else if (isBlockQuote(data.substr(pi,i-pi),currentIndent))
      {
        // continue after the part consumed by writeBlockQuote(), without a pending previous line
        i = pi+writeBlockQuote(data.substr(pi));
        pi=std::string::npos;
        end=i+1;
        continue;
      }
      else
      {
        // nothing special: copy the previous line as is
        //printf("quote out={%s}\n",QCString(data+pi).left(i-pi).data());
        out+=data.substr(pi,i-pi);
      }
    }
    pi=i;
    i=end;
  }
  if (pi!=std::string::npos && pi<size) // deal with the last line
  {
    if (isBlockQuote(data.substr(pi),currentIndent))
    {
      writeBlockQuote(data.substr(pi));
    }
    else
    {
      out+=data.substr(pi);
    }
  }

  //printf("Process quotations\n---- input ----\n%s\n---- output ----\n%s\n------------\n",
  //    qPrint(s),prv->out.get());

  return out;
}
3239
3240QCString Markdown::Private::processBlocks(std::string_view data,const size_t indent)
3241{
3242 AUTO_TRACE("data='{}' indent={}",Trace::trunc(data),indent);
3243 out.clear();
3244 size_t pi = std::string::npos;
3245 QCString id,link,title;
3246
3247#if 0 // commented out, since starting with a comment block is probably a usage error
3248 // see also http://stackoverflow.com/q/20478611/784672
3249
3250 // special case when the documentation starts with a code block
3251 // since the first line is skipped when looking for a code block later on.
3252 if (end>codeBlockIndent && isCodeBlock(data,0,end,blockIndent))
3253 {
3254 i=writeCodeBlock(out,data,size,blockIndent);
3255 end=i+1;
3256 pi=-1;
3257 }
3258#endif
3259
3260 size_t currentIndent = indent;
3261 size_t listIndent = indent;
3262 bool insideList = false;
3263 bool newBlock = false;
3264 // process each line
3265 size_t i=0;
3266 while (i<data.size())
3267 {
3268 size_t end = findEndOfLine(data,i);
3269 // line is now found at [i..end)
3270
3271 size_t lineIndent=0;
3272 int level = 0;
3273 while (lineIndent<end && data[i+lineIndent]==' ') lineIndent++;
3274 //printf("** lineIndent=%d line=(%s)\n",lineIndent,qPrint(QCString(data+i).left(end-i)));
3275
3276 if (newBlock)
3277 {
3278 //printf("** end of block\n");
3279 if (insideList && lineIndent<currentIndent) // end of list
3280 {
3281 //printf("** end of list\n");
3282 currentIndent = indent;
3283 insideList = false;
3284 }
3285 newBlock = false;
3286 }
3287
3288 if ((listIndent=isListMarker(data.substr(i,end-i)))) // see if we need to increase the indent level
3289 {
3290 if (listIndent<currentIndent+4)
3291 {
3292 //printf("** start of list\n");
3293 insideList = true;
3294 currentIndent = listIndent;
3295 }
3296 }
3297 else if (isEndOfList(data.substr(i,end-i)))
3298 {
3299 //printf("** end of list\n");
3300 insideList = false;
3301 currentIndent = listIndent;
3302 }
3303 else if (isEmptyLine(data.substr(i,end-i)))
3304 {
3305 //printf("** new block\n");
3306 newBlock = true;
3307 }
3308
3309 //printf("indent=%d listIndent=%d blockIndent=%d\n",indent,listIndent,blockIndent);
3310
3311 //printf("findEndOfLine: pi=%d i=%d end=%d\n",pi,i,end);
3312
3313 if (pi!=std::string::npos)
3314 {
3315 size_t blockStart=0, blockEnd=0, blockOffset=0;
3316 QCString lang;
3317 size_t blockIndent = currentIndent;
3318 size_t ref = 0;
3319 //printf("isHeaderLine(%s)=%d\n",QCString(data+i).left(size-i).data(),level);
3320 QCString endBlockName;
3321 if (data[i]=='@' || data[i]=='\\') endBlockName = isBlockCommand(data.substr(i),i);
3322 if (!endBlockName.isEmpty())
3323 {
3324 // handle previous line
3325 if (isLinkRef(data.substr(pi,i-pi),id,link,title))
3326 {
3327 linkRefs.emplace(id.lower().str(),LinkRef(link,title));
3328 }
3329 else
3330 {
3331 writeOneLineHeaderOrRuler(data.substr(pi,i-pi));
3332 }
3333 out+=data[i];
3334 i++;
3335 size_t l = endBlockName.length();
3336 while (i+l<data.size())
3337 {
3338 if ((data[i]=='\\' || data[i]=='@') && // command
3339 data[i-1]!='\\' && data[i-1]!='@') // not escaped
3340 {
3341 if (qstrncmp(&data[i+1],endBlockName.data(),l)==0)
3342 {
3343 out+=data[i];
3344 out+=endBlockName;
3345 i+=l+1;
3346 break;
3347 }
3348 }
3349 out+=data[i];
3350 i++;
3351 }
3352 }
3353 else if ((level=isHeaderline(data.substr(i),TRUE))>0)
3354 {
3355 //printf("Found header at %d-%d\n",i,end);
3356 while (pi<data.size() && data[pi]==' ') pi++;
3357 QCString header = data.substr(pi,i-pi-1);
3358 id = extractTitleId(header, level);
3359 //printf("header='%s' is='%s'\n",qPrint(header),qPrint(id));
3360 if (!header.isEmpty())
3361 {
3362 if (!id.isEmpty())
3363 {
3364 out+=level==1?"@section ":"@subsection ";
3365 out+=id;
3366 out+=" ";
3367 out+=header;
3368 out+="\n\n";
3369 }
3370 else
3371 {
3372 out+=level==1?"<h1>":"<h2>";
3373 out+=header;
3374 out+=level==1?"\n</h1>\n":"\n</h2>\n";
3375 }
3376 }
3377 else
3378 {
3379 out+="\n<hr>\n";
3380 }
3381 pi=std::string::npos;
3382 i=end;
3383 end=i+1;
3384 continue;
3385 }
3386 else if ((ref=isLinkRef(data.substr(pi),id,link,title)))
3387 {
3388 //printf("found link ref: id='%s' link='%s' title='%s'\n",
3389 // qPrint(id),qPrint(link),qPrint(title));
3390 linkRefs.emplace(id.lower().str(),LinkRef(link,title));
3391 i=ref+pi;
3392 end=i+1;
3393 }
3394 else if (isFencedCodeBlock(data.substr(pi),currentIndent,lang,blockStart,blockEnd,blockOffset))
3395 {
3396 //printf("Found FencedCodeBlock lang='%s' start=%d end=%d code={%s}\n",
3397 // qPrint(lang),blockStart,blockEnd,QCString(data+pi+blockStart).left(blockEnd-blockStart).data());
3398 writeFencedCodeBlock(data.substr(pi),lang.view(),blockStart,blockEnd);
3399 i=pi+blockOffset;
3400 pi=std::string::npos;
3401 end=i+1;
3402 continue;
3403 }
3404 else if (isCodeBlock(data.substr(i,end-i),i,blockIndent))
3405 {
3406 // skip previous line (it is empty anyway)
3407 i+=writeCodeBlock(data.substr(i),blockIndent);
3408 pi=std::string::npos;
3409 end=i+1;
3410 continue;
3411 }
3412 else if (isTableBlock(data.substr(pi)))
3413 {
3414 i=pi+writeTableBlock(data.substr(pi));
3415 pi=std::string::npos;
3416 end=i+1;
3417 continue;
3418 }
3419 else
3420 {
3421 writeOneLineHeaderOrRuler(data.substr(pi,i-pi));
3422 }
3423 }
3424 pi=i;
3425 i=end;
3426 }
3427 //printf("last line %d size=%d\n",i,size);
3428 if (pi!=std::string::npos && pi<data.size()) // deal with the last line
3429 {
3430 if (isLinkRef(data.substr(pi),id,link,title))
3431 {
3432 //printf("found link ref: id='%s' link='%s' title='%s'\n",
3433 // qPrint(id),qPrint(link),qPrint(title));
3434 linkRefs.emplace(id.lower().str(),LinkRef(link,title));
3435 }
3436 else
3437 {
3438 writeOneLineHeaderOrRuler(data.substr(pi));
3439 }
3440 }
3441
3442 return out;
3443}
3444
3445
// Body of the helper that classifies how the documentation in `docs` starts:
// with a page command, a mainpage command, a dir command, or none of these.
// NOTE(review): the line holding the signature (listing line 3446) is missing from
// this extract; the symbol index at the end of the listing gives it as
// `static ExplicitPageResult isExplicitPage(const QCString &docs)` - confirm.
// NOTE(review): listing lines 3474, 3479, 3488 and 3492 are missing as well; each
// directly follows an AUTO_TRACE_EXIT and is presumably the `return` of the
// ExplicitPageResult value named in that trace message - confirm against the file.
3447{
3448 AUTO_TRACE("docs={}",Trace::trunc(docs));
3449 size_t i=0;
3450 std::string_view data(docs.str());
3451 const size_t size = data.size();
3452 if (!data.empty())
3453 {
// skip leading spaces and newlines
3454 while (i<size && (data[i]==' ' || data[i]=='\n'))
3455 {
3456 i++;
3457 }
3458 if (i+5<size && data[i]=='<' && qstrncmp(&data[i],"<!--!",5)==0) // skip over <!--! marker
3459 {
3460 i+=5;
3461 while (i<size && (data[i]==' ' || data[i]=='\n')) // skip over spaces after the <!--! marker
3462 {
3463 i++;
3464 }
3465 }
// a command starts with '\' or '@'; check for "page " or "mainpage" right after it
3466 if (i+1<size &&
3467 (data[i]=='\\' || data[i]=='@') &&
3468 (qstrncmp(&data[i+1],"page ",5)==0 || qstrncmp(&data[i+1],"mainpage",8)==0)
3469 )
3470 {
// tell the two commands apart: "page " versus "mainpage"
3471 if (qstrncmp(&data[i+1],"page ",5)==0)
3472 {
3473 AUTO_TRACE_EXIT("result=ExplicitPageResult::explicitPage");
3475 }
3476 else
3477 {
3478 AUTO_TRACE_EXIT("result=ExplicitPageResult::explicitMainPage");
3480 }
3481 }
// otherwise check for a dir command, followed by either a newline or a space
3482 else if (i+1<size &&
3483 (data[i]=='\\' || data[i]=='@') &&
3484 (qstrncmp(&data[i+1],"dir\n",4)==0 || qstrncmp(&data[i+1],"dir ",4)==0)
3485 )
3486 {
3487 AUTO_TRACE_EXIT("result=ExplicitPageResult::explicitDirPage");
3489 }
3490 }
// reached for empty docs and for docs that start with none of the commands above
3491 AUTO_TRACE_EXIT("result=ExplicitPageResult::notExplicit");
3493}
3494
3495QCString Markdown::extractPageTitle(QCString &docs, QCString &id, int &prepend, bool &isIdGenerated)
3496{
3497 AUTO_TRACE("docs={} prepend={}",Trace::trunc(docs),id,prepend);
3498 // first first non-empty line
3499 prepend = 0;
3500 QCString title;
3501 size_t i=0;
3502 QCString docs_org(docs);
3503 std::string_view data(docs_org.str());
3504 const size_t size = data.size();
3505 docs.clear();
3506 while (i<size && (data[i]==' ' || data[i]=='\n'))
3507 {
3508 if (data[i]=='\n') prepend++;
3509 i++;
3510 }
3511 if (i>=size) { return QCString(); }
3512 size_t end1=i+1;
3513 while (end1<size && data[end1-1]!='\n') end1++;
3514 //printf("i=%d end1=%d size=%d line='%s'\n",i,end1,size,docs.mid(i,end1-i).data());
3515 // first line from i..end1
3516 if (end1<size)
3517 {
3518 // second line form end1..end2
3519 size_t end2=end1+1;
3520 while (end2<size && data[end2-1]!='\n') end2++;
3521 if (prv->isHeaderline(data.substr(end1),FALSE))
3522 {
3523 title = data.substr(i,end1-i-1);
3524 docs+="\n\n"+docs_org.mid(end2);
3525 id = prv->extractTitleId(title, 0, &isIdGenerated);
3526 //printf("extractPageTitle(title='%s' docs='%s' id='%s')\n",title.data(),docs.data(),id.data());
3527 AUTO_TRACE_EXIT("result={} id={} isIdGenerated={}",Trace::trunc(title),id,isIdGenerated);
3528 return title;
3529 }
3530 }
3531 if (i<end1 && prv->isAtxHeader(data.substr(i,end1-i),title,id,FALSE,&isIdGenerated)>0)
3532 {
3533 docs+="\n";
3534 docs+=docs_org.mid(end1);
3535 }
3536 else
3537 {
3538 docs=docs_org;
3539 id = prv->extractTitleId(title, 0, &isIdGenerated);
3540 }
3541 AUTO_TRACE_EXIT("result={} id={} isIdGenerated={}",Trace::trunc(title),id,isIdGenerated);
3542 return title;
3543}
3544
3545
3546//---------------------------------------------------------------------------
3547
3548QCString Markdown::process(const QCString &input, int &startNewlines, bool fromParseInput)
3549{
3550 if (input.isEmpty()) return input;
3551 size_t refIndent=0;
3552
3553 // for replace tabs by spaces
3554 QCString s = input;
3555 if (s.at(s.length()-1)!='\n') s += "\n"; // see PR #6766
3556 s = detab(s,refIndent);
3557 //printf("======== DeTab =========\n---- output -----\n%s\n---------\n",qPrint(s));
3558
3559 // then process quotation blocks (as these may contain other blocks)
3560 s = prv->processQuotations(s.view(),refIndent);
3561 //printf("======== Quotations =========\n---- output -----\n%s\n---------\n",qPrint(s));
3562
3563 // then process block items (headers, rules, and code blocks, references)
3564 s = prv->processBlocks(s.view(),refIndent);
3565 //printf("======== Blocks =========\n---- output -----\n%s\n---------\n",qPrint(s));
3566
3567 // finally process the inline markup (links, emphasis and code spans)
3568 prv->out.clear();
3569 prv->out.reserve(s.length());
3570 prv->processInline(s.view());
3571 if (fromParseInput)
3572 {
3573 Debug::print(Debug::Markdown,0,"---- output -----\n{}\n=========\n",qPrint(prv->out));
3574 }
3575 else
3576 {
3577 Debug::print(Debug::Markdown,0,"======== Markdown =========\n---- input ------- \n{}\n---- output -----\n{}\n=========\n",input,prv->out);
3578 }
3579
3580 // post processing
3581 QCString result = substitute(prv->out,g_doxy_nbsp,"&nbsp;");
3582 const char *p = result.data();
3583 if (p)
3584 {
3585 while (*p==' ') p++; // skip over spaces
3586 while (*p=='\n') {startNewlines++;p++;}; // skip over newlines
3587 if (qstrncmp(p,"<br>",4)==0) p+=4; // skip over <br>
3588 }
3589 if (p>result.data())
3590 {
3591 // strip part of the input
3592 result = result.mid(static_cast<int>(p-result.data()));
3593 }
3594 return result;
3595}
3596
3597//---------------------------------------------------------------------------
3598
// Body of the function that derives a page identifier from a markdown file name:
// the result is "md_" followed by the escaped file name without its extension.
// NOTE(review): the signature line (listing line 3599) is missing from this extract;
// the symbol index at the end of the listing gives it as
// `QCString markdownFileNameToId(const QCString &fileName)` - confirm.
3600{
3601 AUTO_TRACE("fileName={}",fileName);
// resolve to an absolute path first, then let stripFromPath() shorten it
3602 std::string absFileName = FileInfo(fileName.str()).absFilePath();
3603 QCString baseFn = stripFromPath(absFileName.c_str());
// cut off everything from the last '.' onwards (the extension), if there is one
3604 int i = baseFn.findRev('.');
3605 if (i!=-1) baseFn = baseFn.left(i);
// escape the remaining name; both boolean options of escapeCharsInString are passed as false
3606 QCString baseName = escapeCharsInString(baseFn,false,false);
3607 //printf("markdownFileNameToId(%s)=md_%s\n",qPrint(fileName),qPrint(baseName));
3608 QCString res = "md_"+baseName;
3609 AUTO_TRACE_EXIT("result={}",res);
3610 return res;
3611}
3612
3613//---------------------------------------------------------------------------
3614
3619
3621{
3622}
3623
3627
// Parses the content of one markdown file: the text is turned into a page (or
// main page / directory) documentation block, converted by Markdown::process() and
// then handed to the comment scanner, which fills Entry objects below `root`.
// NOTE(review): the first line of the signature (listing line 3628) is missing from
// this extract; the symbol index at the end of the listing gives the declaration as
// `void parseInput(const QCString &fileName, const char *fileBuf,
//  const std::shared_ptr<Entry> &root, ClangTUParser *clangParser) override` - confirm.
3629 const char *fileBuf,
3630 const std::shared_ptr<Entry> &root,
3631 ClangTUParser* /*clangParser*/)
3632{
3633 std::shared_ptr<Entry> current = std::make_shared<Entry>();
3634 int prepend = 0; // number of empty lines in front
3635 current->lang = SrcLangExt::Markdown;
3636 current->fileName = fileName;
3637 current->docFile = fileName;
3638 current->docLine = 1;
3639 QCString docs = fileBuf;
3640 Debug::print(Debug::Markdown,0,"======== Markdown =========\n---- input ------- \n{}\n",fileBuf);
3641 QCString id;
3642 Markdown markdown(fileName,1,0);
3643 bool isIdGenerated = false;
// take the title (and id) from the start of the file; this may rewrite docs
3644 QCString title = markdown.extractPageTitle(docs, id, prepend, isIdGenerated).stripWhiteSpace();
3645 QCString generatedId;
// a generated id is kept separately and id is cleared, so that the file name based id is used below
3646 if (isIdGenerated)
3647 {
3648 generatedId = id;
3649 id = "";
3650 }
3651 int indentLevel=title.isEmpty() ? 0 : -1;
3652 markdown.setIndentLevel(indentLevel);
3653 FileInfo fi(fileName.str());
3654 QCString fn = fi.fileName();
// NOTE(review): listing line 3655 is missing from this extract; it presumably declares
// `titleFn`, which is used as fallback title further down - confirm against the file.
3656 QCString mdfileAsMainPage = Config_getString(USE_MDFILE_AS_MAINPAGE);
3657 QCString mdFileNameId = markdownFileNameToId(fileName);
// without an explicit id the id derived from the file name is used
3658 bool wasEmpty = id.isEmpty();
3659 if (wasEmpty) id = mdFileNameId;
3660 QCString relFileName = stripFromPath(fileName);
// with IMPLICIT_DIR_DOCS enabled a file whose lower-cased relative name ends with "/readme.md" documents its directory
3661 bool isSubdirDocs = Config_getBool(IMPLICIT_DIR_DOCS) && relFileName.lower().endsWith("/readme.md");
3662 switch (isExplicitPage(docs))
3663 {
// NOTE(review): the case labels (listing lines 3664, 3706, 3726 and 3728) are missing from
// this extract. Judging by its content the first branch handles docs without an explicit
// page command and the second one docs starting with a page command - confirm.
3665 if (!mdfileAsMainPage.isEmpty() &&
3666 (fi.absFilePath()==FileInfo(mdfileAsMainPage.str()).absFilePath()) // file reference with path
3667 )
3668 {
// this file is the one configured via USE_MDFILE_AS_MAINPAGE
3669 docs.prepend("@ianchor{" + title + "} " + id + "\\ilinebr ");
3670 docs.prepend("@mainpage "+title+"\\ilinebr ");
3671 }
3672 else if (id=="mainpage" || id=="index")
3673 {
3674 if (title.isEmpty()) title = titleFn;
3675 docs.prepend("@ianchor{" + title + "} " + id + "\\ilinebr ");
3676 docs.prepend("@mainpage "+title+"\\ilinebr ");
3677 }
3678 else if (isSubdirDocs)
3679 {
3680 docs.prepend("@dir\\ilinebr ");
3681 }
3682 else
3683 {
// normal page: fall back to titleFn as title when the file has none
3684 if (title.isEmpty())
3685 {
3686 title = titleFn;
3687 prepend = 0;
3688 }
// add anchors: for an explicit id also one with the file name based id, otherwise
// one for the generated id or (with GITHUB id style) one generated from the title
3689 if (!wasEmpty)
3690 {
3691 docs.prepend("@ianchor{" + title + "} " + id + "\\ilinebr @ianchor{" + relFileName + "} " + mdFileNameId + "\\ilinebr ");
3692 }
3693 else if (!generatedId.isEmpty())
3694 {
3695 docs.prepend("@ianchor " + generatedId + "\\ilinebr ");
3696 }
3697 else if (Config_getEnum(MARKDOWN_ID_STYLE)==MARKDOWN_ID_STYLE_t::GITHUB)
3698 {
3699 QCString autoId = AnchorGenerator::instance().generate(title.str());
3700 docs.prepend("@ianchor{" + title + "} " + autoId + "\\ilinebr ");
3701 }
3702 docs.prepend("@page "+id+" "+title+"\\ilinebr ");
3703 }
// put back one newline for every newline counted in front of the title
3704 for (int i = 0; i < prepend; i++) docs.prepend("\n");
3705 break;
3707 {
3708 // look for `@page label My Title\n` and capture `label` (match[1]) and ` My Title` (match[2])
3709 static const reg::Ex re(R"([ ]*[\\@]page\s+(\a[\w-]*)(\s*[^\n]*)\n)");
3710 reg::Match match;
3711 std::string s = docs.str();
3712 if (reg::search(s,match,re))
3713 {
// replace the original label by the file name based id and keep the original label as an extra anchor
3714 QCString orgLabel = match[1].str();
3715 QCString orgTitle = match[2].str();
3716 orgTitle = orgTitle.stripWhiteSpace();
3717 QCString newLabel = markdownFileNameToId(fileName);
3718 docs = docs.left(match[1].position())+ // part before label
3719 newLabel+ // new label
3720 match[2].str()+ // part between orgLabel and \n
3721 "\\ilinebr @ianchor{" + orgTitle + "} "+orgLabel+"\n"+ // add original anchor plus \n of above
3722 docs.right(docs.length()-match.length()); // add remainder of docs
3723 }
3724 }
3725 break;
3727 break;
3729 break;
3730 }
3731 int lineNr=1;
3732
3733 p->commentScanner.enterFile(fileName,lineNr);
// NOTE(review): listing line 3734 is missing from this extract; it presumably declares
// `prot`, which is passed to parseCommentBlock() below - confirm against the file.
3735 bool needsEntry = false;
3736 int position=0;
3737 GuardedSectionStack guards;
// lineNr is passed as the startNewlines argument, which process() increments for each leading newline it strips
3738 QCString processedDocs = markdown.process(docs,lineNr,true);
// let the comment scanner process the converted text; whenever it reports that an
// entry is needed, the current entry is moved below root and refreshed for reuse
3739 while (p->commentScanner.parseCommentBlock(
3740 this,
3741 current.get(),
3742 processedDocs,
3743 fileName,
3744 lineNr,
3745 FALSE, // isBrief
3746 FALSE, // javadoc autobrief
3747 FALSE, // inBodyDocs
3748 prot, // protection
3749 position,
3750 needsEntry,
3751 true,
3752 &guards
3753 ))
3754 {
3755 if (needsEntry)
3756 {
3757 QCString docFile = current->docFile;
3758 root->moveToSubEntryAndRefresh(current);
3759 current->lang = SrcLangExt::Markdown;
3760 current->docFile = docFile;
3761 current->docLine = lineNr;
3762 }
3763 }
3764 if (needsEntry)
3765 {
3766 root->moveToSubEntryAndKeep(current);
3767 }
3768 p->commentScanner.leaveFile(fileName,lineNr);
3769}
3770
// Forwards the prototype text to the outline parser that is registered for "*.cpp".
// NOTE(review): the signature line (listing line 3771) is missing from this extract;
// the symbol index at the end of the listing gives the declaration as
// `void parsePrototype(const QCString &text) override` - confirm.
3772{
3773 Doxygen::parserManager->getOutlineParser("*.cpp")->parsePrototype(text);
3774}
3775
3776//------------------------------------------------------------------------
#define eol
The end of line string for this machine.
static AnchorGenerator & instance()
Returns the singleton instance.
Definition anchor.cpp:38
static std::string addPrefixIfNeeded(const std::string &anchor)
Definition anchor.cpp:46
std::string generate(const std::string &title)
generates an anchor for a section with title.
Definition anchor.cpp:59
Clang parser object for a single translation unit, which consists of a source file and the directly o...
Definition clangparser.h:25
@ Markdown
Definition debug.h:37
static void print(DebugMask mask, int prio, fmt::format_string< Args... > fmt, Args &&... args)
Definition debug.h:76
static ParserManager * parserManager
Definition doxygen.h:131
static FileNameLinkedMap * imageNameLinkedMap
Definition doxygen.h:106
A model of a file symbol.
Definition filedef.h:99
Minimal replacement for QFileInfo.
Definition fileinfo.h:23
bool exists() const
Definition fileinfo.cpp:30
std::string fileName() const
Definition fileinfo.cpp:118
bool isReadable() const
Definition fileinfo.cpp:44
std::string absFilePath() const
Definition fileinfo.cpp:101
Helper class to process markdown formatted text.
Definition markdown.h:32
std::unique_ptr< Private > prv
Definition markdown.h:43
void setIndentLevel(int level)
Definition markdown.cpp:191
QCString extractPageTitle(QCString &docs, QCString &id, int &prepend, bool &isIdGenerated)
Markdown(const QCString &fileName, int lineNr, int indentLevel=0)
Definition markdown.cpp:182
QCString process(const QCString &input, int &startNewlines, bool fromParseInput=false)
void parseInput(const QCString &fileName, const char *fileBuf, const std::shared_ptr< Entry > &root, ClangTUParser *clangParser) override
Parses a single input file with the goal to build an Entry tree.
~MarkdownOutlineParser() override
void parsePrototype(const QCString &text) override
Callback function called by the comment block scanner.
std::unique_ptr< Private > p
Definition markdown.h:60
This is an alternative implementation of QCString.
Definition qcstring.h:101
int find(char c, int index=0, bool cs=TRUE) const
Definition qcstring.cpp:43
QCString & prepend(const char *s)
Definition qcstring.h:407
size_t length() const
Returns the length of the string, not counting the 0-terminator.
Definition qcstring.h:153
QCString mid(size_t index, size_t len=static_cast< size_t >(-1)) const
Definition qcstring.h:226
QCString lower() const
Definition qcstring.h:234
bool endsWith(const char *s) const
Definition qcstring.h:509
char & at(size_t i)
Returns a reference to the character at index i.
Definition qcstring.h:578
bool isEmpty() const
Returns TRUE iff the string is empty.
Definition qcstring.h:150
QCString stripWhiteSpace() const
returns a copy of this string with leading and trailing whitespace removed
Definition qcstring.h:245
const std::string & str() const
Definition qcstring.h:537
QCString & setNum(short n)
Definition qcstring.h:444
QCString simplifyWhiteSpace() const
return a copy of this string with leading and trailing whitespace removed and multiple whitespace cha...
Definition qcstring.cpp:185
QCString right(size_t len) const
Definition qcstring.h:219
size_t size() const
Returns the length of the string, not counting the 0-terminator.
Definition qcstring.h:156
QCString & sprintf(const char *format,...)
Definition qcstring.cpp:29
int findRev(char c, int index=-1, bool cs=TRUE) const
Definition qcstring.cpp:91
const char * data() const
Returns a pointer to the contents of the string in the form of a 0-terminated C string.
Definition qcstring.h:159
std::string_view view() const
Definition qcstring.h:161
QCString left(size_t len) const
Definition qcstring.h:214
void clear()
Definition qcstring.h:169
static constexpr int Section
Definition section.h:33
static constexpr int MaxLevel
Definition section.h:39
static constexpr int Subsection
Definition section.h:34
static constexpr int Subsubsection
Definition section.h:35
static constexpr int MinLevel
Definition section.h:32
static constexpr int Paragraph
Definition section.h:36
static constexpr int Subsubparagraph
Definition section.h:38
static constexpr int Subparagraph
Definition section.h:37
Class representing a regular expression.
Definition regex.h:39
Object representing the matching results.
Definition regex.h:153
Interface for the comment block scanner.
std::stack< GuardedSection > GuardedSectionStack
Definition commentscan.h:48
#define Config_getInt(name)
Definition config.h:34
#define Config_getBool(name)
Definition config.h:33
#define Config_getString(name)
Definition config.h:32
#define Config_getEnum(name)
Definition config.h:35
std::vector< std::string > StringVector
Definition containers.h:33
DirIterator end(const DirIterator &) noexcept
Definition dir.cpp:175
#define AUTO_TRACE_ADD(...)
Definition docnode.cpp:47
#define AUTO_TRACE(...)
Definition docnode.cpp:46
#define AUTO_TRACE_EXIT(...)
Definition docnode.cpp:48
#define AUTO_TRACE(...)
Definition markdown.cpp:61
static bool hasLineBreak(std::string_view data)
#define isIdChar(c)
Definition markdown.cpp:77
ExplicitPageResult
Definition markdown.cpp:67
@ explicitDirPage
docs start with a dir command
Definition markdown.cpp:70
@ explicitMainPage
docs start with a mainpage command
Definition markdown.cpp:69
@ explicitPage
docs start with a page command
Definition markdown.cpp:68
@ notExplicit
docs do not start with a page, mainpage or dir command
Definition markdown.cpp:71
static bool isBlockQuote(std::string_view data, size_t indent)
returns true if this line starts a block quote
static size_t isLinkRef(std::string_view data, QCString &refid, QCString &link, QCString &title)
returns end of the link ref if this is indeed a link reference.
static QCString escapeDoubleQuotes(const QCString &s)
Definition markdown.cpp:217
static bool isEndOfList(std::string_view data)
static size_t computeIndentExcludingListMarkers(std::string_view data)
static Alignment markersToAlignment(bool leftMarker, bool rightMarker)
helper function to convert presence of left and/or right alignment markers to an alignment value
Definition markdown.cpp:289
const char * g_doxy_nbsp
Definition markdown.cpp:200
static QCString escapeSpecialChars(const QCString &s)
Definition markdown.cpp:235
static bool isCodeBlock(std::string_view data, size_t offset, size_t &indent)
static bool isEmptyLine(std::string_view data)
#define AUTO_TRACE_EXIT(...)
Definition markdown.cpp:63
#define isLiTag(i)
static size_t findTableColumns(std::string_view data, size_t &start, size_t &end, size_t &columns)
Finds the location of the table's contents in the string data.
const size_t codeBlockIndent
Definition markdown.cpp:201
static ExplicitPageResult isExplicitPage(const QCString &docs)
const char * g_utf8_nbsp
Definition markdown.cpp:199
#define ignoreCloseEmphChar(c, cn)
Definition markdown.cpp:100
static const std::unordered_map< std::string, std::string > g_quotationHeaderMap
#define isOpenEmphChar(c)
Definition markdown.cpp:93
Alignment
Definition markdown.cpp:194
@ AlignLeft
Definition markdown.cpp:194
@ AlignNone
Definition markdown.cpp:194
@ AlignRight
Definition markdown.cpp:194
@ AlignCenter
Definition markdown.cpp:194
static bool isFencedCodeBlock(std::string_view data, size_t refIndent, QCString &lang, size_t &start, size_t &end, size_t &offset)
static size_t isListMarker(std::string_view data)
static bool isHRuler(std::string_view data)
static QCString getFilteredImageAttributes(std::string_view fmt, const QCString &attrs)
parse the image attributes and return attributes for given format
Definition markdown.cpp:310
bool skipOverFileAndLineCommands(std::string_view data, size_t indent, size_t &offset, std::string &location)
#define extraChar(c)
Definition markdown.cpp:84
static bool isTableBlock(std::string_view data)
Returns TRUE iff data points to the start of a table block.
size_t isNewline(std::string_view data)
Definition markdown.cpp:207
QCString markdownFileNameToId(const QCString &fileName)
processes string s and converts markdown into doxygen/html commands.
#define warn(file, line, fmt,...)
Definition message.h:97
bool isAbsolutePath(const QCString &fileName)
Definition portable.cpp:514
const char * strnstr(const char *haystack, const char *needle, size_t haystack_len)
Definition portable.cpp:617
QCString trunc(const QCString &s, size_t numChars=15)
Definition trace.h:56
Definition message.h:144
bool search(std::string_view str, Match &match, const Ex &re, size_t pos)
Search in a given string str starting at position pos for a match against regular expression re.
Definition regex.cpp:748
Portable versions of functions that are platform dependent.
static void decrLevel(yyscan_t yyscanner)
Definition pre.l:2177
QCString substitute(const QCString &s, const QCString &src, const QCString &dst)
substitute all occurrences of src in s by dst
Definition qcstring.cpp:477
int qstrncmp(const char *str1, const char *str2, size_t len)
Definition qcstring.h:75
bool qisspace(char c)
Definition qcstring.h:81
const char * qPrint(const char *s)
Definition qcstring.h:672
#define TRUE
Definition qcstring.h:37
#define FALSE
Definition qcstring.h:34
Some helper functions for std::string.
std::string_view stripWhiteSpace(std::string_view s)
Given a string view s, returns a new, narrower view on that string, skipping over any leading or trai...
Definition stringutil.h:72
int processEmphasis1(std::string_view data, char c)
process single emphasis
Definition markdown.cpp:811
int processQuoted(std::string_view data, size_t offset)
Process quoted section "...", can contain one embedded newline.
Definition markdown.cpp:981
void writeMarkdownImage(std::string_view fmt, bool inline_img, bool explicitTitle, const QCString &title, const QCString &content, const QCString &link, const QCString &attributes, const FileDef *fd)
size_t writeTableBlock(std::string_view data)
size_t writeBlockQuote(std::string_view data)
size_t isSpecialCommand(std::string_view data, size_t offset)
Definition markdown.cpp:426
std::function< int(std::string_view, size_t)> Action_t
Definition markdown.cpp:172
int processEmphasis3(std::string_view data, char c)
Parsing triple emphasis.
Definition markdown.cpp:877
int processCodeSpan(std::string_view data, size_t offset)
` parsing a code span (assuming codespan != 0)
int processSpecialCommand(std::string_view data, size_t offset)
QCString extractTitleId(QCString &title, int level, bool *pIsIdGenerated=nullptr)
void writeFencedCodeBlock(std::string_view data, std::string_view lang, size_t blockStart, size_t blockEnd)
int isHeaderline(std::string_view data, bool allowAdjustLevel)
returns whether the line is a setext-style hdr underline
size_t findEmphasisChar(std::string_view, char c, size_t c_size)
looks for the next emph char, skipping other constructs, and stopping when either it is found,...
Definition markdown.cpp:698
std::unordered_map< std::string, LinkRef > linkRefs
Definition markdown.cpp:174
void addStrEscapeUtf8Nbsp(std::string_view data)
QCString isBlockCommand(std::string_view data, size_t offset)
Definition markdown.cpp:357
size_t writeCodeBlock(std::string_view, size_t refIndent)
int processHtmlTag(std::string_view data, size_t offset)
QCString processQuotations(std::string_view data, size_t refIndent)
QCString processBlocks(std::string_view data, size_t indent)
int processEmphasis(std::string_view data, size_t offset)
int processLink(std::string_view data, size_t offset)
int processHtmlTagWrite(std::string_view data, size_t offset, bool doWrite)
Process a HTML tag.
int isAtxHeader(std::string_view data, QCString &header, QCString &id, bool allowAdjustLevel, bool *pIsIdGenerated=nullptr)
size_t findEndOfLine(std::string_view data, size_t offset)
int processEmphasis2(std::string_view data, char c)
process double emphasis
Definition markdown.cpp:845
void processInline(std::string_view data)
int processNmdash(std::string_view data, size_t offset)
Process ndash and mdashes.
Definition markdown.cpp:939
void writeOneLineHeaderOrRuler(std::string_view data)
std::array< Action_t, 256 > actions
Definition markdown.cpp:179
Protection
Protection level of members.
Definition types.h:26
@ Public
Definition types.h:26
SrcLangExt
Language as given by extension.
Definition types.h:42
@ Markdown
Definition types.h:57
SrcLangExt getLanguageFromFileName(const QCString &fileName, SrcLangExt defLang)
Definition util.cpp:5645
QCString escapeCharsInString(const QCString &name, bool allowDots, bool allowUnderscore)
Definition util.cpp:3770
bool found
Definition util.cpp:984
QCString stripExtensionGeneral(const QCString &fName, const QCString &ext)
Definition util.cpp:5351
bool isURL(const QCString &url)
Checks whether the given url starts with a supported protocol.
Definition util.cpp:6342
static QCString stripFromPath(const QCString &p, const StringVector &l)
Definition util.cpp:309
QCString detab(const QCString &s, size_t &refIndent)
Definition util.cpp:7144
StringVector split(const std::string &s, const std::string &delimiter)
split input string s by string delimiter delimiter.
Definition util.cpp:7042
QCString externalLinkTarget(const bool parent)
Definition util.cpp:6118
QCString getFileNameExtension(const QCString &fn)
Definition util.cpp:5687
FileDef * findFileDef(const FileNameLinkedMap *fnMap, const QCString &n, bool &ambig)
Definition util.cpp:3348
A bunch of utility functions.