Doxygen
Loading...
Searching...
No Matches
markdown.cpp
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * Copyright (C) 1997-2020 by Dimitri van Heesch.
4 *
5 * Permission to use, copy, modify, and distribute this software and its
6 * documentation under the terms of the GNU General Public License is hereby
7 * granted. No representations are made about the suitability of this software
8 * for any purpose. It is provided "as is" without express or implied warranty.
9 * See the GNU General Public License for more details.
10 *
11 * Documents produced by Doxygen are derivative works derived from the
12 * input used in their production; they are not affected by this license.
13 *
14 */
15
16/* Note: part of the code below is inspired by libupskirt written by
17 * Natacha Porté. Original copyright message follows:
18 *
19 * Copyright (c) 2008, Natacha Porté
20 *
21 * Permission to use, copy, modify, and distribute this software for any
22 * purpose with or without fee is hereby granted, provided that the above
23 * copyright notice and this permission notice appear in all copies.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
26 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
27 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
28 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
29 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
30 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
31 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
32 */
33
34#include <stdio.h>
35
36#include <unordered_map>
37#include <functional>
38#include <atomic>
39#include <array>
40#include <string_view>
41
42#include "markdown.h"
43#include "debug.h"
44#include "util.h"
45#include "doxygen.h"
46#include "commentscan.h"
47#include "entry.h"
48#include "config.h"
49#include "message.h"
50#include "portable.h"
51#include "regex.h"
52#include "fileinfo.h"
53#include "trace.h"
54#include "anchor.h"
55#include "stringutil.h"
56
57#if !ENABLE_MARKDOWN_TRACING
58#undef AUTO_TRACE
59#undef AUTO_TRACE_ADD
60#undef AUTO_TRACE_EXIT
61#define AUTO_TRACE(...) (void)0
62#define AUTO_TRACE_ADD(...) (void)0
63#define AUTO_TRACE_EXIT(...) (void)0
64#endif
65
// NOTE(review): the declaration line that names this enumeration is not part of this
// listing; only the enumerator list is visible here.
67{
68 explicitPage, /**< docs start with a page command */
69 explicitMainPage, /**< docs start with a mainpage command */
70 explicitDirPage, /**< docs start with a dir command */
71 notExplicit /**< docs don't start with either a page or a mainpage command */
72};
73
74//-----------
75
// Is character c part of an identifier?
// ASCII letters, ASCII digits and every byte >= 0x80 qualify; '_' does not.
// The argument is parenthesized so that any expression (e.g. a ternary) can be
// passed; note that it is still evaluated more than once.
#define isIdChar(c) \
  (((c)>='a' && (c)<='z') || \
   ((c)>='A' && (c)<='Z') || \
   ((c)>='0' && (c)<='9') || \
   (static_cast<unsigned char>(c)>=0x80)) // unicode characters
82
// Is character c (besides identifier characters) allowed right at the beginning of
// an emphasis section?
// The argument is parenthesized so that any expression can be passed; note that
// it is still evaluated more than once.
#define extraChar(c) \
  ((c)=='-' || (c)=='+' || (c)=='!' || \
   (c)=='?' || (c)=='$' || (c)=='@' || \
   (c)=='&' || (c)=='*' || (c)=='%' || \
   (c)=='[' || (c)=='(' || (c)=='.' || \
   (c)=='>' || (c)==':' || (c)==',' || \
   (c)==';' || (c)=='\'' || (c)=='"' || (c)=='`')
91
// Is character c allowed directly in front of the symbol that opens an emphasis section?
// The argument is parenthesized so that any expression can be passed; note that
// it is still evaluated more than once.
#define isOpenEmphChar(c) \
  ((c)=='\n' || (c)==' ' || (c)=='\'' || (c)=='<' || \
   (c)=='>'  || (c)=='{' || (c)=='('  || (c)=='[' || \
   (c)==','  || (c)==':' || (c)==';')
97
// Is character c (with cn being the character that follows it) an escape that prevents
// ending an emphasis section? So for example *bla (*.txt) is cool*
// A '<' only counts when it is not the start of a closing tag (cn!='/').
// Both arguments are parenthesized so that any expression can be passed; note that
// c is still evaluated more than once.
#define ignoreCloseEmphChar(c,cn) \
  ((c)=='(' || (c)=='{' || (c)=='[' || ((c)=='<' && (cn)!='/') || \
   (c)=='\\' || \
   (c)=='@')
104//----------
105
106struct TableCell
107{
108 TableCell() : colSpan(false) {}
109 QCString cellText;
110 bool colSpan;
111};
112
/** Private implementation of the Markdown class: holds the processing state and
 *  declares the helper methods that are defined further down in this file.
 *
 *  NOTE(review): the constructor initializes a member called fileName, but no
 *  declaration of that member is visible in this listing (the embedded line numbers
 *  skip 175 and 178) - presumably it is declared on a line that is not shown here.
 */
113struct Markdown::Private
114{
 /** Stores the file name, line number and indent level, and registers a handler
  *  in the actions table for each character that can start inline markup.
  */
115 Private(const QCString &fn,int line,int indent)
116 : fileName(fn), lineNr(line), indentLevel(indent)
117 {
118 // setup callback table for special characters
 // (each handler captures this and forwards to the member function with the same arguments)
119 actions[static_cast<unsigned int>('_')] = [this](std::string_view data,size_t offset) { return processEmphasis (data,offset); };
120 actions[static_cast<unsigned int>('*')] = [this](std::string_view data,size_t offset) { return processEmphasis (data,offset); };
121 actions[static_cast<unsigned int>('~')] = [this](std::string_view data,size_t offset) { return processEmphasis (data,offset); };
122 actions[static_cast<unsigned int>('`')] = [this](std::string_view data,size_t offset) { return processCodeSpan (data,offset); };
123 actions[static_cast<unsigned int>('\\')]= [this](std::string_view data,size_t offset) { return processSpecialCommand(data,offset); };
124 actions[static_cast<unsigned int>('@')] = [this](std::string_view data,size_t offset) { return processSpecialCommand(data,offset); };
125 actions[static_cast<unsigned int>('[')] = [this](std::string_view data,size_t offset) { return processLink (data,offset); };
126 actions[static_cast<unsigned int>('!')] = [this](std::string_view data,size_t offset) { return processLink (data,offset); };
127 actions[static_cast<unsigned int>('<')] = [this](std::string_view data,size_t offset) { return processHtmlTag (data,offset); };
128 actions[static_cast<unsigned int>('-')] = [this](std::string_view data,size_t offset) { return processNmdash (data,offset); };
129 actions[static_cast<unsigned int>('"')] = [this](std::string_view data,size_t offset) { return processQuoted (data,offset); };
130 }
131
 // helper methods; the ones taking (data,offset) match the Action_t signature below
132 QCString processQuotations(std::string_view data,size_t refIndent);
133 QCString processBlocks(std::string_view data,size_t indent);
134 QCString isBlockCommand(std::string_view data,size_t offset);
135 size_t isSpecialCommand(std::string_view data,size_t offset);
136 size_t findEndOfLine(std::string_view data,size_t offset);
137 int processHtmlTagWrite(std::string_view data,size_t offset,bool doWrite);
138 int processHtmlTag(std::string_view data,size_t offset);
139 int processEmphasis(std::string_view data,size_t offset);
140 int processEmphasis1(std::string_view data,char c);
141 int processEmphasis2(std::string_view data,char c);
142 int processEmphasis3(std::string_view data,char c);
143 int processNmdash(std::string_view data,size_t offset);
144 int processQuoted(std::string_view data,size_t offset);
145 int processCodeSpan(std::string_view data,size_t offset);
146 int processSpecialCommand(std::string_view data,size_t offset);
147 int processLink(std::string_view data,size_t offset);
148 size_t findEmphasisChar(std::string_view, char c, size_t c_size);
149 void addStrEscapeUtf8Nbsp(std::string_view data);
150 void processInline(std::string_view data);
151 void writeMarkdownImage(std::string_view fmt, bool inline_img, bool explicitTitle,
152 const QCString &title, const QCString &content,
153 const QCString &link, const QCString &attributes,
154 const FileDef *fd);
155 int isHeaderline(std::string_view data, bool allowAdjustLevel);
156 int isAtxHeader(std::string_view data, QCString &header,QCString &id,bool allowAdjustLevel,
157 bool *pIsIdGenerated=nullptr);
158 void writeOneLineHeaderOrRuler(std::string_view data);
159 void writeFencedCodeBlock(std::string_view data, std::string_view lang,
160 size_t blockStart,size_t blockEnd);
161 size_t writeBlockQuote(std::string_view data);
162 size_t writeCodeBlock(std::string_view,size_t refIndent);
163 size_t writeTableBlock(std::string_view data);
164 QCString extractTitleId(QCString &title, int level,bool *pIsIdGenerated=nullptr);
165
 /** A link together with its title, as stored in linkRefs. */
166 struct LinkRef
167 {
168 LinkRef(const QCString &l,const QCString &t) : link(l), title(t) {}
169 QCString link;
170 QCString title;
171 };
 // signature shared by all handlers stored in the actions table
172 using Action_t = std::function<int(std::string_view,size_t)>;
173
 // link references, looked up by a std::string key
174 std::unordered_map<std::string,LinkRef> linkRefs;
176 int lineNr = 0;
177 int indentLevel=0; // 0 is outside markdown, -1=page level
 // one slot per possible byte value; only the slots filled in by the constructor hold a handler
179 std::array<Action_t,256> actions;
180};
181
182Markdown::Markdown(const QCString &fileName,int lineNr,int indentLevel)
183 : prv(std::make_unique<Private>(fileName,lineNr,indentLevel))
184{
185 using namespace std::placeholders;
186 (void)lineNr; // not used yet
187}
188
// Defaulted destructor, defined in this source file.
// NOTE(review): presumably placed here so that Private is a complete type at the point
// where prv gets destroyed - confirm against the declaration of prv in markdown.h.
189Markdown::~Markdown() = default;
190
191void Markdown::setIndentLevel(int level) { prv->indentLevel = level; }
192
193
195
196
197//---------- constants -------
198//
// the two bytes 0xC2 0xA0, i.e. the UTF-8 encoding of a non-breaking space
199const char *g_utf8_nbsp = "\xc2\xa0"; // UTF-8 nbsp
200const char *g_doxy_nbsp = "&_doxy_nbsp;"; // doxygen escape command for UTF-8 nbsp
// NOTE(review): presumably the indentation that marks an indented code block -
// confirm at the places where codeBlockIndent is used.
201const size_t codeBlockIndent = 4;
202
203//---------- helpers -------
204
// Test if the characters at the start of data represent a new line, which is either the
// character \n or the command \ilinebr.
// Returns 0 if no newline is found, or the number of characters that make up the newline
// if found: 1 for \n, 8 for \ilinebr, and 9 for \ilinebr directly followed by a space.
// The view is never read beyond its size, so an empty or truncated view simply yields 0.
inline size_t isNewline(std::string_view data)
{
  constexpr std::string_view artificialNewline = "\\ilinebr";
  if (data.empty()) return 0;
  // normal newline
  if (data[0] == '\n') return 1;
  // artificial new line from ^^ in ALIASES
  if (data.substr(0,artificialNewline.size()) == artificialNewline)
  {
    const size_t len = artificialNewline.size();
    // a space directly after the command is counted as part of the newline
    return (data.size()>len && data[len]==' ') ? len+1 : len;
  }
  return 0;
}
215
216// escape double quotes in string
// NOTE(review): the line carrying this function's signature is not part of this listing.
// The body reads a string s and returns a copy in which each double quote that is not
// directly preceded by a backslash gets a backslash put in front of it.
// An empty s is returned as is.
218{
219 AUTO_TRACE("s={}",Trace::trunc(s));
220 if (s.isEmpty()) return s;
221 QCString result;
222 const char *p=s.data();
223 char c=0, pc='\0'; // c = current character, pc = previous character
224 while ((c=*p++)) // stops at the terminating zero
225 {
226 if (c=='"' && pc!='\\') result+='\\'; // quote that was not escaped yet
227 result+=c;
228 pc=c;
229 }
230 AUTO_TRACE_EXIT("result={}",result);
231 return result;
232}
233
234// escape characters that have a special meaning later on.
// NOTE(review): the line carrying this function's signature is not part of this listing.
// The body reads a string s and returns a copy in which the characters < > \ @ # $ &
// get a backslash in front of them, unless they appear inside a double quoted section.
// An empty s is returned as is.
236{
237 AUTO_TRACE("s={}",Trace::trunc(s));
238 if (s.isEmpty()) return s;
239 bool insideQuote=FALSE;
240 QCString result;
241 const char *p=s.data();
242 char c=0, pc='\0'; // c = current character, pc = previous character
243 while ((c=*p++)) // stops at the terminating zero; p already points at the next character
244 {
245 switch (c)
246 {
247 case '"':
 // a quote that is not preceded by a backslash opens or closes a quoted section
248 if (pc!='\\') { insideQuote=!insideQuote; }
249 result+=c;
250 break;
251 case '<':
252 // fall through
253 case '>':
254 if (!insideQuote)
255 {
256 result+='\\';
257 result+=c;
 // when '::' follows directly, its first ':' is written as '\:' and skipped;
 // the second ':' is handled by the next iteration
258 if ((p[0]==':') && (p[1]==':'))
259 {
260 result+='\\';
261 result+=':';
262 p++;
263 }
264 }
265 else
266 {
267 result+=c;
268 }
269 break;
270 case '\\': if (!insideQuote) { result+='\\'; } result+='\\'; break;
271 case '@': if (!insideQuote) { result+='\\'; } result+='@'; break;
272 // commented out next line due to regression when using % to suppress a link
273 //case '%': if (!insideQuote) { result+='\\'; } result+='%'; break;
274 case '#': if (!insideQuote) { result+='\\'; } result+='#'; break;
275 case '$': if (!insideQuote) { result+='\\'; } result+='$'; break;
276 case '&': if (!insideQuote) { result+='\\'; } result+='&'; break;
277 default:
278 result+=c; break;
279 }
280 pc=c;
281 }
282 AUTO_TRACE_EXIT("result={}",result);
283 return result;
284}
285
286/** helper function to convert presence of left and/or right alignment markers
287 * to a alignment value
288 */
289static Alignment markersToAlignment(bool leftMarker,bool rightMarker)
290{
291 if (leftMarker && rightMarker)
292 {
293 return AlignCenter;
294 }
295 else if (leftMarker)
296 {
297 return AlignLeft;
298 }
299 else if (rightMarker)
300 {
301 return AlignRight;
302 }
303 else
304 {
305 return AlignNone;
306 }
307}
308
309/** parse the image attributes and return attributes for given format */
310static QCString getFilteredImageAttributes(std::string_view fmt, const QCString &attrs)
311{
312 AUTO_TRACE("fmt={} attrs={}",fmt,attrs);
313 StringVector attrList = split(attrs.str(),",");
314 for (const auto &attr_ : attrList)
315 {
316 QCString attr = QCString(attr_).stripWhiteSpace();
317 int i = attr.find(':');
318 if (i>0) // has format
319 {
320 QCString format = attr.left(i).stripWhiteSpace().lower();
321 if (format == fmt) // matching format
322 {
323 AUTO_TRACE_EXIT("result={}",attr.mid(i+1));
324 return attr.mid(i+1); // keep part after :
325 }
326 }
327 else // option that applies to all formats
328 {
329 AUTO_TRACE_EXIT("result={}",attr);
330 return attr;
331 }
332 }
333 return QCString();
334}
335
336// Check if data contains a block command. If so returned the command
337// that ends the block. If not an empty string is returned.
338// Note When offset>0 character position -1 will be inspected.
339//
340// Checks for and skip the following block commands:
341// {@code .. { .. } .. }
342// \dot .. \enddot
343// \code .. \endcode
344// \msc .. \endmsc
345// \f$..\f$
346// \f(..\f)
347// \f[..\f]
348// \f{..\f}
349// \verbatim..\endverbatim
350// \iliteral..\endiliteral
351// \latexonly..\endlatexonly
352// \htmlonly..\endhtmlonly
353// \xmlonly..\endxmlonly
354// \rtfonly..\endrtfonly
355// \manonly..\endmanonly
356// \startuml..\enduml
357QCString Markdown::Private::isBlockCommand(std::string_view data,size_t offset)
358{
359 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
360
361 using EndBlockFunc = QCString (*)(const std::string &,bool,char);
362
363 static const auto getEndBlock = [](const std::string &blockName,bool,char) -> QCString
364 {
365 return "end"+blockName;
366 };
367 static const auto getEndCode = [](const std::string &blockName,bool openBracket,char) -> QCString
368 {
369 return openBracket ? QCString("}") : "end"+blockName;
370 };
371 static const auto getEndUml = [](const std::string &/* blockName */,bool,char) -> QCString
372 {
373 return "enduml";
374 };
375 static const auto getEndFormula = [](const std::string &/* blockName */,bool,char nextChar) -> QCString
376 {
377 switch (nextChar)
378 {
379 case '$': return "f$";
380 case '(': return "f)";
381 case '[': return "f]";
382 case '{': return "f}";
383 }
384 return "";
385 };
386
387 // table mapping a block start command to a function that can return the matching end block string
388 static const std::unordered_map<std::string,EndBlockFunc> blockNames =
389 {
390 { "dot", getEndBlock },
391 { "code", getEndCode },
392 { "icode", getEndBlock },
393 { "msc", getEndBlock },
394 { "verbatim", getEndBlock },
395 { "iverbatim", getEndBlock },
396 { "iliteral", getEndBlock },
397 { "latexonly", getEndBlock },
398 { "htmlonly", getEndBlock },
399 { "xmlonly", getEndBlock },
400 { "rtfonly", getEndBlock },
401 { "manonly", getEndBlock },
402 { "docbookonly", getEndBlock },
403 { "startuml", getEndUml },
404 { "f", getEndFormula }
405 };
406
407 const size_t size = data.size();
408 bool openBracket = offset>0 && data.data()[-1]=='{';
409 bool isEscaped = offset>0 && (data.data()[-1]=='\\' || data.data()[-1]=='@');
410 if (isEscaped) return QCString();
411
412 size_t end=1;
413 while (end<size && (data[end]>='a' && data[end]<='z')) end++;
414 if (end==1) return QCString();
415 std::string blockName(data.substr(1,end-1));
416 auto it = blockNames.find(blockName);
417 QCString result;
418 if (it!=blockNames.end()) // there is a function assigned
419 {
420 result = it->second(blockName, openBracket, end<size ? data[end] : 0);
421 }
422 AUTO_TRACE_EXIT("result={}",result);
423 return result;
424}
425
426size_t Markdown::Private::isSpecialCommand(std::string_view data,size_t offset)
427{
428 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
429
430 using EndCmdFunc = size_t (*)(std::string_view,size_t);
431
432 static const auto endOfLine = [](std::string_view data_,size_t offset_) -> size_t
433 {
434 // skip until the end of line (allowing line continuation characters)
435 char lc = 0;
436 char c = 0;
437 while (offset_<data_.size() && ((c=data_[offset_])!='\n' || lc=='\\'))
438 {
439 if (c=='\\') lc='\\'; // last character was a line continuation
440 else if (c!=' ') lc=0; // rest line continuation
441 offset_++;
442 }
443 return offset_;
444 };
445
446 static const auto endOfLabel = [](std::string_view data_,size_t offset_) -> size_t
447 {
448 if (offset_<data_.size() && data_[offset_]==' ') // we expect a space before the label
449 {
450 char c = 0;
451 offset_++;
452 // skip over spaces
453 while (offset_<data_.size() && data_[offset_]==' ') offset_++;
454 // skip over label
455 while (offset_<data_.size() && (c=data_[offset_])!=' ' && c!='\\' && c!='@' && c!='\n') offset_++;
456 return offset_;
457 }
458 return 0;
459 };
460
461 static const auto endOfLabelOpt = [](std::string_view data_,size_t offset_) -> size_t
462 {
463 size_t index=offset_;
464 if (index<data_.size() && data_[index]==' ') // skip over optional spaces
465 {
466 index++;
467 while (index<data_.size() && data_[index]==' ') index++;
468 }
469 if (index<data_.size() && data_[index]=='{') // find matching '}'
470 {
471 index++;
472 char c = 0;
473 while (index<data_.size() && (c=data_[index])!='}' && c!='\\' && c!='@' && c!='\n') index++;
474 if (index==data_.size() || data_[index]!='}') return 0; // invalid option
475 offset_=index+1; // part after {...} is the option
476 }
477 return endOfLabel(data_,offset_);
478 };
479
480 static const auto endOfParam = [](std::string_view data_,size_t offset_) -> size_t
481 {
482 size_t index=offset_;
483 if (index<data_.size() && data_[index]==' ') // skip over optional spaces
484 {
485 index++;
486 while (index<data_.size() && data_[index]==' ') index++;
487 }
488 if (index<data_.size() && data_[index]=='[') // find matching ']'
489 {
490 index++;
491 char c = 0;
492 while (index<data_.size() && (c=data_[index])!=']' && c!='\n') index++;
493 if (index==data_.size() || data_[index]!=']') return 0; // invalid parameter
494 offset_=index+1; // part after [...] is the parameter name
495 }
496 return endOfLabel(data_,offset_);
497 };
498
499 static const auto endOfFuncLike = [](std::string_view data_,size_t offset_,bool allowSpaces) -> size_t
500 {
501 if (offset_<data_.size() && data_[offset_]==' ') // we expect a space before the name
502 {
503 char c=0;
504 offset_++;
505 // skip over spaces
506 while (offset_<data_.size() && data_[offset_]==' ')
507 {
508 offset_++;
509 }
510 // skip over name (and optionally type)
511 while (offset_<data_.size() && (c=data_[offset_])!='\n' && (allowSpaces || c!=' ') && c!='(')
512 {
513 offset_++;
514 }
515 if (c=='(') // find the end of the function
516 {
517 int count=1;
518 offset_++;
519 while (offset_<data_.size() && (c=data_[offset_++]))
520 {
521 if (c=='(') count++;
522 else if (c==')') count--;
523 if (count==0) return offset_;
524 }
525 }
526 return offset_;
527 }
528 return 0;
529 };
530
531 static const auto endOfFunc = [](std::string_view data_,size_t offset_) -> size_t
532 {
533 return endOfFuncLike(data_,offset_,true);
534 };
535
536 static const auto endOfGuard = [](std::string_view data_,size_t offset_) -> size_t
537 {
538 return endOfFuncLike(data_,offset_,false);
539 };
540
541 static const std::unordered_map<std::string,EndCmdFunc> cmdNames =
542 {
543 { "a", endOfLabel },
544 { "addindex", endOfLine },
545 { "addtogroup", endOfLabel },
546 { "anchor", endOfLabel },
547 { "b", endOfLabel },
548 { "c", endOfLabel },
549 { "category", endOfLine },
550 { "cite", endOfLabel },
551 { "class", endOfLine },
552 { "concept", endOfLine },
553 { "copybrief", endOfFunc },
554 { "copydetails", endOfFunc },
555 { "copydoc", endOfFunc },
556 { "def", endOfFunc },
557 { "defgroup", endOfLabel },
558 { "diafile", endOfLine },
559 { "dir", endOfLine },
560 { "dockbookinclude",endOfLine },
561 { "dontinclude", endOfLine },
562 { "dotfile", endOfLine },
563 { "e", endOfLabel },
564 { "elseif", endOfGuard },
565 { "em", endOfLabel },
566 { "emoji", endOfLabel },
567 { "enum", endOfLabel },
568 { "example", endOfLine },
569 { "exception", endOfLine },
570 { "extends", endOfLabel },
571 { "file", endOfLine },
572 { "fn", endOfFunc },
573 { "headerfile", endOfLine },
574 { "htmlinclude", endOfLine },
575 { "ianchor", endOfLabelOpt },
576 { "idlexcept", endOfLine },
577 { "if", endOfGuard },
578 { "ifnot", endOfGuard },
579 { "image", endOfLine },
580 { "implements", endOfLine },
581 { "include", endOfLine },
582 { "includedoc", endOfLine },
583 { "includelineno", endOfLine },
584 { "ingroup", endOfLabel },
585 { "interface", endOfLine },
586 { "latexinclude", endOfLine },
587 { "maninclude", endOfLine },
588 { "memberof", endOfLabel },
589 { "mscfile", endOfLine },
590 { "namespace", endOfLabel },
591 { "noop", endOfLine },
592 { "overload", endOfLine },
593 { "p", endOfLabel },
594 { "package", endOfLabel },
595 { "page", endOfLabel },
596 { "paragraph", endOfLabel },
597 { "param", endOfParam },
598 { "property", endOfLine },
599 { "protocol", endOfLine },
600 { "qualifier", endOfLine },
601 { "ref", endOfLabel },
602 { "refitem", endOfLine },
603 { "related", endOfLabel },
604 { "relatedalso", endOfLabel },
605 { "relates", endOfLabel },
606 { "relatesalso", endOfLabel },
607 { "retval", endOfLabel },
608 { "rtfinclude", endOfLine },
609 { "section", endOfLabel },
610 { "skip", endOfLine },
611 { "skipline", endOfLine },
612 { "snippet", endOfLine },
613 { "snippetdoc", endOfLine },
614 { "snippetlineno", endOfLine },
615 { "struct", endOfLine },
616 { "subpage", endOfLabel },
617 { "subparagraph", endOfLabel },
618 { "subsubparagraph",endOfLabel },
619 { "subsection", endOfLabel },
620 { "subsubsection", endOfLabel },
621 { "throw", endOfLabel },
622 { "throws", endOfLabel },
623 { "tparam", endOfLabel },
624 { "typedef", endOfLine },
625 { "plantumlfile", endOfLine },
626 { "union", endOfLine },
627 { "until", endOfLine },
628 { "var", endOfLine },
629 { "verbinclude", endOfLine },
630 { "weakgroup", endOfLabel },
631 { "xmlinclude", endOfLine },
632 { "xrefitem", endOfLabel }
633 };
634
635 bool isEscaped = offset>0 && (data.data()[-1]=='\\' || data.data()[-1]=='@');
636 if (isEscaped) return 0;
637
638 const size_t size = data.size();
639 size_t end=1;
640 while (end<size && (data[end]>='a' && data[end]<='z')) end++;
641 if (end==1) return 0;
642 std::string cmdName(data.substr(1,end-1));
643 size_t result=0;
644 auto it = cmdNames.find(cmdName);
645 if (it!=cmdNames.end()) // command with parameters that should be ignored by markdown
646 {
647 // find the end of the parameters
648 result = it->second(data,end);
649 }
650 AUTO_TRACE_EXIT("result={}",result);
651 return result;
652}
653
654/** looks for the next emph char, skipping other constructs, and
655 * stopping when either it is found, or we are at the end of a paragraph.
656 */
657size_t Markdown::Private::findEmphasisChar(std::string_view data, char c, size_t c_size)
658{
659 AUTO_TRACE("data='{}' c={} c_size={}",Trace::trunc(data),c,c_size);
660 size_t i = 1;
661 const size_t size = data.size();
662
663 while (i<size)
664 {
665 while (i<size && data[i]!=c &&
666 data[i]!='\\' && data[i]!='@' &&
667 !(data[i]=='/' && data[i-1]=='<') && // html end tag also ends emphasis
668 data[i]!='\n') i++;
669 // avoid overflow (unclosed emph token)
670 if (i==size)
671 {
672 return 0;
673 }
674 //printf("findEmphasisChar: data=[%s] i=%d c=%c\n",data,i,data[i]);
675
676 // not counting escaped chars or characters that are unlikely
677 // to appear as the end of the emphasis char
678 if (ignoreCloseEmphChar(data[i-1],data[i]))
679 {
680 i++;
681 continue;
682 }
683 else
684 {
685 // get length of emphasis token
686 size_t len = 0;
687 while (i+len<size && data[i+len]==c)
688 {
689 len++;
690 }
691
692 if (len>0)
693 {
694 if (len!=c_size || (i+len<size && isIdChar(data[i+len]))) // to prevent touching some_underscore_identifier
695 {
696 i+=len;
697 continue;
698 }
699 AUTO_TRACE_EXIT("result={}",i);
700 return static_cast<int>(i); // found it
701 }
702 }
703
704 // skipping a code span
705 if (data[i]=='`')
706 {
707 int snb=0;
708 while (i<size && data[i]=='`') snb++,i++;
709
710 // find same pattern to end the span
711 int enb=0;
712 while (i<size && enb<snb)
713 {
714 if (data[i]=='`') enb++;
715 if (snb==1 && data[i]=='\'') break; // ` ended by '
716 i++;
717 }
718 }
719 else if (data[i]=='@' || data[i]=='\\')
720 { // skip over blocks that should not be processed
721 QCString endBlockName = isBlockCommand(data.substr(i),i);
722 if (!endBlockName.isEmpty())
723 {
724 i++;
725 size_t l = endBlockName.length();
726 while (i+l<size)
727 {
728 if ((data[i]=='\\' || data[i]=='@') && // command
729 data[i-1]!='\\' && data[i-1]!='@') // not escaped
730 {
731 if (qstrncmp(&data[i+1],endBlockName.data(),l)==0)
732 {
733 break;
734 }
735 }
736 i++;
737 }
738 }
739 else if (i+1<size && isIdChar(data[i+1])) // @cmd, stop processing, see bug 690385
740 {
741 return 0;
742 }
743 else
744 {
745 i++;
746 }
747 }
748 else if (data[i-1]=='<' && data[i]=='/') // html end tag invalidates emphasis
749 {
750 return 0;
751 }
752 else if (data[i]=='\n') // end * or _ at paragraph boundary
753 {
754 i++;
755 while (i<size && data[i]==' ') i++;
756 if (i>=size || data[i]=='\n')
757 {
758 return 0;
759 } // empty line -> paragraph
760 }
761 else // should not get here!
762 {
763 i++;
764 }
765 }
766 return 0;
767}
768
769/** process single emphasis */
770int Markdown::Private::processEmphasis1(std::string_view data, char c)
771{
772 AUTO_TRACE("data='{}' c={}",Trace::trunc(data),c);
773 size_t i = 0;
774 const size_t size = data.size();
775
776 /* skipping one symbol if coming from emph3 */
777 if (size>1 && data[0]==c && data[1]==c) { i=1; }
778
779 while (i<size)
780 {
781 size_t len = findEmphasisChar(data.substr(i), c, 1);
782 if (len==0) { return 0; }
783 i+=len;
784 if (i>=size) { return 0; }
785
786 if (i+1<size && data[i+1]==c)
787 {
788 i++;
789 continue;
790 }
791 if (data[i]==c && data[i-1]!=' ' && data[i-1]!='\n')
792 {
793 out+="<em>";
794 processInline(data.substr(0,i));
795 out+="</em>";
796 AUTO_TRACE_EXIT("result={}",i+1);
797 return static_cast<int>(i+1);
798 }
799 }
800 return 0;
801}
802
803/** process double emphasis */
804int Markdown::Private::processEmphasis2(std::string_view data, char c)
805{
806 AUTO_TRACE("data='{}' c={}",Trace::trunc(data),c);
807 size_t i = 0;
808 const size_t size = data.size();
809
810 while (i<size)
811 {
812 size_t len = findEmphasisChar(data.substr(i), c, 2);
813 if (len==0)
814 {
815 return 0;
816 }
817 i += len;
818 if (i+1<size && data[i]==c && data[i+1]==c && i && data[i-1]!=' ' && data[i-1]!='\n')
819 {
820 if (c == '~') out+="<strike>";
821 else out+="<strong>";
822 processInline(data.substr(0,i));
823 if (c == '~') out+="</strike>";
824 else out+="</strong>";
825 AUTO_TRACE_EXIT("result={}",i+2);
826 return static_cast<int>(i+2);
827 }
828 i++;
829 }
830 return 0;
831}
832
833/** Parsing triple emphasis.
834 * Finds the first closing tag, and delegates to the other emph
835 */
836int Markdown::Private::processEmphasis3(std::string_view data,char c)
837{
838 AUTO_TRACE("data='{}' c={}",Trace::trunc(data),c);
839 size_t i = 0;
840 const size_t size = data.size();
841
842 while (i<size)
843 {
844 size_t len = findEmphasisChar(data.substr(i), c, 3);
845 if (len==0)
846 {
847 return 0;
848 }
849 i+=len;
850
851 /* skip whitespace preceded symbols */
852 if (data[i]!=c || data[i-1]==' ' || data[i-1]=='\n')
853 {
854 continue;
855 }
856
857 if (i+2<size && data[i+1]==c && data[i+2]==c)
858 {
859 out+="<em><strong>";
860 processInline(data.substr(0,i));
861 out+="</strong></em>";
862 AUTO_TRACE_EXIT("result={}",i+3);
863 return static_cast<int>(i+3);
864 }
865 else if (i+1<size && data[i+1]==c)
866 {
867 // double symbol found, handing over to emph1
868 len = processEmphasis1(std::string_view(data.data()-2, size+2), c);
869 if (len==0)
870 {
871 return 0;
872 }
873 else
874 {
875 AUTO_TRACE_EXIT("result={}",len-2);
876 return static_cast<int>(len - 2);
877 }
878 }
879 else
880 {
881 // single symbol found, handing over to emph2
882 len = processEmphasis2(std::string_view(data.data()-1, size+1), c);
883 if (len==0)
884 {
885 return 0;
886 }
887 else
888 {
889 AUTO_TRACE_EXIT("result={}",len-1);
890 return static_cast<int>(len - 1);
891 }
892 }
893 }
894 return 0;
895}
896
897/** Process ndash and mdashes */
898int Markdown::Private::processNmdash(std::string_view data,size_t offset)
899{
900 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
901 const size_t size = data.size();
902 // precondition: data[0]=='-'
903 size_t i=1;
904 int count=1;
905 if (i<size && data[i]=='-') // found --
906 {
907 count++,i++;
908 }
909 if (i<size && data[i]=='-') // found ---
910 {
911 count++,i++;
912 }
913 if (i<size && data[i]=='-') // found ----
914 {
915 count++;
916 }
917 if (count>=2 && offset>=2 && qstrncmp(data.data()-2,"<!",2)==0)
918 { AUTO_TRACE_EXIT("result={}",1-count); return 1-count; } // start HTML comment
919 if (count==2 && size > 2 && data[2]=='>')
920 { return 0; } // end HTML comment
921 if (count==3 && size > 3 && data[3]=='>')
922 { return 0; } // end HTML comment
923 if (count==2 && (offset<8 || qstrncmp(data.data()-8,"operator",8)!=0)) // -- => ndash
924 {
925 out+="&ndash;";
926 AUTO_TRACE_EXIT("result=2");
927 return 2;
928 }
929 else if (count==3) // --- => ndash
930 {
931 out+="&mdash;";
932 AUTO_TRACE_EXIT("result=3");
933 return 3;
934 }
935 // not an ndash or mdash
936 return 0;
937}
938
939/** Process quoted section "...", can contain one embedded newline */
940int Markdown::Private::processQuoted(std::string_view data,size_t)
941{
942 AUTO_TRACE("data='{}'",Trace::trunc(data));
943 const size_t size = data.size();
944 size_t i=1;
945 int nl=0;
946 while (i<size && data[i]!='"' && nl<2)
947 {
948 if (data[i]=='\n') nl++;
949 i++;
950 }
951 if (i<size && data[i]=='"' && nl<2)
952 {
953 out+=data.substr(0,i+1);
954 AUTO_TRACE_EXIT("result={}",i+2);
955 return static_cast<int>(i+1);
956 }
957 // not a quoted section
958 return 0;
959}
960
961/** Process a HTML tag. Note that <pre>..</pre> are treated specially, in
962 * the sense that all code inside is written unprocessed
963 */
964int Markdown::Private::processHtmlTagWrite(std::string_view data,size_t offset,bool doWrite)
965{
966 AUTO_TRACE("data='{}' offset={} doWrite={}",Trace::trunc(data),offset,doWrite);
967 if (offset>0 && data.data()[-1]=='\\') { return 0; } // escaped <
968
969 const size_t size = data.size();
970
971 // find the end of the html tag
972 size_t i=1;
973 size_t l=0;
974 // compute length of the tag name
975 while (i<size && isIdChar(data[i])) i++,l++;
976 QCString tagName(data.substr(1,i-1));
977 if (tagName.lower()=="pre") // found <pre> tag
978 {
979 bool insideStr=FALSE;
980 while (i+6<size)
981 {
982 char c=data[i];
983 if (!insideStr && c=='<') // potential start of html tag
984 {
985 if (data[i+1]=='/' &&
986 tolower(data[i+2])=='p' && tolower(data[i+3])=='r' &&
987 tolower(data[i+4])=='e' && tolower(data[i+5])=='>')
988 { // found </pre> tag, copy from start to end of tag
989 if (doWrite) out+=data.substr(0,i+6);
990 //printf("found <pre>..</pre> [%d..%d]\n",0,i+6);
991 AUTO_TRACE_EXIT("result={}",i+6);
992 return static_cast<int>(i+6);
993 }
994 }
995 else if (insideStr && c=='"')
996 {
997 if (data[i-1]!='\\') insideStr=FALSE;
998 }
999 else if (c=='"')
1000 {
1001 insideStr=TRUE;
1002 }
1003 i++;
1004 }
1005 }
1006 else // some other html tag
1007 {
1008 if (l>0 && i<size)
1009 {
1010 if (data[i]=='/' && i<size-1 && data[i+1]=='>') // <bla/>
1011 {
1012 //printf("Found htmlTag={%s}\n",qPrint(QCString(data).left(i+2)));
1013 if (doWrite) out+=data.substr(0,i+2);
1014 AUTO_TRACE_EXIT("result={}",i+2);
1015 return static_cast<int>(i+2);
1016 }
1017 else if (data[i]=='>') // <bla>
1018 {
1019 //printf("Found htmlTag={%s}\n",qPrint(QCString(data).left(i+1)));
1020 if (doWrite) out+=data.substr(0,i+1);
1021 AUTO_TRACE_EXIT("result={}",i+1);
1022 return static_cast<int>(i+1);
1023 }
1024 else if (data[i]==' ') // <bla attr=...
1025 {
1026 i++;
1027 bool insideAttr=FALSE;
1028 while (i<size)
1029 {
1030 if (!insideAttr && data[i]=='"')
1031 {
1032 insideAttr=TRUE;
1033 }
1034 else if (data[i]=='"' && data[i-1]!='\\')
1035 {
1036 insideAttr=FALSE;
1037 }
1038 else if (!insideAttr && data[i]=='>') // found end of tag
1039 {
1040 //printf("Found htmlTag={%s}\n",qPrint(QCString(data).left(i+1)));
1041 if (doWrite) out+=data.substr(0,i+1);
1042 AUTO_TRACE_EXIT("result={}",i+1);
1043 return static_cast<int>(i+1);
1044 }
1045 i++;
1046 }
1047 }
1048 }
1049 }
1050 AUTO_TRACE_EXIT("not a valid html tag");
1051 return 0;
1052}
1053
1054int Markdown::Private::processHtmlTag(std::string_view data,size_t offset)
1055{
1056 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
1057 return processHtmlTagWrite(data,offset,true);
1058}
1059
1060int Markdown::Private::processEmphasis(std::string_view data,size_t offset)
1061{
1062 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
1063 const size_t size = data.size();
1064
1065 if ((offset>0 && !isOpenEmphChar(data.data()[-1])) || // invalid char before * or _
1066 (size>1 && data[0]!=data[1] && !(isIdChar(data[1]) || extraChar(data[1]))) || // invalid char after * or _
1067 (size>2 && data[0]==data[1] && !(isIdChar(data[2]) || extraChar(data[2])))) // invalid char after ** or __
1068 {
1069 AUTO_TRACE_EXIT("invalid surrounding characters");
1070 return 0;
1071 }
1072
1073 char c = data[0];
1074 int ret = 0;
1075 if (size>2 && c!='~' && data[1]!=c) // _bla or *bla
1076 {
1077 // whitespace cannot follow an opening emphasis
1078 if (data[1]==' ' || data[1]=='\n' ||
1079 (ret = processEmphasis1(data.substr(1), c)) == 0)
1080 {
1081 return 0;
1082 }
1083 AUTO_TRACE_EXIT("result={}",ret+1);
1084 return ret+1;
1085 }
1086 if (size>3 && data[1]==c && data[2]!=c) // __bla or **bla
1087 {
1088 if (data[2]==' ' || data[2]=='\n' ||
1089 (ret = processEmphasis2(data.substr(2), c)) == 0)
1090 {
1091 return 0;
1092 }
1093 AUTO_TRACE_EXIT("result={}",ret+2);
1094 return ret+2;
1095 }
1096 if (size>4 && c!='~' && data[1]==c && data[2]==c && data[3]!=c) // ___bla or ***bla
1097 {
1098 if (data[3]==' ' || data[3]=='\n' ||
1099 (ret = processEmphasis3(data.substr(3), c)) == 0)
1100 {
1101 return 0;
1102 }
1103 AUTO_TRACE_EXIT("result={}",ret+3);
1104 return ret+3;
1105 }
1106 return 0;
1107}
1108
1110 std::string_view fmt, bool inline_img, bool explicitTitle,
1111 const QCString &title, const QCString &content,
1112 const QCString &link, const QCString &attrs,
1113 const FileDef *fd)
1114{
1115 AUTO_TRACE("fmt={} inline_img={} explicitTitle={} title={} content={} link={} attrs={}",
1116 fmt,inline_img,explicitTitle,Trace::trunc(title),Trace::trunc(content),link,attrs);
1117 QCString attributes = getFilteredImageAttributes(fmt, attrs);
1118 out+="@image";
1119 if (inline_img)
1120 {
1121 out+="{inline}";
1122 }
1123 out+=" ";
1124 out+=fmt;
1125 out+=" ";
1126 out+=link.mid(fd ? 0 : 5);
1127 if (!explicitTitle && !content.isEmpty())
1128 {
1129 out+=" \"";
1130 out+=escapeDoubleQuotes(content);
1131 out+="\"";
1132 }
1133 else if ((content.isEmpty() || explicitTitle) && !title.isEmpty())
1134 {
1135 out+=" \"";
1136 out+=escapeDoubleQuotes(title);
1137 out+="\"";
1138 }
1139 else
1140 {
1141 out+=" ";// so the line break will not be part of the image name
1142 }
1143 if (!attributes.isEmpty())
1144 {
1145 out+=" ";
1146 out+=attributes;
1147 out+=" ";
1148 }
1149 out+="\\ilinebr ";
1150}
1151
/** Tries to interpret the start of \a data as a markdown link or image and, on success,
 *  appends the converted text to the output.
 *
 *  The forms handled below are: inline links `[text](url "title")`, reference links
 *  `[text][ref]`, minimal references `[ref]`, the special `[TOC]` marker, and the same
 *  forms preceded by `!` for images (optionally followed by `{attributes}`).
 *
 *  @param data    input starting at `[` or `!`.
 *  @param offset  number of characters that may be inspected before \a data (used to look
 *                 back from an image to decide whether it is inline).
 *  @returns the number of characters consumed, or 0 if the input is not a valid link.
 */
int Markdown::Private::processLink(const std::string_view data,size_t offset)
{
  AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
  const size_t size = data.size();
  QCString content;
  QCString link;
  QCString title;
  bool isImageLink = FALSE;
  bool isImageInline = FALSE;
  bool isToc = FALSE;
  size_t i=1; // index just past the opening '['
  if (data[0]=='!')
  {
    isImageLink = TRUE;
    if (size<2 || data[1]!='[')
    {
      return 0;
    }

    // if there is non-whitespace before the ![ within the scope of two new lines, the image
    // is considered inlined, i.e. the image is not preceded by an empty line
    int numNLsNeeded=2;
    int pos = -1; // negative index: looks at the characters in front of data
    while (pos>=-static_cast<int>(offset) && numNLsNeeded>0)
    {
      if (data.data()[pos]=='\n') numNLsNeeded--;
      else if (data.data()[pos]!=' ') // found non-whitespace, stop searching
      {
        isImageInline=true;
        break;
      }
      pos--;
    }
    // skip '!['
    i++;
  }
  size_t contentStart=i;
  int level=1;   // bracket nesting depth
  int nlTotal=0; // sum of the newlines seen in the different parts of the link
  int nl=0;      // newlines seen in the part currently being scanned
  // find the matching ]
  while (i<size)
  {
    if (data[i-1]=='\\') // skip escaped characters
    {
    }
    else if (data[i]=='[')
    {
      level++;
    }
    else if (data[i]==']')
    {
      level--;
      if (level<=0) break;
    }
    else if (data[i]=='\n')
    {
      nl++;
      if (nl>1) { return 0; } // only allow one newline in the content
    }
    i++;
  }
  nlTotal += nl;
  nl = 0;
  if (i>=size) return 0; // premature end of comment -> no link
  size_t contentEnd=i;
  content = data.substr(contentStart,contentEnd-contentStart);
  //printf("processLink: content={%s}\n",qPrint(content));
  if (!isImageLink && content.isEmpty()) { return 0; } // no link text
  i++; // skip over ]

  bool whiteSpace = false;
  // skip whitespace
  while (i<size && data[i]==' ') { whiteSpace = true; i++; }
  if (i<size && data[i]=='\n') // one newline allowed here
  {
    whiteSpace = true;
    i++;
    // skip more whitespace
    while (i<size && data[i]==' ') i++;
  }
  // a '(' or '[' that is separated from the ']' by whitespace does not continue the link
  if (whiteSpace && i<size && (data[i]=='(' || data[i]=='[')) return 0;

  bool explicitTitle=FALSE;
  if (i<size && data[i]=='(') // inline link
  {
    i++;
    while (i<size && data[i]==' ') i++;
    bool uriFormat=false; // true if the target is written as <...>
    if (i<size && data[i]=='<') { i++; uriFormat=true; }
    size_t linkStart=i;
    int braceCount=1;
    // scan up to the matching ')' or the start of a quoted title
    while (i<size && data[i]!='\'' && data[i]!='"' && braceCount>0)
    {
      if (data[i]=='\n') // unexpected EOL
      {
        nl++;
        if (nl>1) { return 0; }
      }
      else if (data[i]=='(')
      {
        braceCount++;
      }
      else if (data[i]==')')
      {
        braceCount--;
      }
      if (braceCount>0) // do not step over the closing ')'
      {
        i++;
      }
    }
    nlTotal += nl;
    nl = 0;
    if (i>=size || data[i]=='\n') { return 0; }
    link = data.substr(linkStart,i-linkStart);
    link = link.stripWhiteSpace();
    //printf("processLink: link={%s}\n",qPrint(link));
    if (link.isEmpty()) { return 0; }
    if (uriFormat && link.at(link.length()-1)=='>') link=link.left(link.length()-1);

    // optional title
    if (data[i]=='\'' || data[i]=='"')
    {
      char c = data[i]; // the quote character that has to close the title
      i++;
      size_t titleStart=i;
      nl=0;
      while (i<size)
      {
        if (data[i]=='\n')
        {
          // NOTE(review): here the check precedes the increment, unlike the other newline
          // counters in this function, so two newlines are accepted inside a title.
          if (nl>1) { return 0; }
          nl++;
        }
        else if (data[i]=='\\') // escaped char in string
        {
          i++;
        }
        else if (data[i]==c)
        {
          i++;
          break;
        }
        i++;
      }
      if (i>=size)
      {
        return 0;
      }
      size_t titleEnd = i-1;
      // search back for closing marker
      while (titleEnd>titleStart && data[titleEnd]==' ') titleEnd--;
      if (data[titleEnd]==c) // found it
      {
        title = data.substr(titleStart,titleEnd-titleStart);
        explicitTitle=TRUE;
        // NOTE(review): if the end of data is reached here without finding ')', the loop
        // simply ends and the i++ after the title block still runs -- confirm this is intended.
        while (i<size)
        {
          if (data[i]==' ')i++; // remove space after the closing quote and the closing bracket
          else if (data[i] == ')') break; // the end bracket
          else // illegal
          {
            return 0;
          }
        }
      }
      else
      {
        return 0;
      }
    }
    i++; // skip over ')'
  }
  else if (i<size && data[i]=='[') // reference link
  {
    i++;
    size_t linkStart=i;
    nl=0;
    // find matching ]
    while (i<size && data[i]!=']')
    {
      if (data[i]=='\n')
      {
        nl++;
        if (nl>1) { return 0; }
      }
      i++;
    }
    if (i>=size) { return 0; }
    // extract link
    link = data.substr(linkStart,i-linkStart);
    //printf("processLink: link={%s}\n",qPrint(link));
    link = link.stripWhiteSpace();
    if (link.isEmpty()) // shortcut link
    {
      link=content;
    }
    // lookup reference
    QCString link_lower = link.lower();
    auto lr_it=linkRefs.find(link_lower.str());
    if (lr_it!=linkRefs.end()) // found it
    {
      link  = lr_it->second.link;
      title = lr_it->second.title;
      //printf("processLink: ref: link={%s} title={%s}\n",qPrint(link),qPrint(title));
    }
    else // reference not found!
    {
      //printf("processLink: ref {%s} do not exist\n",link.qPrint(lower()));
      return 0;
    }
    i++; // skip over ']'
  }
  else if (i<size && data[i]!=':' && !content.isEmpty()) // minimal link ref notation [some id]
  {
    QCString content_lower = content.lower();
    auto lr_it = linkRefs.find(content_lower.str());
    //printf("processLink: minimal link {%s} lr=%p",qPrint(content),lr);
    if (lr_it!=linkRefs.end()) // found it
    {
      link  = lr_it->second.link;
      title = lr_it->second.title;
      explicitTitle=TRUE;
      i=contentEnd; // go back to the ']' so whitespace skipped above is not consumed
    }
    else if (content=="TOC")
    {
      isToc=TRUE;
      i=contentEnd;
    }
    else
    {
      return 0;
    }
    i++; // skip over ']'
  }
  else
  {
    return 0;
  }
  nlTotal += nl;

  // search for optional image attributes
  QCString attributes;
  if (isImageLink)
  {
    size_t j = i;
    // skip over whitespace
    while (j<size && data[j]==' ') { j++; }
    if (j<size && data[j]=='{') // we have attributes
    {
      i = j;
      // skip over '{'
      i++;
      size_t attributesStart=i;
      nl=0;
      // find the matching '}'
      // NOTE(review): `level` is not reset here; it still holds the value left by the
      // content scan above (0 once the matching ']' was found), so the first unescaped
      // '}' ends the scan even after a nested '{' -- confirm this is intended.
      while (i<size)
      {
        if (data[i-1]=='\\') // skip escaped characters
        {
        }
        else if (data[i]=='{')
        {
          level++;
        }
        else if (data[i]=='}')
        {
          level--;
          if (level<=0) break;
        }
        else if (data[i]=='\n')
        {
          nl++;
          if (nl>1) { return 0; } // only allow one newline in the content
        }
        i++;
      }
      nlTotal += nl;
      if (i>=size) return 0; // premature end of comment -> no attributes
      size_t attributesEnd=i;
      attributes = data.substr(attributesStart,attributesEnd-attributesStart);
      i++; // skip over '}'
    }
    if (!isImageInline)
    {
      // if there is non-whitespace after the image within the scope of two new lines, the image
      // is considered inlined, i.e. the image is not followed by an empty line
      int numNLsNeeded=2;
      size_t pos = i;
      while (pos<size && numNLsNeeded>0)
      {
        if (data[pos]=='\n') numNLsNeeded--;
        else if (data[pos]!=' ') // found non-whitespace, stop searching
        {
          isImageInline=true;
          break;
        }
        pos++;
      }
    }
  }

  if (isToc) // special case for [TOC]
  {
    // the command is only written when the configured level is within the valid range
    int toc_level = Config_getInt(TOC_INCLUDE_HEADINGS);
    if (toc_level>=SectionType::MinLevel && toc_level<=SectionType::MaxLevel)
    {
      out+="@tableofcontents{html:";
      out+=QCString().setNum(toc_level);
      out+="}";
    }
  }
  else if (isImageLink)
  {
    bool ambig = false;
    FileDef *fd=nullptr;
    // NOTE(review): the condition below ends with a dangling `||`; the line holding its
    // last operand appears to be missing from this copy of the file. `ambig` and `fd` are
    // not assigned anywhere else in the visible code, so presumably that operand sets
    // them -- restore it from the original source.
    if (link.find("@ref ")!=-1 || link.find("\\ref ")!=-1 ||
        // assume doxygen symbol link or local image link
    {
      // check if different handling is needed per format
      writeMarkdownImage("html",    isImageInline, explicitTitle, title, content, link, attributes, fd);
      writeMarkdownImage("latex",   isImageInline, explicitTitle, title, content, link, attributes, fd);
      writeMarkdownImage("rtf",     isImageInline, explicitTitle, title, content, link, attributes, fd);
      writeMarkdownImage("docbook", isImageInline, explicitTitle, title, content, link, attributes, fd);
      writeMarkdownImage("xml",     isImageInline, explicitTitle, title, content, link, attributes, fd);
    }
    else
    {
      // any other image target is written as a plain HTML <img> element
      out+="<img src=\"";
      out+=link;
      out+="\" alt=\"";
      out+=content;
      out+="\"";
      if (!title.isEmpty())
      {
        out+=" title=\"";
        out+=substitute(title.simplifyWhiteSpace(),"\"","&quot;");
        out+="\"";
      }
      out+="/>";
    }
  }
  else
  {
    // NOTE(review): `lang` is used in the condition below but is not declared in the
    // visible code; the line declaring it appears to be missing from this copy of the
    // file -- restore it from the original source.
    int lp=-1; // position of "@ref "/"\ref " (first branch) or of '#' (second branch), -1 if absent
    if ((lp=link.find("@ref "))!=-1 || (lp=link.find("\\ref "))!=-1 || (lang==SrcLangExt::Markdown && !isURL(link)))
        // assume doxygen symbol link
    {
      if (lp==-1) // link to markdown page
      {
        out+="@ref \"";
        if (!(Portable::isAbsolutePath(link) || isURL(link)))
        {
          // try the link as given first, then relative to the directory of the current file
          FileInfo forg(link.str());
          if (forg.exists() && forg.isReadable())
          {
            link = forg.absFilePath();
          }
          else if (!(forg.exists() && forg.isReadable()))
          {
            FileInfo fi(fileName.str());
            QCString mdFile = fileName.left(fileName.length()-fi.fileName().length()) + link;
            FileInfo fmd(mdFile.str());
            if (fmd.exists() && fmd.isReadable())
            {
              link = fmd.absFilePath().data();
            }
          }
        }
        out+=link;
        out+="\"";
      }
      else
      {
        out+=link;
      }
      out+=" \"";
      if (explicitTitle && !title.isEmpty())
      {
        out+=substitute(title,"\"","&quot;");
      }
      else
      {
        processInline(std::string_view(substitute(content,"\"","&quot;").str()));
      }
      out+="\"";
    }
    else if ((lp=link.find('#'))!=-1 || link.find('/')!=-1 || link.find('.')!=-1)
    { // file/url link
      if (lp==0 || (lp>0 && !isURL(link) && Config_getEnum(MARKDOWN_ID_STYLE)==MARKDOWN_ID_STYLE_t::GITHUB))
      {
        // the part after '#' is used as the target of a @ref command
        out+="@ref \"";
        out+=AnchorGenerator::addPrefixIfNeeded(link.mid(lp+1).str());
        out+="\" \"";
        out+=substitute(content.simplifyWhiteSpace(),"\"","&quot;");
        out+="\"";
      }
      else
      {
        out+="<a href=\"";
        out+=link;
        out+="\"";
        // write as many newlines as were seen inside the link
        for (int ii = 0; ii < nlTotal; ii++) out+="\n";
        if (!title.isEmpty())
        {
          out+=" title=\"";
          out+=substitute(title.simplifyWhiteSpace(),"\"","&quot;");
          out+="\"";
        }
        out+=" ";
        out+=externalLinkTarget();
        out+=">";
        content = content.simplifyWhiteSpace();
        processInline(std::string_view(content.str()));
        out+="</a>";
      }
    }
    else // avoid link to e.g. F[x](y)
    {
      //printf("no link for '%s'\n",qPrint(link));
      return 0;
    }
  }
  AUTO_TRACE_EXIT("result={}",i);
  return static_cast<int>(i);
}
1582
1583/** `` ` `` parsing a code span (assuming codespan != 0) */
1584int Markdown::Private::processCodeSpan(std::string_view data,size_t)
1585{
1586 AUTO_TRACE("data='{}'",Trace::trunc(data));
1587 const size_t size = data.size();
1588
1589 /* counting the number of backticks in the delimiter */
1590 size_t nb=0, end=0;
1591 while (nb<size && data[nb]=='`')
1592 {
1593 nb++;
1594 }
1595
1596 /* finding the next delimiter */
1597 size_t i = 0;
1598 char pc = '`';
1599 for (end=nb; end<size && i<nb; end++)
1600 {
1601 if (data[end]=='`')
1602 {
1603 i++;
1604 }
1605 else if (data[end]=='\n')
1606 {
1607 // consecutive newlines
1608 if (pc == '\n') return 0;
1609 pc = '\n';
1610 i = 0;
1611 }
1612 else if (data[end]=='\'' && nb==1 && (end==size-1 || (end<size-1 && !isIdChar(data[end+1]))))
1613 { // look for quoted strings like 'some word', but skip strings like `it's cool`
1614 out+="&lsquo;";
1615 out+=data.substr(nb,end-nb);
1616 out+="&rsquo;";
1617 return static_cast<int>(end+1);
1618 }
1619 else
1620 {
1621 if (data[end]!=' ') pc = data[end];
1622 i=0;
1623 }
1624 }
1625 if (i < nb && end >= size)
1626 {
1627 return 0; // no matching delimiter
1628 }
1629
1630 // trimming outside whitespaces
1631 size_t f_begin = nb;
1632 while (f_begin < end && data[f_begin]==' ')
1633 {
1634 f_begin++;
1635 }
1636 size_t f_end = end - nb;
1637 while (f_end > nb && data[f_end-1]==' ')
1638 {
1639 f_end--;
1640 }
1641
1642 //printf("found code span '%s'\n",qPrint(QCString(data+f_begin).left(f_end-f_begin)));
1643
1644 /* real code span */
1645 if (f_begin < f_end)
1646 {
1647 QCString codeFragment = data.substr(f_begin, f_end-f_begin);
1648 out+="<tt>";
1649 out+=escapeSpecialChars(codeFragment);
1650 out+="</tt>";
1651 }
1652 AUTO_TRACE_EXIT("result={}",end);
1653 return static_cast<int>(end);
1654}
1655
1657{
1658 AUTO_TRACE("{}",Trace::trunc(data));
1659 if (Portable::strnstr(data.data(),g_doxy_nbsp,data.size())==nullptr) // no escape needed -> fast
1660 {
1661 out+=data;
1662 }
1663 else // escape needed -> slow
1664 {
1665 out+=substitute(QCString(data),g_doxy_nbsp,g_utf8_nbsp);
1666 }
1667}
1668
1669int Markdown::Private::processSpecialCommand(std::string_view data, size_t offset)
1670{
1671 AUTO_TRACE("{}",Trace::trunc(data));
1672 const size_t size = data.size();
1673 size_t i=1;
1674 QCString endBlockName = isBlockCommand(data,offset);
1675 if (!endBlockName.isEmpty())
1676 {
1677 AUTO_TRACE_ADD("endBlockName={}",endBlockName);
1678 size_t l = endBlockName.length();
1679 while (i+l<size)
1680 {
1681 if ((data[i]=='\\' || data[i]=='@') && // command
1682 data[i-1]!='\\' && data[i-1]!='@') // not escaped
1683 {
1684 if (qstrncmp(&data[i+1],endBlockName.data(),l)==0)
1685 {
1686 //printf("found end at %d\n",i);
1687 addStrEscapeUtf8Nbsp(data.substr(0,i+1+l));
1688 AUTO_TRACE_EXIT("result={}",i+1+l);
1689 return static_cast<int>(i+1+l);
1690 }
1691 }
1692 i++;
1693 }
1694 }
1695 size_t endPos = isSpecialCommand(data,offset);
1696 if (endPos>0)
1697 {
1698 out+=data.substr(0,endPos);
1699 return static_cast<int>(endPos);
1700 }
1701 if (size>1 && data[0]=='\\') // escaped characters
1702 {
1703 char c=data[1];
1704 if (c=='[' || c==']' || c=='*' || c=='!' || c=='(' || c==')' || c=='`' || c=='_')
1705 {
1706 out+=data[1];
1707 AUTO_TRACE_EXIT("2");
1708 return 2;
1709 }
1710 else if (c=='\\' || c=='@')
1711 {
1712 out+=data.substr(0,2);
1713 AUTO_TRACE_EXIT("2");
1714 return 2;
1715 }
1716 else if (c=='-' && size>3 && data[2]=='-' && data[3]=='-') // \---
1717 {
1718 out+=data.substr(1,3);
1719 AUTO_TRACE_EXIT("2");
1720 return 4;
1721 }
1722 else if (c=='-' && size>2 && data[2]=='-') // \--
1723 {
1724 out+=data.substr(1,2);
1725 AUTO_TRACE_EXIT("3");
1726 return 3;
1727 }
1728 }
1729 else if (size>1 && data[0]=='@') // escaped characters
1730 {
1731 char c=data[1];
1732 if (c=='\\' || c=='@')
1733 {
1734 out+=data.substr(0,2);
1735 AUTO_TRACE_EXIT("2");
1736 return 2;
1737 }
1738 }
1739 return 0;
1740}
1741
1742void Markdown::Private::processInline(std::string_view data)
1743{
1744 AUTO_TRACE("data='{}'",Trace::trunc(data));
1745 size_t i=0;
1746 size_t end=0;
1747 Action_t action;
1748 const size_t size = data.size();
1749 while (i<size)
1750 {
1751 // skip over characters that do not trigger a specific action
1752 while (end<size && ((action=actions[static_cast<uint8_t>(data[end])])==nullptr)) end++;
1753 // and add them to the output
1754 out+=data.substr(i,end-i);
1755 if (end>=size) break;
1756 i=end;
1757 // do the action matching a special character at i
1758 int iend = action(data.substr(i),i);
1759 if (iend<=0) // update end
1760 {
1761 end=i+1-iend;
1762 }
1763 else // skip until end
1764 {
1765 i+=iend;
1766 end=i;
1767 }
1768 }
1769}
1770
1771/** returns whether the line is a setext-style hdr underline */
1772int Markdown::Private::isHeaderline(std::string_view data, bool allowAdjustLevel)
1773{
1774 AUTO_TRACE("data='{}' allowAdjustLevel",Trace::trunc(data),allowAdjustLevel);
1775 size_t i=0, c=0;
1776 const size_t size = data.size();
1777 while (i<size && data[i]==' ') i++;
1778 if (i==size) return 0;
1779
1780 // test of level 1 header
1781 if (data[i]=='=')
1782 {
1783 while (i<size && data[i]=='=') i++,c++;
1784 while (i<size && data[i]==' ') i++;
1785 int level = (c>1 && (i>=size || data[i]=='\n')) ? 1 : 0;
1786 if (allowAdjustLevel && level==1 && indentLevel==-1)
1787 {
1788 // In case a page starts with a header line we use it as title, promoting it to @page.
1789 // We set g_indentLevel to -1 to promoting the other sections if they have a deeper
1790 // nesting level than the page header, i.e. @section..@subsection becomes @page..@section.
1791 // In case a section at the same level is found (@section..@section) however we need
1792 // to undo this (and the result will be @page..@section).
1793 indentLevel=0;
1794 }
1795 AUTO_TRACE_EXIT("result={}",indentLevel+level);
1796 return indentLevel+level;
1797 }
1798 // test of level 2 header
1799 if (data[i]=='-')
1800 {
1801 while (i<size && data[i]=='-') i++,c++;
1802 while (i<size && data[i]==' ') i++;
1803 return (c>1 && (i>=size || data[i]=='\n')) ? indentLevel+2 : 0;
1804 }
1805 return 0;
1806}
1807
1808/** returns true if this line starts a block quote */
1809static bool isBlockQuote(std::string_view data,size_t indent)
1810{
1811 AUTO_TRACE("data='{}' indent={}",Trace::trunc(data),indent);
1812 size_t i = 0;
1813 const size_t size = data.size();
1814 while (i<size && data[i]==' ') i++;
1815 if (i<indent+codeBlockIndent) // could be a quotation
1816 {
1817 // count >'s and skip spaces
1818 int level=0;
1819 while (i<size && (data[i]=='>' || data[i]==' '))
1820 {
1821 if (data[i]=='>') level++;
1822 i++;
1823 }
1824 // last characters should be a space or newline,
1825 // so a line starting with >= does not match, but only when level equals 1
1826 bool res = (level>0 && i<size && ((data[i-1]==' ') || data[i]=='\n')) || (level > 1);
1827 AUTO_TRACE_EXIT("result={}",res);
1828 return res;
1829 }
1830 else // too much indentation -> code block
1831 {
1832 AUTO_TRACE_EXIT("result=false: too much indentation");
1833 return false;
1834 }
1835}
1836
1837/** returns end of the link ref if this is indeed a link reference. */
1838static size_t isLinkRef(std::string_view data, QCString &refid, QCString &link, QCString &title)
1839{
1840 AUTO_TRACE("data='{}'",Trace::trunc(data));
1841 const size_t size = data.size();
1842 // format: start with [some text]:
1843 size_t i = 0;
1844 while (i<size && data[i]==' ') i++;
1845 if (i>=size || data[i]!='[') { return 0; }
1846 i++;
1847 size_t refIdStart=i;
1848 while (i<size && data[i]!='\n' && data[i]!=']') i++;
1849 if (i>=size || data[i]!=']') { return 0; }
1850 refid = data.substr(refIdStart,i-refIdStart);
1851 if (refid.isEmpty()) { return 0; }
1852 AUTO_TRACE_ADD("refid found {}",refid);
1853 //printf(" isLinkRef: found refid='%s'\n",qPrint(refid));
1854 i++;
1855 if (i>=size || data[i]!=':') { return 0; }
1856 i++;
1857
1858 // format: whitespace* \n? whitespace* (<url> | url)
1859 while (i<size && data[i]==' ') i++;
1860 if (i<size && data[i]=='\n')
1861 {
1862 i++;
1863 while (i<size && data[i]==' ') i++;
1864 }
1865 if (i>=size) { return 0; }
1866
1867 if (i<size && data[i]=='<') i++;
1868 size_t linkStart=i;
1869 while (i<size && data[i]!=' ' && data[i]!='\n') i++;
1870 size_t linkEnd=i;
1871 if (i<size && data[i]=='>') i++;
1872 if (linkStart==linkEnd) { return 0; } // empty link
1873 link = data.substr(linkStart,linkEnd-linkStart);
1874 AUTO_TRACE_ADD("link found {}",Trace::trunc(link));
1875 if (link=="@ref" || link=="\\ref")
1876 {
1877 size_t argStart=i;
1878 while (i<size && data[i]!='\n' && data[i]!='"') i++;
1879 link+=data.substr(argStart,i-argStart);
1880 }
1881
1882 title.clear();
1883
1884 // format: (whitespace* \n? whitespace* ( 'title' | "title" | (title) ))?
1885 size_t eol=0;
1886 while (i<size && data[i]==' ') i++;
1887 if (i<size && data[i]=='\n')
1888 {
1889 eol=i;
1890 i++;
1891 while (i<size && data[i]==' ') i++;
1892 }
1893 if (i>=size)
1894 {
1895 AUTO_TRACE_EXIT("result={}: end of isLinkRef while looking for title",i);
1896 return i; // end of buffer while looking for the optional title
1897 }
1898
1899 char c = data[i];
1900 if (c=='\'' || c=='"' || c=='(') // optional title present?
1901 {
1902 //printf(" start of title found! char='%c'\n",c);
1903 i++;
1904 if (c=='(') c=')'; // replace c by end character
1905 size_t titleStart=i;
1906 // search for end of the line
1907 while (i<size && data[i]!='\n') i++;
1908 eol = i;
1909
1910 // search back to matching character
1911 size_t end=i-1;
1912 while (end>titleStart && data[end]!=c) end--;
1913 if (end>titleStart)
1914 {
1915 title = data.substr(titleStart,end-titleStart);
1916 }
1917 AUTO_TRACE_ADD("title found {}",Trace::trunc(title));
1918 }
1919 while (i<size && data[i]==' ') i++;
1920 //printf("end of isLinkRef: i=%d size=%d data[i]='%c' eol=%d\n",
1921 // i,size,data[i],eol);
1922 if (i>=size) { AUTO_TRACE_EXIT("result={}",i); return i; } // end of buffer while ref id was found
1923 else if (eol>0) { AUTO_TRACE_EXIT("result={}",eol); return eol; } // end of line while ref id was found
1924 return 0; // invalid link ref
1925}
1926
1927static bool isHRuler(std::string_view data)
1928{
1929 AUTO_TRACE("data='{}'",Trace::trunc(data));
1930 size_t i=0;
1931 size_t size = data.size();
1932 if (size>0 && data[size-1]=='\n') size--; // ignore newline character
1933 while (i<size && data[i]==' ') i++;
1934 if (i>=size) { AUTO_TRACE_EXIT("result=false: empty line"); return false; } // empty line
1935 char c=data[i];
1936 if (c!='*' && c!='-' && c!='_')
1937 {
1938 AUTO_TRACE_EXIT("result=false: {} is not a hrule character",c);
1939 return false; // not a hrule character
1940 }
1941 int n=0;
1942 while (i<size)
1943 {
1944 if (data[i]==c)
1945 {
1946 n++; // count rule character
1947 }
1948 else if (data[i]!=' ')
1949 {
1950 AUTO_TRACE_EXIT("result=false: line contains non hruler characters");
1951 return false; // line contains non hruler characters
1952 }
1953 i++;
1954 }
1955 AUTO_TRACE_EXIT("result={}",n>=3);
1956 return n>=3; // at least 3 characters needed for a hruler
1957}
1958
1959QCString Markdown::Private::extractTitleId(QCString &title, int level, bool *pIsIdGenerated)
1960{
1961 AUTO_TRACE("title={} level={}",Trace::trunc(title),level);
1962 // match e.g. '{#id-b11} ' and capture 'id-b11'
1963 static const reg::Ex r2(R"({#(\a[\w-]*)}\s*$)");
1964 reg::Match match;
1965 std::string ti = title.str();
1966 if (reg::search(ti,match,r2))
1967 {
1968 std::string id = match[1].str();
1969 title = title.left(match.position());
1970 if (AnchorGenerator::instance().reserve(id)>0)
1971 {
1972 warn(fileName, lineNr, "An automatically generated id already has the name '%s'!", id.c_str());
1973 }
1974 //printf("found match id='%s' title=%s\n",id.c_str(),qPrint(title));
1975 AUTO_TRACE_EXIT("id={}",id);
1976 return id;
1977 }
1978 if ((level>0) && (level<=Config_getInt(TOC_INCLUDE_HEADINGS)))
1979 {
1980 QCString id = AnchorGenerator::instance().generate(ti);
1981 if (pIsIdGenerated) *pIsIdGenerated=true;
1982 //printf("auto-generated id='%s' title='%s'\n",qPrint(id),qPrint(title));
1983 AUTO_TRACE_EXIT("id={}",id);
1984 return id;
1985 }
1986 //printf("no id found in title '%s'\n",qPrint(title));
1987 return "";
1988}
1989
1990
1991int Markdown::Private::isAtxHeader(std::string_view data,
1992 QCString &header,QCString &id,bool allowAdjustLevel,bool *pIsIdGenerated)
1993{
1994 AUTO_TRACE("data='{}' header={} id={} allowAdjustLevel={}",Trace::trunc(data),Trace::trunc(header),id,allowAdjustLevel);
1995 size_t i = 0;
1996 int level = 0, blanks=0;
1997 const size_t size = data.size();
1998
1999 // find start of header text and determine heading level
2000 while (i<size && data[i]==' ') i++;
2001 if (i>=size || data[i]!='#')
2002 {
2003 return 0;
2004 }
2005 while (i<size && data[i]=='#') i++,level++;
2006 if (level>SectionType::MaxLevel) // too many #'s -> no section
2007 {
2008 return 0;
2009 }
2010 while (i<size && data[i]==' ') i++,blanks++;
2011 if (level==1 && blanks==0)
2012 {
2013 return 0; // special case to prevent #someid seen as a header (see bug 671395)
2014 }
2015
2016 // find end of header text
2017 size_t end=i;
2018 while (end<size && data[end]!='\n') end++;
2019 while (end>i && (data[end-1]=='#' || data[end-1]==' ')) end--;
2020
2021 // store result
2022 header = data.substr(i,end-i);
2023 id = extractTitleId(header, level, pIsIdGenerated);
2024 if (!id.isEmpty()) // strip #'s between title and id
2025 {
2026 int idx=static_cast<int>(header.length())-1;
2027 while (idx>=0 && (header.at(idx)=='#' || header.at(idx)==' ')) idx--;
2028 header=header.left(idx+1);
2029 }
2030
2031 if (allowAdjustLevel && level==1 && indentLevel==-1)
2032 {
2033 // in case we find a `# Section` on a markdown page that started with the same level
2034 // header, we no longer need to artificially decrease the paragraph level.
2035 // So both
2036 // -------------------
2037 // # heading 1 <-- here we set g_indentLevel to -1
2038 // # heading 2 <-- here we set g_indentLevel back to 0 such that this will be a @section
2039 // -------------------
2040 // and
2041 // -------------------
2042 // # heading 1 <-- here we set g_indentLevel to -1
2043 // ## heading 2 <-- here we keep g_indentLevel at -1 such that @subsection will be @section
2044 // -------------------
2045 // will convert to
2046 // -------------------
2047 // @page md_page Heading 1
2048 // @section autotoc_md1 Heading 2
2049 // -------------------
2050
2051 indentLevel=0;
2052 }
2053 int res = level+indentLevel;
2054 AUTO_TRACE_EXIT("result={}",res);
2055 return res;
2056}
2057
2058static bool isEmptyLine(std::string_view data)
2059{
2060 AUTO_TRACE("data='{}'",Trace::trunc(data));
2061 size_t i=0;
2062 while (i<data.size())
2063 {
2064 if (data[i]=='\n') { AUTO_TRACE_EXIT("true"); return true; }
2065 if (data[i]!=' ') { AUTO_TRACE_EXIT("false"); return false; }
2066 i++;
2067 }
2068 AUTO_TRACE_EXIT("true");
2069 return true;
2070}
2071
// Evaluates to true if the four characters of `data` starting at index (i) form an
// opening `<li>` tag; the letters are matched in lower and upper case.
// The macro reads data[(i)] up to data[(i)+3] without any bounds check, so the caller
// has to make sure these indices are valid. It relies on a variable named `data` being
// in scope where it is used.
#define isLiTag(i) \
   (data[(i)]=='<' && \
   (data[(i)+1]=='l' || data[(i)+1]=='L') && \
   (data[(i)+2]=='i' || data[(i)+2]=='I') && \
   (data[(i)+3]=='>'))
2077
// compute the indent from the start of the input, excluding list markers
// such as -, -#, *, +, 1., and <li>
//
// Returns the number of characters that make up the leading spaces plus at most one
// list marker plus the spaces following it. For input that does not start with a list
// marker this is the number of leading spaces.
static size_t computeIndentExcludingListMarkers(std::string_view data)
{
  AUTO_TRACE("data='{}'",Trace::trunc(data));
  size_t i=0;
  const size_t size=data.size();
  size_t indent=0;
  bool isDigit=FALSE;
  bool isLi=FALSE;
  bool listMarkerSkipped=FALSE;
  // Note: isDigit and isLi are assigned inside the loop condition. Due to short-circuit
  // evaluation they are only updated when the preceding alternatives are false, so in
  // other iterations they keep the value of an earlier evaluation.
  while (i<size &&
         (data[i]==' ' ||                                    // space
          (!listMarkerSkipped &&                             // first list marker
           (data[i]=='+' || data[i]=='-' || data[i]=='*' ||  // unordered list char
            (data[i]=='#' && i>0 && data[i-1]=='-') ||       // -# item
            (isDigit=(data[i]>='1' && data[i]<='9')) ||      // ordered list marker?
            (isLi=(size>=3 && i<size-3 && isLiTag(i)))       // <li> tag
           )
          )
         )
        )
  {
    if (isDigit) // skip over ordered list marker '10. '
    {
      size_t j=i+1;
      while (j<size && ((data[j]>='0' && data[j]<='9') || data[j]=='.'))
      {
        if (data[j]=='.') // should be end of the list marker
        {
          if (j<size-1 && data[j+1]==' ') // valid list marker
          {
            listMarkerSkipped=TRUE;
            indent+=j+1-i; // count the digits and the dot
            i=j+1;         // continue at the space after the dot
            break;
          }
          else // not a list marker
          {
            break;
          }
        }
        j++;
      }
    }
    else if (isLi)
    {
      i+=3; // skip over <li>
      indent+=3;
      listMarkerSkipped=TRUE;
    }
    else if (data[i]=='-' && size>=2 && i<size-2 && data[i+1]=='#' && data[i+2]==' ')
    { // case "-# "
      listMarkerSkipped=TRUE; // only a single list marker is accepted
      i++; // skip over #
      indent++;
    }
    else if (data[i]!=' ' && i<size-1 && data[i+1]==' ')
    { // case "- " or "+ " or "* "
      listMarkerSkipped=TRUE; // only a single list marker is accepted
    }
    if (data[i]!=' ' && !listMarkerSkipped)
    { // end of indent
      break;
    }
    // count the current character (a space or the last character of the marker)
    indent++,i++;
  }
  AUTO_TRACE_EXIT("result={}",indent);
  return indent;
}
2148
2149static size_t isListMarker(std::string_view data)
2150{
2151 AUTO_TRACE("data='{}'",Trace::trunc(data));
2152 size_t normalIndent = 0;
2153 while (normalIndent<data.size() && data[normalIndent]==' ') normalIndent++;
2154 size_t listIndent = computeIndentExcludingListMarkers(data);
2155 size_t result = listIndent>normalIndent ? listIndent : 0;
2156 AUTO_TRACE_EXIT("result={}",result);
2157 return result;
2158}
2159
2160static bool isEndOfList(std::string_view data)
2161{
2162 AUTO_TRACE("data='{}'",Trace::trunc(data));
2163 int dots=0;
2164 size_t i=0;
2165 // end of list marker is an otherwise empty line with a dot.
2166 while (i<data.size())
2167 {
2168 if (data[i]=='.')
2169 {
2170 dots++;
2171 }
2172 else if (data[i]=='\n')
2173 {
2174 break;
2175 }
2176 else if (data[i]!=' ' && data[i]!='\t') // bail out if the line is not empty
2177 {
2178 AUTO_TRACE_EXIT("result=false");
2179 return false;
2180 }
2181 i++;
2182 }
2183 AUTO_TRACE_EXIT("result={}",dots==1);
2184 return dots==1;
2185}
2186
2187static bool isFencedCodeBlock(std::string_view data,size_t refIndent,
2188 QCString &lang,size_t &start,size_t &end,size_t &offset)
2189{
2190 AUTO_TRACE("data='{}' refIndent={}",Trace::trunc(data),refIndent);
2191 const char dot = '.';
2192 auto isAlphaChar = [ ](char c) { return (c>='A' && c<='Z') || (c>='a' && c<='z'); };
2193 auto isAlphaNChar = [ ](char c) { return (c>='A' && c<='Z') || (c>='a' && c<='z') || (c>='0' && c<='9'); };
2194 auto isLangChar = [&](char c) { return c==dot || isAlphaChar(c); };
2195 // rules: at least 3 ~~~, end of the block same amount of ~~~'s, otherwise
2196 // return FALSE
2197 size_t i=0;
2198 size_t indent=0;
2199 int startTildes=0;
2200 const size_t size = data.size();
2201 while (i<size && data[i]==' ') indent++,i++;
2202 if (indent>=refIndent+4)
2203 {
2204 AUTO_TRACE_EXIT("result=false: content is part of code block indent={} refIndent={}",indent,refIndent);
2205 return FALSE;
2206 } // part of code block
2207 char tildaChar='~';
2208 if (i<size && data[i]=='`') tildaChar='`';
2209 while (i<size && data[i]==tildaChar) startTildes++,i++;
2210 if (startTildes<3)
2211 {
2212 AUTO_TRACE_EXIT("result=false: no fence marker found #tildes={}",startTildes);
2213 return FALSE;
2214 } // not enough tildes
2215 if (i<size && data[i]=='{') // extract .py from ```{.py} ... ```
2216 {
2217 i++; // skip over {
2218 if (data[i] == dot) i++; // skip over initial dot
2219 size_t startLang=i;
2220 while (i<size && (data[i]!='\n' && data[i]!='}')) i++; // find matching }
2221 if (i<size && data[i]=='}')
2222 {
2223 lang = data.substr(startLang,i-startLang);
2224 i++;
2225 }
2226 else // missing closing bracket, treat `{` as part of the content
2227 {
2228 i=startLang-1;
2229 lang="";
2230 }
2231 }
2232 else if (i<size && isLangChar(data[i])) /// extract python or .py from ```python...``` or ```.py...```
2233 {
2234 if (data[i] == dot) i++; // skip over initial dot
2235 size_t startLang=i;
2236 if (i<size && isAlphaChar(data[i])) //check first character of language specifier
2237 {
2238 i++;
2239 while (i<size && isAlphaNChar(data[i])) i++; // find end of language specifier
2240 }
2241 lang = data.substr(startLang,i-startLang);
2242 }
2243 else // no language specified
2244 {
2245 lang="";
2246 }
2247
2248 start=i;
2249 while (i<size)
2250 {
2251 if (data[i]==tildaChar)
2252 {
2253 end=i;
2254 int endTildes=0;
2255 while (i<size && data[i]==tildaChar) endTildes++,i++;
2256 while (i<size && data[i]==' ') i++;
2257 {
2258 if (endTildes==startTildes)
2259 {
2260 offset=i;
2261 AUTO_TRACE_EXIT("result=true: found end marker at offset {} lang='{}'",offset,lang);
2262 return true;
2263 }
2264 }
2265 }
2266 i++;
2267 }
2268 AUTO_TRACE_EXIT("result=false: no end marker found lang={}'",lang);
2269 return false;
2270}
2271
2272static bool isCodeBlock(std::string_view data, size_t offset,size_t &indent)
2273{
2274 AUTO_TRACE("data='{}' offset={}",Trace::trunc(data),offset);
2275 //printf("<isCodeBlock(offset=%d,size=%d,indent=%d)\n",offset,size,indent);
2276 // determine the indent of this line
2277 size_t i=0;
2278 size_t indent0=0;
2279 const size_t size = data.size();
2280 while (i<size && data[i]==' ') indent0++,i++;
2281
2282 if (indent0<codeBlockIndent)
2283 {
2284 AUTO_TRACE_EXIT("result={}: line is not indented enough {}<4",false,indent0);
2285 return false;
2286 }
2287 if (indent0>=size || data[indent0]=='\n') // empty line does not start a code block
2288 {
2289 AUTO_TRACE_EXIT("result={}: only spaces at the end of a comment block",false);
2290 return false;
2291 }
2292
2293 i=offset;
2294 int nl=0;
2295 int nl_pos[3];
2296 int offset_i = static_cast<int>(offset);
2297 // search back 3 lines and remember the start of lines -1 and -2
2298 while (i>0 && nl<3) // i counts down from offset to 1
2299 {
2300 int j = static_cast<int>(i)-offset_i-1; // j counts from -1 to -offset
2301 // since j can be negative we need to rewrap data in a std::string_view
2302 size_t nl_size = isNewline(std::string_view(data.data()+j,data.size()-j));
2303 if (nl_size>0)
2304 {
2305 nl_pos[nl++]=j+static_cast<int>(nl_size);
2306 }
2307 i--;
2308 }
2309
2310 // if there are only 2 preceding lines, then line -2 starts at -offset
2311 if (i==0 && nl==2) nl_pos[nl++]=-offset_i;
2312
2313 if (nl==3) // we have at least 2 preceding lines
2314 {
2315 //printf(" positions: nl_pos=[%d,%d,%d] line[-2]='%s' line[-1]='%s'\n",
2316 // nl_pos[0],nl_pos[1],nl_pos[2],
2317 // qPrint(QCString(data+nl_pos[1]).left(nl_pos[0]-nl_pos[1]-1)),
2318 // qPrint(QCString(data+nl_pos[2]).left(nl_pos[1]-nl_pos[2]-1)));
2319
2320 // check that line -1 is empty
2321 // Note that the offset is negative so we need to rewrap the string view
2322 if (!isEmptyLine(std::string_view(data.data()+nl_pos[1],nl_pos[0]-nl_pos[1]-1)))
2323 {
2324 AUTO_TRACE_EXIT("result={}",FALSE);
2325 return FALSE;
2326 }
2327
2328 // determine the indent of line -2
2329 // Note that the offset is negative so we need to rewrap the string view
2330 indent=std::max(indent,computeIndentExcludingListMarkers(
2331 std::string_view(data.data()+nl_pos[2],nl_pos[1]-nl_pos[2])));
2332
2333 //printf(">isCodeBlock local_indent %d>=%d+%d=%d\n",
2334 // indent0,indent,codeBlockIndent,indent0>=indent+codeBlockIndent);
2335 // if the difference is >4 spaces -> code block
2336 bool res = indent0>=indent+codeBlockIndent;
2337 AUTO_TRACE_EXIT("result={}: code block if indent difference >4 spaces",res);
2338 return res;
2339 }
2340 else // not enough lines to determine the relative indent, use global indent
2341 {
2342 // check that line -1 is empty
2343 // Note that the offset is negative so we need to rewrap the string view
2344 if (nl==1 && !isEmptyLine(std::string_view(data.data()-offset,offset-1)))
2345 {
2346 AUTO_TRACE_EXIT("result=false");
2347 return FALSE;
2348 }
2349 //printf(">isCodeBlock global indent %d>=%d+4=%d nl=%d\n",
2350 // indent0,indent,indent0>=indent+4,nl);
2351 bool res = indent0>=indent+codeBlockIndent;
2352 AUTO_TRACE_EXIT("result={}: code block if indent difference >4 spaces",res);
2353 return res;
2354 }
2355}
2356
2357/** Finds the location of the table's contains in the string \a data.
2358 * Only one line will be inspected.
2359 * @param[in] data pointer to the string buffer.
2360 * @param[out] start offset of the first character of the table content
2361 * @param[out] end offset of the last character of the table content
2362 * @param[out] columns number of table columns found
2363 * @returns The offset until the next line in the buffer.
2364 */
2365static size_t findTableColumns(std::string_view data,size_t &start,size_t &end,size_t &columns)
2366{
2367 AUTO_TRACE("data='{}'",Trace::trunc(data));
2368 const size_t size = data.size();
2369 size_t i=0,n=0;
2370 // find start character of the table line
2371 while (i<size && data[i]==' ') i++;
2372 if (i<size && data[i]=='|' && data[i]!='\n') i++,n++; // leading | does not count
2373 start = i;
2374
2375 // find end character of the table line
2376 size_t j = 0;
2377 while (i<size && (j = isNewline(data.substr(i)))==0) i++;
2378 size_t eol=i+j;
2379
2380 if (j>0 && i>0) i--; // move i to point before newline
2381 while (i>0 && data[i]==' ') i--;
2382 if (i>0 && data[i-1]!='\\' && data[i]=='|') i--,n++; // trailing or escaped | does not count
2383 end = i;
2384
2385 // count columns between start and end
2386 columns=0;
2387 if (end>start)
2388 {
2389 i=start;
2390 while (i<=end) // look for more column markers
2391 {
2392 if (data[i]=='|' && (i==0 || data[i-1]!='\\')) columns++;
2393 if (columns==1) columns++; // first | make a non-table into a two column table
2394 i++;
2395 }
2396 }
2397 if (n==2 && columns==0) // table row has | ... |
2398 {
2399 columns++;
2400 }
2401 AUTO_TRACE_EXIT("eol={} start={} end={} columns={}",eol,start,end,columns);
2402 return eol;
2403}
2404
2405/** Returns TRUE iff data points to the start of a table block */
2406static bool isTableBlock(std::string_view data)
2407{
2408 AUTO_TRACE("data='{}'",Trace::trunc(data));
2409 size_t cc0=0, start=0, end=0;
2410
2411 // the first line should have at least two columns separated by '|'
2412 size_t i = findTableColumns(data,start,end,cc0);
2413 if (i>=data.size() || cc0<1)
2414 {
2415 AUTO_TRACE_EXIT("result=false: no |'s in the header");
2416 return FALSE;
2417 }
2418
2419 size_t cc1 = 0;
2420 size_t ret = findTableColumns(data.substr(i),start,end,cc1);
2421 size_t j=i+start;
2422 // separator line should consist of |, - and : and spaces only
2423 while (j<=end+i)
2424 {
2425 if (data[j]!=':' && data[j]!='-' && data[j]!='|' && data[j]!=' ')
2426 {
2427 AUTO_TRACE_EXIT("result=false: invalid character '{}'",data[j]);
2428 return FALSE; // invalid characters in table separator
2429 }
2430 j++;
2431 }
2432 if (cc1!=cc0) // number of columns should be same as previous line
2433 {
2434 AUTO_TRACE_EXIT("result=false: different number of columns as previous line {}!={}",cc1,cc0);
2435 return FALSE;
2436 }
2437
2438 i+=ret; // goto next line
2439 size_t cc2 = 0;
2440 findTableColumns(data.substr(i),start,end,cc2);
2441
2442 AUTO_TRACE_EXIT("result={}",cc1==cc2);
2443 return cc1==cc2;
2444}
2445
2446size_t Markdown::Private::writeTableBlock(std::string_view data)
2447{
2448 AUTO_TRACE("data='{}'",Trace::trunc(data));
2449 const size_t size = data.size();
2450
2451 size_t columns=0, start=0, end=0;
2452 size_t i = findTableColumns(data,start,end,columns);
2453 size_t headerStart = start;
2454 size_t headerEnd = end;
2455
2456 // read cell alignments
2457 size_t cc = 0;
2458 size_t ret = findTableColumns(data.substr(i),start,end,cc);
2459 size_t k=0;
2460 std::vector<int> columnAlignment(columns);
2461
2462 bool leftMarker=false, rightMarker=false, startFound=false;
2463 size_t j=start+i;
2464 while (j<=end+i)
2465 {
2466 if (!startFound)
2467 {
2468 if (data[j]==':') { leftMarker=TRUE; startFound=TRUE; }
2469 if (data[j]=='-') startFound=TRUE;
2470 //printf(" data[%d]=%c startFound=%d\n",j,data[j],startFound);
2471 }
2472 if (data[j]=='-') rightMarker=FALSE;
2473 else if (data[j]==':') rightMarker=TRUE;
2474 if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
2475 {
2476 if (k<columns)
2477 {
2478 columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
2479 //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
2480 leftMarker=FALSE;
2481 rightMarker=FALSE;
2482 startFound=FALSE;
2483 }
2484 k++;
2485 }
2486 j++;
2487 }
2488 if (k<columns)
2489 {
2490 columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
2491 //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
2492 }
2493 // proceed to next line
2494 i+=ret;
2495
2496 // Store the table cell information by row then column. This
2497 // allows us to handle row spanning.
2498 std::vector<std::vector<TableCell> > tableContents;
2499
2500 size_t m = headerStart;
2501 std::vector<TableCell> headerContents(columns);
2502 for (k=0;k<columns;k++)
2503 {
2504 while (m<=headerEnd && (data[m]!='|' || (m>0 && data[m-1]=='\\')))
2505 {
2506 headerContents[k].cellText += data[m++];
2507 }
2508 m++;
2509 // do the column span test before stripping white space
2510 // || is spanning columns, | | is not
2511 headerContents[k].colSpan = headerContents[k].cellText.isEmpty();
2512 headerContents[k].cellText = headerContents[k].cellText.stripWhiteSpace();
2513 }
2514 tableContents.push_back(headerContents);
2515
2516 // write table cells
2517 while (i<size)
2518 {
2519 ret = findTableColumns(data.substr(i),start,end,cc);
2520 if (cc!=columns) break; // end of table
2521
2522 j=start+i;
2523 k=0;
2524 std::vector<TableCell> rowContents(columns);
2525 while (j<=end+i)
2526 {
2527 if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
2528 {
2529 // do the column span test before stripping white space
2530 // || is spanning columns, | | is not
2531 rowContents[k].colSpan = rowContents[k].cellText.isEmpty();
2532 rowContents[k].cellText = rowContents[k].cellText.stripWhiteSpace();
2533 k++;
2534 } // if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
2535 else
2536 {
2537 rowContents[k].cellText += data[j];
2538 } // else { if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\'))) }
2539 j++;
2540 } // while (j<=end+i)
2541 // do the column span test before stripping white space
2542 // || is spanning columns, | | is not
2543 rowContents[k].colSpan = rowContents[k].cellText.isEmpty();
2544 rowContents[k].cellText = rowContents[k].cellText.stripWhiteSpace();
2545 tableContents.push_back(rowContents);
2546
2547 // proceed to next line
2548 i+=ret;
2549 }
2550
2551 out+="<table class=\"markdownTable\">";
2552 QCString cellTag("th"), cellClass("class=\"markdownTableHead");
2553 for (size_t row = 0; row < tableContents.size(); row++)
2554 {
2555 if (row)
2556 {
2557 if (row % 2)
2558 {
2559 out+="\n<tr class=\"markdownTableRowOdd\">";
2560 }
2561 else
2562 {
2563 out+="\n<tr class=\"markdownTableRowEven\">";
2564 }
2565 }
2566 else
2567 {
2568 out+="\n <tr class=\"markdownTableHead\">";
2569 }
2570 for (size_t c = 0; c < columns; c++)
2571 {
2572 // save the cell text for use after column span computation
2573 QCString cellText(tableContents[row][c].cellText);
2574
2575 // Row span handling. Spanning rows will contain a caret ('^').
2576 // If the current cell contains just a caret, this is part of an
2577 // earlier row's span and the cell should not be added to the
2578 // output.
2579 if (tableContents[row][c].cellText == "^")
2580 {
2581 continue;
2582 }
2583 if (tableContents[row][c].colSpan)
2584 {
2585 int cr = static_cast<int>(c);
2586 while ( cr >= 0 && tableContents[row][cr].colSpan)
2587 {
2588 cr--;
2589 };
2590 if (cr >= 0 && tableContents[row][cr].cellText == "^") continue;
2591 }
2592 size_t rowSpan = 1, spanRow = row+1;
2593 while ((spanRow < tableContents.size()) &&
2594 (tableContents[spanRow][c].cellText == "^"))
2595 {
2596 spanRow++;
2597 rowSpan++;
2598 }
2599
2600 out+=" <" + cellTag + " " + cellClass;
2601 // use appropriate alignment style
2602 switch (columnAlignment[c])
2603 {
2604 case AlignLeft: out+="Left\""; break;
2605 case AlignRight: out+="Right\""; break;
2606 case AlignCenter: out+="Center\""; break;
2607 case AlignNone: out+="None\""; break;
2608 }
2609
2610 if (rowSpan > 1)
2611 {
2612 QCString spanStr;
2613 spanStr.setNum(rowSpan);
2614 out+=" rowspan=\"" + spanStr + "\"";
2615 }
2616 // Column span handling, assumes that column spans will have
2617 // empty strings, which would indicate the sequence "||", used
2618 // to signify spanning columns.
2619 size_t colSpan = 1;
2620 while ((c+1 < columns) && tableContents[row][c+1].colSpan)
2621 {
2622 c++;
2623 colSpan++;
2624 }
2625 if (colSpan > 1)
2626 {
2627 QCString spanStr;
2628 spanStr.setNum(colSpan);
2629 out+=" colspan=\"" + spanStr + "\"";
2630 }
2631 // need at least one space on either side of the cell text in
2632 // order for doxygen to do other formatting
2633 out+="> " + cellText + " \\ilinebr </" + cellTag + ">";
2634 }
2635 cellTag = "td";
2636 cellClass = "class=\"markdownTableBody";
2637 out+=" </tr>";
2638 }
2639 out+="</table>\n";
2640
2641 AUTO_TRACE_EXIT("i={}",i);
2642 return i;
2643}
2644
2645
2646static bool hasLineBreak(std::string_view data)
2647{
2648 AUTO_TRACE("data='{}'",Trace::trunc(data));
2649 size_t i=0;
2650 size_t j=0;
2651 // search for end of line and also check if it is not a completely blank
2652 while (i<data.size() && data[i]!='\n')
2653 {
2654 if (data[i]!=' ' && data[i]!='\t') j++; // some non whitespace
2655 i++;
2656 }
2657 if (i>=data.size()) { return 0; } // empty line
2658 if (i<2) { return 0; } // not long enough
2659 bool res = (j>0 && data[i-1]==' ' && data[i-2]==' '); // non blank line with at two spaces at the end
2660 AUTO_TRACE_EXIT("result={}",res);
2661 return res;
2662}
2663
2664
2666{
2667 AUTO_TRACE("data='{}'",Trace::trunc(data));
2668 int level=0;
2669 QCString header;
2670 QCString id;
2671 if (isHRuler(data))
2672 {
2673 out+="<hr>\n";
2674 }
2675 else if ((level=isAtxHeader(data,header,id,TRUE)))
2676 {
2677 QCString hTag;
2678 if (!id.isEmpty())
2679 {
2680 switch (level)
2681 {
2682 case SectionType::Section: out+="@section "; break;
2683 case SectionType::Subsection: out+="@subsection "; break;
2684 case SectionType::Subsubsection: out+="@subsubsection "; break;
2685 case SectionType::Paragraph: out+="@paragraph "; break;
2686 case SectionType::Subparagraph: out+="@subparagraph "; break;
2687 case SectionType::Subsubparagraph: out+="@subsubparagraph "; break;
2688 }
2689 out+=id;
2690 out+=" ";
2691 out+=header;
2692 out+="\n";
2693 }
2694 else
2695 {
2696 hTag.sprintf("h%d",level);
2697 out+="<"+hTag+">";
2698 out+=header;
2699 out+="</"+hTag+">\n";
2700 }
2701 }
2702 else if (data.size()>0) // nothing interesting -> just output the line
2703 {
2704 size_t tmpSize = data.size();
2705 if (data[data.size()-1] == '\n') tmpSize--;
2706 out+=data.substr(0,tmpSize);
2707
2708 if (hasLineBreak(data))
2709 {
2710 out+="\\ilinebr<br>";
2711 }
2712 if (tmpSize != data.size()) out+='\n';
2713 }
2714}
2715
/** Maps a GitHub style alert marker to the Doxygen command that is written in its place.
 *  The keys are in lower case; writeBlockQuote() converts the text to lower case before the lookup.
 */
static const std::unordered_map<std::string,std::string> g_quotationHeaderMap
{
  // GitHub style     Doxygen command
  { "[!caution]",   "\\attention" },
  { "[!important]", "\\important" },
  { "[!note]",      "\\note"      },
  { "[!tip]",       "\\remark"    },
  { "[!warning]",   "\\warning"   }
};
2724
2725size_t Markdown::Private::writeBlockQuote(std::string_view data)
2726{
2727 AUTO_TRACE("data='{}'",Trace::trunc(data));
2728 size_t i=0;
2729 int curLevel=0;
2730 size_t end=0;
2731 const size_t size = data.size();
2732 std::string startCmd;
2733 int isGitHubAlert = false;
2734 int isGitHubFirst = false;
2735 while (i<size)
2736 {
2737 // find end of this line
2738 end=i+1;
2739 while (end<=size && data[end-1]!='\n') end++;
2740 size_t j=i;
2741 int level=0;
2742 size_t indent=i;
2743 // compute the quoting level
2744 while (j<end && (data[j]==' ' || data[j]=='>'))
2745 {
2746 if (data[j]=='>') { level++; indent=j+1; }
2747 else if (j>0 && data[j-1]=='>') indent=j+1;
2748 j++;
2749 }
2750 if (indent>0 && j>0 && data[j-1]=='>' &&
2751 !(j==size || data[j]=='\n')) // disqualify last > if not followed by space
2752 {
2753 indent--;
2754 level--;
2755 j--;
2756 }
2757 AUTO_TRACE_ADD("indent={} i={} j={} end={} level={} line={}",indent,i,j,end,level,Trace::trunc(&data[i]));
2758 if (level==0 && j<end-1)
2759 {
2760 level = curLevel; // lazy
2761 }
2762 if (level==1)
2763 {
2764 QCString txt = stripWhiteSpace(data.substr(indent,end-indent));
2765 auto it = g_quotationHeaderMap.find(txt.lower().str()); // TODO: in C++20 the std::string can be dropped
2766 if (it != g_quotationHeaderMap.end())
2767 {
2768 isGitHubAlert = true;
2769 isGitHubFirst = true;
2770 startCmd = it->second;
2771 }
2772 }
2773 if (level>curLevel) // quote level increased => add start markers
2774 {
2775 if (level!=1 || !isGitHubAlert) // normal block quote
2776 {
2777 for (int l=curLevel;l<level-1;l++)
2778 {
2779 out+="<blockquote>";
2780 }
2781 out += "<blockquote>&zwj;"; // empty blockquotes are also shown
2782 }
2783 else if (!startCmd.empty()) // GitHub style alert
2784 {
2785 out += startCmd + " ";
2786 }
2787 }
2788 else if (level<curLevel) // quote level decreased => add end markers
2789 {
2790 int decrLevel = curLevel;
2791 if (level==0 && isGitHubAlert)
2792 {
2793 decrLevel--;
2794 }
2795 for (int l=level;l<decrLevel;l++)
2796 {
2797 out += "</blockquote>";
2798 }
2799 }
2800 if (level==0)
2801 {
2802 curLevel=0;
2803 break; // end of quote block
2804 }
2805 // copy line without quotation marks
2806 if (curLevel!=0 || !isGitHubAlert)
2807 {
2808 std::string_view txt = data.substr(indent,end-indent);
2809 if (stripWhiteSpace(txt).empty() && !startCmd.empty())
2810 {
2811 if (!isGitHubFirst) out += "<br>";
2812 out += "<br>\n";
2813 }
2814 else
2815 {
2816 out += txt;
2817 }
2818 isGitHubFirst = false;
2819 }
2820 else // GitHub alert section
2821 {
2822 out+= "\n";
2823 }
2824 curLevel=level;
2825 // proceed with next line
2826 i=end;
2827 }
2828 // end of comment within blockquote => add end markers
2829 if (isGitHubAlert) // GitHub alert doesn't have a blockquote
2830 {
2831 curLevel--;
2832 }
2833 for (int l=0;l<curLevel;l++)
2834 {
2835 out+="</blockquote>";
2836 }
2837 AUTO_TRACE_EXIT("i={}",i);
2838 return i;
2839}
2840
/** Skips over a location string of the form `\ifile "..." \iline N \ilinebr `.
 *  For code blocks that are outputted as part of an indented include or snippet command, the
 *  location string has to be filtered out of the code.
 *  @param[in]     data     the text buffer.
 *  @param[in]     indent   maximum number of spaces to skip after the location string.
 *  @param[in,out] offset   position in \a data where the search starts (leading spaces are skipped);
 *                          on success it is moved past the location string, up to \a indent spaces,
 *                          and one newline if that follows directly. Untouched when false is returned.
 *  @param[out]    location on success: the location string, including one space before `\ifile` when
 *                          spaces were skipped, followed by a newline. Untouched when false is returned.
 *  @returns true iff a location string was found on the line starting at \a offset.
 */
bool skipOverFileAndLineCommands(std::string_view data,size_t indent,size_t &offset,std::string &location)
{
  constexpr std::string_view fileCmd   = "\\ifile \"";
  constexpr std::string_view lineBrCmd = "\\ilinebr ";
  const size_t size = data.size();
  size_t i = offset;
  while (i<size && data[i]==' ') i++;
  // the whole command has to fit in the remaining data before it is compared
  if (i+fileCmd.size()>size || data.substr(i,fileCmd.size())!=fileCmd)
  {
    return false;
  }
  size_t locStart = i;
  if (i>offset) locStart--; // include the space before \ifile
  i+=fileCmd.size();
  // look for \ilinebr on the same line; written as a sum so that short input cannot underflow
  bool found=false;
  while (i+lineBrCmd.size()<size && data[i]!='\n')
  {
    if (data.substr(i,lineBrCmd.size())==lineBrCmd)
    {
      found=true;
      break;
    }
    i++;
  }
  if (!found)
  {
    return false;
  }
  i+=lineBrCmd.size();
  location=data.substr(locStart,i-locStart);
  location+='\n';
  while (indent>0 && i<size && data[i]==' ') i++,indent--;
  if (i<size && data[i]=='\n') i++;
  offset = i;
  return true;
}
2876
2877size_t Markdown::Private::writeCodeBlock(std::string_view data,size_t refIndent)
2878{
2879 AUTO_TRACE("data='{}' refIndent={}",Trace::trunc(data),refIndent);
2880 const size_t size = data.size();
2881 size_t i=0;
2882 // no need for \ilinebr here as the previous line was empty and was skipped
2883 out+="@iverbatim\n";
2884 int emptyLines=0;
2885 std::string location;
2886 while (i<size)
2887 {
2888 // find end of this line
2889 size_t end=i+1;
2890 while (end<=size && data[end-1]!='\n') end++;
2891 size_t j=i;
2892 size_t indent=0;
2893 while (j<end && data[j]==' ') j++,indent++;
2894 //printf("j=%d end=%d indent=%d refIndent=%d tabSize=%d data={%s}\n",
2895 // j,end,indent,refIndent,Config_getInt(TAB_SIZE),qPrint(QCString(data+i).left(end-i-1)));
2896 if (j==end-1) // empty line
2897 {
2898 emptyLines++;
2899 i=end;
2900 }
2901 else if (indent>=refIndent+codeBlockIndent) // enough indent to continue the code block
2902 {
2903 while (emptyLines>0) // write skipped empty lines
2904 {
2905 // add empty line
2906 out+="\n";
2907 emptyLines--;
2908 }
2909 // add code line minus the indent
2910 size_t offset = i+refIndent+codeBlockIndent;
2911 std::string lineLoc;
2912 if (skipOverFileAndLineCommands(data,codeBlockIndent,offset,lineLoc))
2913 {
2914 location = lineLoc;
2915 }
2916 out+=data.substr(offset,end-offset);
2917 i=end;
2918 }
2919 else // end of code block
2920 {
2921 break;
2922 }
2923 }
2924 out+="@endiverbatim";
2925 if (!location.empty())
2926 {
2927 out+=location;
2928 }
2929 else
2930 {
2931 out+="\\ilinebr ";
2932 }
2933 while (emptyLines>0) // write skipped empty lines
2934 {
2935 // add empty line
2936 out+="\n";
2937 emptyLines--;
2938 }
2939 AUTO_TRACE_EXIT("i={}",i);
2940 return i;
2941}
2942
2943// start searching for the end of the line start at offset \a i
2944// keeping track of possible blocks that need to be skipped.
2945size_t Markdown::Private::findEndOfLine(std::string_view data,size_t offset)
2946{
2947 AUTO_TRACE("data='{}'",Trace::trunc(data));
2948 // find end of the line
2949 const size_t size = data.size();
2950 size_t nb=0, end=offset+1, j=0;
2951 while (end<=size && (j=isNewline(data.data()+end-1))==0)
2952 {
2953 // while looking for the end of the line we might encounter a block
2954 // that needs to be passed unprocessed.
2955 if ((data[end-1]=='\\' || data[end-1]=='@') && // command
2956 (end<=1 || (data[end-2]!='\\' && data[end-2]!='@')) // not escaped
2957 )
2958 {
2959 QCString endBlockName = isBlockCommand(data.substr(end-1),end-1);
2960 end++;
2961 if (!endBlockName.isEmpty())
2962 {
2963 size_t l = endBlockName.length();
2964 for (;end<size-l-1;end++) // search for end of block marker
2965 {
2966 if ((data[end]=='\\' || data[end]=='@') &&
2967 data[end-1]!='\\' && data[end-1]!='@'
2968 )
2969 {
2970 if (qstrncmp(&data[end+1],endBlockName.data(),l)==0)
2971 {
2972 // found end marker, skip over this block
2973 //printf("feol.block out={%s}\n",qPrint(QCString(data+i).left(end+l+1-i)));
2974 end = end + l + 2;
2975 break;
2976 }
2977 }
2978 }
2979 }
2980 }
2981 else if (nb==0 && data[end-1]=='<' && size>=6 && end<size-6 &&
2982 (end<=1 || (data[end-2]!='\\' && data[end-2]!='@'))
2983 )
2984 {
2985 if (tolower(data[end])=='p' && tolower(data[end+1])=='r' &&
2986 tolower(data[end+2])=='e' && (data[end+3]=='>' || data[end+3]==' ')) // <pre> tag
2987 {
2988 // skip part until including </pre>
2989 end = end + processHtmlTagWrite(data.substr(end-1),end-1,false);
2990 break;
2991 }
2992 else
2993 {
2994 end++;
2995 }
2996 }
2997 else if (nb==0 && data[end-1]=='`')
2998 {
2999 while (end<=size && data[end-1]=='`') end++,nb++;
3000 }
3001 else if (nb>0 && data[end-1]=='`')
3002 {
3003 size_t enb=0;
3004 while (end<=size && data[end-1]=='`') end++,enb++;
3005 if (enb==nb) nb=0;
3006 }
3007 else
3008 {
3009 end++;
3010 }
3011 }
3012 if (j>0) end+=j-1;
3013 AUTO_TRACE_EXIT("offset={} end={}",offset,end);
3014 return end;
3015}
3016
3017void Markdown::Private::writeFencedCodeBlock(std::string_view data,std::string_view lang,
3018 size_t blockStart,size_t blockEnd)
3019{
3020 AUTO_TRACE("data='{}' lang={} blockStart={} blockEnd={}",Trace::trunc(data),lang,blockStart,blockEnd);
3021 if (!lang.empty() && lang[0]=='.') lang=lang.substr(1);
3022 const size_t size=data.size();
3023 size_t i=0;
3024 while (i<size && (data[i]==' ' || data[i]=='\t'))
3025 {
3026 out+=data[i++];
3027 blockStart--;
3028 blockEnd--;
3029 }
3030 out+="@icode";
3031 if (!lang.empty())
3032 {
3033 out+="{"+lang+"}";
3034 }
3035 out+=" ";
3036 addStrEscapeUtf8Nbsp(data.substr(blockStart+i,blockEnd-blockStart));
3037 out+="@endicode ";
3038}
3039
3040QCString Markdown::Private::processQuotations(std::string_view data,size_t refIndent)
3041{
3042 AUTO_TRACE("data='{}' refIndex='{}'",Trace::trunc(data),refIndent);
3043 out.clear();
3044 size_t i=0,end=0;
3045 size_t pi=std::string::npos;
3046 bool newBlock = false;
3047 bool insideList = false;
3048 size_t currentIndent = refIndent;
3049 size_t listIndent = refIndent;
3050 const size_t size = data.size();
3051 QCString lang;
3052 while (i<size)
3053 {
3054 end = findEndOfLine(data,i);
3055 // line is now found at [i..end)
3056
3057 size_t lineIndent=0;
3058 while (lineIndent<end && data[i+lineIndent]==' ') lineIndent++;
3059 //printf("** lineIndent=%d line=(%s)\n",lineIndent,qPrint(QCString(data+i).left(end-i)));
3060
3061 if (newBlock)
3062 {
3063 //printf("** end of block\n");
3064 if (insideList && lineIndent<currentIndent) // end of list
3065 {
3066 //printf("** end of list\n");
3067 currentIndent = refIndent;
3068 insideList = false;
3069 }
3070 newBlock = false;
3071 }
3072
3073 if ((listIndent=isListMarker(data.substr(i,end-i)))) // see if we need to increase the indent level
3074 {
3075 if (listIndent<currentIndent+4)
3076 {
3077 //printf("** start of list\n");
3078 insideList = true;
3079 currentIndent = listIndent;
3080 }
3081 }
3082 else if (isEndOfList(data.substr(i,end-i)))
3083 {
3084 //printf("** end of list\n");
3085 insideList = false;
3086 currentIndent = listIndent;
3087 }
3088 else if (isEmptyLine(data.substr(i,end-i)))
3089 {
3090 //printf("** new block\n");
3091 newBlock = true;
3092 }
3093 //printf("currentIndent=%d listIndent=%d refIndent=%d\n",currentIndent,listIndent,refIndent);
3094
3095 if (pi!=std::string::npos)
3096 {
3097 size_t blockStart=0, blockEnd=0, blockOffset=0;
3098 if (isFencedCodeBlock(data.substr(pi),currentIndent,lang,blockStart,blockEnd,blockOffset))
3099 {
3100 auto addSpecialCommand = [&](const QCString &startCmd,const QCString &endCmd)
3101 {
3102 size_t cmdPos = pi+blockStart+1;
3103 QCString pl = data.substr(cmdPos,blockEnd-blockStart-1);
3104 size_t ii = 0;
3105 int nl = 1;
3106 // check for absence of start command, either @start<cmd>, or \\start<cmd>
3107 while (ii<pl.length() && qisspace(pl[ii]))
3108 {
3109 if (pl[ii]=='\n') nl++;
3110 ii++; // skip leading whitespace
3111 }
3112 bool addNewLines = false;
3113 if (ii+startCmd.length()>=pl.length() || // no room for start command
3114 (pl[ii]!='\\' && pl[ii]!='@') || // no @ or \ after whitespace
3115 qstrncmp(pl.data()+ii+1,startCmd.data(),startCmd.length())!=0) // no start command
3116 {
3117 // input: output:
3118 // ----------------------------------------------------
3119 // ```{plantuml} => @startuml
3120 // A->B A->B
3121 // ``` @enduml
3122 // ----------------------------------------------------
3123 pl = "@"+startCmd+"\n" + pl + "@"+endCmd;
3124 addNewLines = false;
3125 }
3126 else // we have a @start... command inside the code block
3127 {
3128 // input: output:
3129 // ----------------------------------------------------
3130 // ```{plantuml} \n
3131 // \n
3132 // @startuml => @startuml
3133 // A->B A->B
3134 // @enduml @enduml
3135 // ``` \n
3136 // ----------------------------------------------------
3137 addNewLines = true;
3138 }
3139 if (addNewLines) for (int j=0;j<nl;j++) out+='\n';
3140 processSpecialCommand(pl.view().substr(ii),ii);
3141 if (addNewLines) out+='\n';
3142 };
3143
3144 if (!Config_getString(PLANTUML_JAR_PATH).isEmpty() && lang=="plantuml")
3145 {
3146 addSpecialCommand("startuml","enduml");
3147 }
3148 else if (Config_getBool(HAVE_DOT) && lang=="dot")
3149 {
3150 addSpecialCommand("dot","enddot");
3151 }
3152 else if (lang=="msc") // msc is built-in
3153 {
3154 addSpecialCommand("msc","endmsc");
3155 }
3156 else // normal code block
3157 {
3158 writeFencedCodeBlock(data.substr(pi),lang.view(),blockStart,blockEnd);
3159 }
3160 i=pi+blockOffset;
3161 pi=std::string::npos;
3162 end=i+1;
3163 continue;
3164 }
3165 else if (isBlockQuote(data.substr(pi,i-pi),currentIndent))
3166 {
3167 i = pi+writeBlockQuote(data.substr(pi));
3168 pi=std::string::npos;
3169 end=i+1;
3170 continue;
3171 }
3172 else
3173 {
3174 //printf("quote out={%s}\n",QCString(data+pi).left(i-pi).data());
3175 out+=data.substr(pi,i-pi);
3176 }
3177 }
3178 pi=i;
3179 i=end;
3180 }
3181 if (pi!=std::string::npos && pi<size) // deal with the last line
3182 {
3183 if (isBlockQuote(data.substr(pi),currentIndent))
3184 {
3185 writeBlockQuote(data.substr(pi));
3186 }
3187 else
3188 {
3189 out+=data.substr(pi);
3190 }
3191 }
3192
3193 //printf("Process quotations\n---- input ----\n%s\n---- output ----\n%s\n------------\n",
3194 // qPrint(s),prv->out.get());
3195
3196 return out;
3197}
3198
3199QCString Markdown::Private::processBlocks(std::string_view data,const size_t indent)
3200{
3201 AUTO_TRACE("data='{}' indent={}",Trace::trunc(data),indent);
3202 out.clear();
3203 size_t pi = std::string::npos;
3204 QCString id,link,title;
3205
3206#if 0 // commented out, since starting with a comment block is probably a usage error
3207 // see also http://stackoverflow.com/q/20478611/784672
3208
3209 // special case when the documentation starts with a code block
3210 // since the first line is skipped when looking for a code block later on.
3211 if (end>codeBlockIndent && isCodeBlock(data,0,end,blockIndent))
3212 {
3213 i=writeCodeBlock(out,data,size,blockIndent);
3214 end=i+1;
3215 pi=-1;
3216 }
3217#endif
3218
3219 size_t currentIndent = indent;
3220 size_t listIndent = indent;
3221 bool insideList = false;
3222 bool newBlock = false;
3223 // process each line
3224 size_t i=0;
3225 while (i<data.size())
3226 {
3227 size_t end = findEndOfLine(data,i);
3228 // line is now found at [i..end)
3229
3230 size_t lineIndent=0;
3231 int level = 0;
3232 while (lineIndent<end && data[i+lineIndent]==' ') lineIndent++;
3233 //printf("** lineIndent=%d line=(%s)\n",lineIndent,qPrint(QCString(data+i).left(end-i)));
3234
3235 if (newBlock)
3236 {
3237 //printf("** end of block\n");
3238 if (insideList && lineIndent<currentIndent) // end of list
3239 {
3240 //printf("** end of list\n");
3241 currentIndent = indent;
3242 insideList = false;
3243 }
3244 newBlock = false;
3245 }
3246
3247 if ((listIndent=isListMarker(data.substr(i,end-i)))) // see if we need to increase the indent level
3248 {
3249 if (listIndent<currentIndent+4)
3250 {
3251 //printf("** start of list\n");
3252 insideList = true;
3253 currentIndent = listIndent;
3254 }
3255 }
3256 else if (isEndOfList(data.substr(i,end-i)))
3257 {
3258 //printf("** end of list\n");
3259 insideList = false;
3260 currentIndent = listIndent;
3261 }
3262 else if (isEmptyLine(data.substr(i,end-i)))
3263 {
3264 //printf("** new block\n");
3265 newBlock = true;
3266 }
3267
3268 //printf("indent=%d listIndent=%d blockIndent=%d\n",indent,listIndent,blockIndent);
3269
3270 //printf("findEndOfLine: pi=%d i=%d end=%d\n",pi,i,end);
3271
3272 if (pi!=std::string::npos)
3273 {
3274 size_t blockStart=0, blockEnd=0, blockOffset=0;
3275 QCString lang;
3276 size_t blockIndent = currentIndent;
3277 size_t ref = 0;
3278 //printf("isHeaderLine(%s)=%d\n",QCString(data+i).left(size-i).data(),level);
3279 QCString endBlockName;
3280 if (data[i]=='@' || data[i]=='\\') endBlockName = isBlockCommand(data.substr(i),i);
3281 if (!endBlockName.isEmpty())
3282 {
3283 // handle previous line
3284 if (isLinkRef(data.substr(pi,i-pi),id,link,title))
3285 {
3286 linkRefs.emplace(id.lower().str(),LinkRef(link,title));
3287 }
3288 else
3289 {
3290 writeOneLineHeaderOrRuler(data.substr(pi,i-pi));
3291 }
3292 out+=data[i];
3293 i++;
3294 size_t l = endBlockName.length();
3295 while (i+l<data.size())
3296 {
3297 if ((data[i]=='\\' || data[i]=='@') && // command
3298 data[i-1]!='\\' && data[i-1]!='@') // not escaped
3299 {
3300 if (qstrncmp(&data[i+1],endBlockName.data(),l)==0)
3301 {
3302 out+=data[i];
3303 out+=endBlockName;
3304 i+=l+1;
3305 break;
3306 }
3307 }
3308 out+=data[i];
3309 i++;
3310 }
3311 }
3312 else if ((level=isHeaderline(data.substr(i),TRUE))>0)
3313 {
3314 //printf("Found header at %d-%d\n",i,end);
3315 while (pi<data.size() && data[pi]==' ') pi++;
3316 QCString header = data.substr(pi,i-pi-1);
3317 id = extractTitleId(header, level);
3318 //printf("header='%s' is='%s'\n",qPrint(header),qPrint(id));
3319 if (!header.isEmpty())
3320 {
3321 if (!id.isEmpty())
3322 {
3323 out+=level==1?"@section ":"@subsection ";
3324 out+=id;
3325 out+=" ";
3326 out+=header;
3327 out+="\n\n";
3328 }
3329 else
3330 {
3331 out+=level==1?"<h1>":"<h2>";
3332 out+=header;
3333 out+=level==1?"\n</h1>\n":"\n</h2>\n";
3334 }
3335 }
3336 else
3337 {
3338 out+="\n<hr>\n";
3339 }
3340 pi=std::string::npos;
3341 i=end;
3342 end=i+1;
3343 continue;
3344 }
3345 else if ((ref=isLinkRef(data.substr(pi),id,link,title)))
3346 {
3347 //printf("found link ref: id='%s' link='%s' title='%s'\n",
3348 // qPrint(id),qPrint(link),qPrint(title));
3349 linkRefs.emplace(id.lower().str(),LinkRef(link,title));
3350 i=ref+pi;
3351 end=i+1;
3352 }
3353 else if (isFencedCodeBlock(data.substr(pi),currentIndent,lang,blockStart,blockEnd,blockOffset))
3354 {
3355 //printf("Found FencedCodeBlock lang='%s' start=%d end=%d code={%s}\n",
3356 // qPrint(lang),blockStart,blockEnd,QCString(data+pi+blockStart).left(blockEnd-blockStart).data());
3357 writeFencedCodeBlock(data.substr(pi),lang.view(),blockStart,blockEnd);
3358 i=pi+blockOffset;
3359 pi=std::string::npos;
3360 end=i+1;
3361 continue;
3362 }
3363 else if (isCodeBlock(data.substr(i,end-i),i,blockIndent))
3364 {
3365 // skip previous line (it is empty anyway)
3366 i+=writeCodeBlock(data.substr(i),blockIndent);
3367 pi=std::string::npos;
3368 end=i+1;
3369 continue;
3370 }
3371 else if (isTableBlock(data.substr(pi)))
3372 {
3373 i=pi+writeTableBlock(data.substr(pi));
3374 pi=std::string::npos;
3375 end=i+1;
3376 continue;
3377 }
3378 else
3379 {
3380 writeOneLineHeaderOrRuler(data.substr(pi,i-pi));
3381 }
3382 }
3383 pi=i;
3384 i=end;
3385 }
3386 //printf("last line %d size=%d\n",i,size);
3387 if (pi!=std::string::npos && pi<data.size()) // deal with the last line
3388 {
3389 if (isLinkRef(data.substr(pi),id,link,title))
3390 {
3391 //printf("found link ref: id='%s' link='%s' title='%s'\n",
3392 // qPrint(id),qPrint(link),qPrint(title));
3393 linkRefs.emplace(id.lower().str(),LinkRef(link,title));
3394 }
3395 else
3396 {
3397 writeOneLineHeaderOrRuler(data.substr(pi));
3398 }
3399 }
3400
3401 return out;
3402}
3403
3404
3405static ExplicitPageResult isExplicitPage(const QCString &docs)
3406{
3407 AUTO_TRACE("docs={}",Trace::trunc(docs));
3408 size_t i=0;
3409 std::string_view data(docs.str());
3410 const size_t size = data.size();
3411 if (!data.empty())
3412 {
3413 while (i<size && (data[i]==' ' || data[i]=='\n'))
3414 {
3415 i++;
3416 }
3417 if (i<size-5 && data[i]=='<' && qstrncmp(&data[i],"<!--!",5)==0) // skip over <!--! marker
3418 {
3419 i+=5;
3420 while (i<size && (data[i]==' ' || data[i]=='\n')) // skip over spaces after the <!--! marker
3421 {
3422 i++;
3423 }
3424 }
3425 if (i<size-1 &&
3426 (data[i]=='\\' || data[i]=='@') &&
3427 (qstrncmp(&data[i+1],"page ",5)==0 || qstrncmp(&data[i+1],"mainpage",8)==0)
3428 )
3429 {
3430 if (qstrncmp(&data[i+1],"page ",5)==0)
3431 {
3432 AUTO_TRACE_EXIT("result=ExplicitPageResult::explicitPage");
3434 }
3435 else
3436 {
3437 AUTO_TRACE_EXIT("result=ExplicitPageResult::explicitMainPage");
3439 }
3440 }
3441 else if (i<size-1 &&
3442 (data[i]=='\\' || data[i]=='@') &&
3443 (qstrncmp(&data[i+1],"dir\n",4)==0 || qstrncmp(&data[i+1],"dir ",4)==0)
3444 )
3445 {
3446 AUTO_TRACE_EXIT("result=ExplicitPageResult::explicitDirPage");
3448 }
3449 }
3450 AUTO_TRACE_EXIT("result=ExplicitPageResult::notExplicit");
3452}
3453
3454QCString Markdown::extractPageTitle(QCString &docs, QCString &id, int &prepend, bool &isIdGenerated)
3455{
3456 AUTO_TRACE("docs={} prepend={}",Trace::trunc(docs),id,prepend);
3457 // first first non-empty line
3458 prepend = 0;
3459 QCString title;
3460 size_t i=0;
3461 QCString docs_org(docs);
3462 std::string_view data(docs_org.str());
3463 const size_t size = data.size();
3464 docs.clear();
3465 while (i<size && (data[i]==' ' || data[i]=='\n'))
3466 {
3467 if (data[i]=='\n') prepend++;
3468 i++;
3469 }
3470 if (i>=size) { return QCString(); }
3471 size_t end1=i+1;
3472 while (end1<size && data[end1-1]!='\n') end1++;
3473 //printf("i=%d end1=%d size=%d line='%s'\n",i,end1,size,docs.mid(i,end1-i).data());
3474 // first line from i..end1
3475 if (end1<size)
3476 {
3477 // second line form end1..end2
3478 size_t end2=end1+1;
3479 while (end2<size && data[end2-1]!='\n') end2++;
3480 if (prv->isHeaderline(data.substr(end1),FALSE))
3481 {
3482 title = data.substr(i,end1-i-1);
3483 docs+="\n\n"+docs_org.mid(end2);
3484 id = prv->extractTitleId(title, 0, &isIdGenerated);
3485 //printf("extractPageTitle(title='%s' docs='%s' id='%s')\n",title.data(),docs.data(),id.data());
3486 AUTO_TRACE_EXIT("result={} id={} isIdGenerated={}",Trace::trunc(title),id,isIdGenerated);
3487 return title;
3488 }
3489 }
3490 if (i<end1 && prv->isAtxHeader(data.substr(i,end1-i),title,id,FALSE,&isIdGenerated)>0)
3491 {
3492 docs+="\n";
3493 docs+=docs_org.mid(end1);
3494 }
3495 else
3496 {
3497 docs=docs_org;
3498 id = prv->extractTitleId(title, 0, &isIdGenerated);
3499 }
3500 AUTO_TRACE_EXIT("result={} id={} isIdGenerated={}",Trace::trunc(title),id,isIdGenerated);
3501 return title;
3502}
3503
3504
3505//---------------------------------------------------------------------------
3506
3507QCString Markdown::process(const QCString &input, int &startNewlines, bool fromParseInput)
3508{
3509 if (input.isEmpty()) return input;
3510 size_t refIndent=0;
3511
3512 // for replace tabs by spaces
3513 QCString s = input;
3514 if (s.at(s.length()-1)!='\n') s += "\n"; // see PR #6766
3515 s = detab(s,refIndent);
3516 //printf("======== DeTab =========\n---- output -----\n%s\n---------\n",qPrint(s));
3517
3518 // then process quotation blocks (as these may contain other blocks)
3519 s = prv->processQuotations(s.view(),refIndent);
3520 //printf("======== Quotations =========\n---- output -----\n%s\n---------\n",qPrint(s));
3521
3522 // then process block items (headers, rules, and code blocks, references)
3523 s = prv->processBlocks(s.view(),refIndent);
3524 //printf("======== Blocks =========\n---- output -----\n%s\n---------\n",qPrint(s));
3525
3526 // finally process the inline markup (links, emphasis and code spans)
3527 prv->out.clear();
3528 prv->out.reserve(s.length());
3529 prv->processInline(s.view());
3530 if (fromParseInput)
3531 {
3532 Debug::print(Debug::Markdown,0,"---- output -----\n%s\n=========\n",qPrint(prv->out));
3533 }
3534 else
3535 {
3536 Debug::print(Debug::Markdown,0,"======== Markdown =========\n---- input ------- \n%s\n---- output -----\n%s\n=========\n",qPrint(input),qPrint(prv->out));
3537 }
3538
3539 // post processing
3540 QCString result = substitute(prv->out,g_doxy_nbsp,"&nbsp;");
3541 const char *p = result.data();
3542 if (p)
3543 {
3544 while (*p==' ') p++; // skip over spaces
3545 while (*p=='\n') {startNewlines++;p++;}; // skip over newlines
3546 if (qstrncmp(p,"<br>",4)==0) p+=4; // skip over <br>
3547 }
3548 if (p>result.data())
3549 {
3550 // strip part of the input
3551 result = result.mid(static_cast<int>(p-result.data()));
3552 }
3553 return result;
3554}
3555
3556//---------------------------------------------------------------------------
3557
3558QCString markdownFileNameToId(const QCString &fileName)
3559{
3560 AUTO_TRACE("fileName={}",fileName);
3561 std::string absFileName = FileInfo(fileName.str()).absFilePath();
3562 QCString baseFn = stripFromPath(absFileName.c_str());
3563 int i = baseFn.findRev('.');
3564 if (i!=-1) baseFn = baseFn.left(i);
3565 QCString baseName = escapeCharsInString(baseFn,false,false);
3566 //printf("markdownFileNameToId(%s)=md_%s\n",qPrint(fileName),qPrint(baseName));
3567 QCString res = "md_"+baseName;
3568 AUTO_TRACE_EXIT("result={}",res);
3569 return res;
3570}
3571
3572//---------------------------------------------------------------------------
3573
3574struct MarkdownOutlineParser::Private
3575{
3576 CommentScanner commentScanner;
3577};
3578
3580{
3581}
3582
3586
3588 const char *fileBuf,
3589 const std::shared_ptr<Entry> &root,
3590 ClangTUParser* /*clangParser*/)
3591{
3592 std::shared_ptr<Entry> current = std::make_shared<Entry>();
3593 int prepend = 0; // number of empty lines in front
3594 current->lang = SrcLangExt::Markdown;
3595 current->fileName = fileName;
3596 current->docFile = fileName;
3597 current->docLine = 1;
3598 QCString docs = fileBuf;
3599 Debug::print(Debug::Markdown,0,"======== Markdown =========\n---- input ------- \n%s\n",qPrint(fileBuf));
3600 QCString id;
3601 Markdown markdown(fileName,1,0);
3602 bool isIdGenerated = false;
3603 QCString title = markdown.extractPageTitle(docs, id, prepend, isIdGenerated).stripWhiteSpace();
3604 QCString generatedId;
3605 if (isIdGenerated)
3606 {
3607 generatedId = id;
3608 id = "";
3609 }
3610 int indentLevel=title.isEmpty() ? 0 : -1;
3611 markdown.setIndentLevel(indentLevel);
3612 FileInfo fi(fileName.str());
3613 QCString fn = fi.fileName();
3614 QCString titleFn = stripExtensionGeneral(fn,getFileNameExtension(fn));
3615 QCString mdfileAsMainPage = Config_getString(USE_MDFILE_AS_MAINPAGE);
3616 QCString mdFileNameId = markdownFileNameToId(fileName);
3617 bool wasEmpty = id.isEmpty();
3618 if (wasEmpty) id = mdFileNameId;
3619 QCString relFileName = stripFromPath(fileName);
3620 bool isSubdirDocs = Config_getBool(IMPLICIT_DIR_DOCS) && relFileName.lower().endsWith("/readme.md");
3621 switch (isExplicitPage(docs))
3622 {
3624 if (!mdfileAsMainPage.isEmpty() &&
3625 (fi.absFilePath()==FileInfo(mdfileAsMainPage.str()).absFilePath()) // file reference with path
3626 )
3627 {
3628 docs.prepend("@ianchor{" + title + "} " + id + "\\ilinebr ");
3629 docs.prepend("@mainpage "+title+"\\ilinebr ");
3630 }
3631 else if (id=="mainpage" || id=="index")
3632 {
3633 if (title.isEmpty()) title = titleFn;
3634 docs.prepend("@ianchor{" + title + "} " + id + "\\ilinebr ");
3635 docs.prepend("@mainpage "+title+"\\ilinebr ");
3636 }
3637 else if (isSubdirDocs)
3638 {
3639 docs.prepend("@dir\\ilinebr ");
3640 }
3641 else
3642 {
3643 if (title.isEmpty())
3644 {
3645 title = titleFn;
3646 prepend = 0;
3647 }
3648 if (!wasEmpty)
3649 {
3650 docs.prepend("@ianchor{" + title + "} " + id + "\\ilinebr @ianchor{" + relFileName + "} " + mdFileNameId + "\\ilinebr ");
3651 }
3652 else if (!generatedId.isEmpty())
3653 {
3654 docs.prepend("@ianchor " + generatedId + "\\ilinebr ");
3655 }
3656 else if (Config_getEnum(MARKDOWN_ID_STYLE)==MARKDOWN_ID_STYLE_t::GITHUB)
3657 {
3658 QCString autoId = AnchorGenerator::instance().generate(title.str());
3659 docs.prepend("@ianchor{" + title + "} " + autoId + "\\ilinebr ");
3660 }
3661 docs.prepend("@page "+id+" "+title+"\\ilinebr ");
3662 }
3663 for (int i = 0; i < prepend; i++) docs.prepend("\n");
3664 break;
3666 {
3667 // look for `@page label My Title\n` and capture `label` (match[1]) and ` My Title` (match[2])
3668 static const reg::Ex re(R"([ ]*[\\@]page\s+(\a[\w-]*)(\s*[^\n]*)\n)");
3669 reg::Match match;
3670 std::string s = docs.str();
3671 if (reg::search(s,match,re))
3672 {
3673 QCString orgLabel = match[1].str();
3674 QCString orgTitle = match[2].str();
3675 orgTitle = orgTitle.stripWhiteSpace();
3676 QCString newLabel = markdownFileNameToId(fileName);
3677 docs = docs.left(match[1].position())+ // part before label
3678 newLabel+ // new label
3679 match[2].str()+ // part between orgLabel and \n
3680 "\\ilinebr @ianchor{" + orgTitle + "} "+orgLabel+"\n"+ // add original anchor plus \n of above
3681 docs.right(docs.length()-match.length()); // add remainder of docs
3682 }
3683 }
3684 break;
3686 break;
3688 break;
3689 }
3690 int lineNr=1;
3691
3692 p->commentScanner.enterFile(fileName,lineNr);
3694 bool needsEntry = false;
3695 int position=0;
3696 GuardedSectionStack guards;
3697 QCString processedDocs = markdown.process(docs,lineNr,true);
3698 while (p->commentScanner.parseCommentBlock(
3699 this,
3700 current.get(),
3701 processedDocs,
3702 fileName,
3703 lineNr,
3704 FALSE, // isBrief
3705 FALSE, // javadoc autobrief
3706 FALSE, // inBodyDocs
3707 prot, // protection
3708 position,
3709 needsEntry,
3710 true,
3711 &guards
3712 ))
3713 {
3714 if (needsEntry)
3715 {
3716 QCString docFile = current->docFile;
3717 root->moveToSubEntryAndRefresh(current);
3718 current->lang = SrcLangExt::Markdown;
3719 current->docFile = docFile;
3720 current->docLine = lineNr;
3721 }
3722 }
3723 if (needsEntry)
3724 {
3725 root->moveToSubEntryAndKeep(current);
3726 }
3727 p->commentScanner.leaveFile(fileName,lineNr);
3728}
3729
3731{
3732 Doxygen::parserManager->getOutlineParser("*.cpp")->parsePrototype(text);
3733}
3734
3735//------------------------------------------------------------------------
#define eol
The end of line string for this machine.
static AnchorGenerator & instance()
Returns the singleton instance.
Definition anchor.cpp:38
static std::string addPrefixIfNeeded(const std::string &anchor)
Definition anchor.cpp:46
std::string generate(const std::string &title)
generates an anchor for a section with title.
Definition anchor.cpp:59
Clang parser object for a single translation unit, which consists of a source file and the directly o...
Definition clangparser.h:25
@ Markdown
Definition debug.h:36
static void print(DebugMask mask, int prio, const char *fmt,...)
Definition debug.cpp:81
static ParserManager * parserManager
Definition doxygen.h:131
static FileNameLinkedMap * imageNameLinkedMap
Definition doxygen.h:106
A model of a file symbol.
Definition filedef.h:99
bool exists() const
Definition fileinfo.cpp:30
std::string fileName() const
Definition fileinfo.cpp:118
bool isReadable() const
Definition fileinfo.cpp:44
std::string absFilePath() const
Definition fileinfo.cpp:101
std::unique_ptr< Private > prv
Definition markdown.h:43
void setIndentLevel(int level)
Definition markdown.cpp:191
QCString extractPageTitle(QCString &docs, QCString &id, int &prepend, bool &isIdGenerated)
Markdown(const QCString &fileName, int lineNr, int indentLevel=0)
Definition markdown.cpp:182
QCString process(const QCString &input, int &startNewlines, bool fromParseInput=false)
void parseInput(const QCString &fileName, const char *fileBuf, const std::shared_ptr< Entry > &root, ClangTUParser *clangParser) override
Parses a single input file with the goal to build an Entry tree.
~MarkdownOutlineParser() override
void parsePrototype(const QCString &text) override
Callback function called by the comment block scanner.
std::unique_ptr< Private > p
Definition markdown.h:60
std::unique_ptr< OutlineParserInterface > getOutlineParser(const QCString &extension)
Gets the interface to the parser associated with a given extension.
Definition parserintf.h:209
This is an alternative implementation of QCString.
Definition qcstring.h:101
int find(char c, int index=0, bool cs=TRUE) const
Definition qcstring.cpp:43
QCString & prepend(const char *s)
Definition qcstring.h:407
size_t length() const
Returns the length of the string, not counting the 0-terminator.
Definition qcstring.h:153
QCString mid(size_t index, size_t len=static_cast< size_t >(-1)) const
Definition qcstring.h:226
QCString lower() const
Definition qcstring.h:234
bool endsWith(const char *s) const
Definition qcstring.h:504
char & at(size_t i)
Returns a reference to the character at index i.
Definition qcstring.h:567
bool isEmpty() const
Returns TRUE iff the string is empty.
Definition qcstring.h:150
QCString stripWhiteSpace() const
returns a copy of this string with leading and trailing whitespace removed
Definition qcstring.h:245
const std::string & str() const
Definition qcstring.h:526
QCString & setNum(short n)
Definition qcstring.h:444
QCString simplifyWhiteSpace() const
return a copy of this string with leading and trailing whitespace removed and multiple whitespace cha...
Definition qcstring.cpp:185
QCString right(size_t len) const
Definition qcstring.h:219
QCString & sprintf(const char *format,...)
Definition qcstring.cpp:29
int findRev(char c, int index=-1, bool cs=TRUE) const
Definition qcstring.cpp:91
const char * data() const
Returns a pointer to the contents of the string in the form of a 0-terminated C string.
Definition qcstring.h:159
std::string_view view() const
Definition qcstring.h:161
QCString left(size_t len) const
Definition qcstring.h:214
void clear()
Definition qcstring.h:169
static constexpr int Section
Definition section.h:33
static constexpr int MaxLevel
Definition section.h:39
static constexpr int Subsection
Definition section.h:34
static constexpr int Subsubsection
Definition section.h:35
static constexpr int MinLevel
Definition section.h:32
static constexpr int Paragraph
Definition section.h:36
static constexpr int Subsubparagraph
Definition section.h:38
static constexpr int Subparagraph
Definition section.h:37
Interface for the comment block scanner.
std::stack< GuardedSection > GuardedSectionStack
Definition commentscan.h:48
#define Config_getInt(name)
Definition config.h:34
#define Config_getBool(name)
Definition config.h:33
#define Config_getString(name)
Definition config.h:32
#define Config_getEnum(name)
Definition config.h:35
std::vector< std::string > StringVector
Definition containers.h:33
DirIterator end(const DirIterator &) noexcept
Definition dir.cpp:175
#define AUTO_TRACE_ADD(...)
Definition docnode.cpp:47
#define AUTO_TRACE(...)
Definition docnode.cpp:46
#define AUTO_TRACE_EXIT(...)
Definition docnode.cpp:48
#define AUTO_TRACE(...)
Definition markdown.cpp:61
static bool hasLineBreak(std::string_view data)
#define isIdChar(c)
Definition markdown.cpp:77
ExplicitPageResult
Definition markdown.cpp:67
@ explicitDirPage
docs start with a dir command
Definition markdown.cpp:70
@ explicitMainPage
docs start with a mainpage command
Definition markdown.cpp:69
@ explicitPage
docs start with a page command
Definition markdown.cpp:68
@ notExplicit
docs doesn't start with either page or mainpage
Definition markdown.cpp:71
static bool isBlockQuote(std::string_view data, size_t indent)
returns true if this line starts a block quote
static size_t isLinkRef(std::string_view data, QCString &refid, QCString &link, QCString &title)
returns end of the link ref if this is indeed a link reference.
static QCString escapeDoubleQuotes(const QCString &s)
Definition markdown.cpp:217
static bool isEndOfList(std::string_view data)
static size_t computeIndentExcludingListMarkers(std::string_view data)
static Alignment markersToAlignment(bool leftMarker, bool rightMarker)
helper function to convert presence of left and/or right alignment markers to a alignment value
Definition markdown.cpp:289
const char * g_doxy_nbsp
Definition markdown.cpp:200
static QCString escapeSpecialChars(const QCString &s)
Definition markdown.cpp:235
static bool isCodeBlock(std::string_view data, size_t offset, size_t &indent)
static bool isEmptyLine(std::string_view data)
#define AUTO_TRACE_EXIT(...)
Definition markdown.cpp:63
#define isLiTag(i)
static size_t findTableColumns(std::string_view data, size_t &start, size_t &end, size_t &columns)
Finds the location of the table's contains in the string data.
const size_t codeBlockIndent
Definition markdown.cpp:201
static ExplicitPageResult isExplicitPage(const QCString &docs)
const char * g_utf8_nbsp
Definition markdown.cpp:199
#define ignoreCloseEmphChar(c, cn)
Definition markdown.cpp:100
static const std::unordered_map< std::string, std::string > g_quotationHeaderMap
#define isOpenEmphChar(c)
Definition markdown.cpp:93
Alignment
Definition markdown.cpp:194
@ AlignLeft
Definition markdown.cpp:194
@ AlignNone
Definition markdown.cpp:194
@ AlignRight
Definition markdown.cpp:194
@ AlignCenter
Definition markdown.cpp:194
static bool isFencedCodeBlock(std::string_view data, size_t refIndent, QCString &lang, size_t &start, size_t &end, size_t &offset)
static size_t isListMarker(std::string_view data)
static bool isHRuler(std::string_view data)
static QCString getFilteredImageAttributes(std::string_view fmt, const QCString &attrs)
parse the image attributes and return attributes for given format
Definition markdown.cpp:310
bool skipOverFileAndLineCommands(std::string_view data, size_t indent, size_t &offset, std::string &location)
#define extraChar(c)
Definition markdown.cpp:84
static bool isTableBlock(std::string_view data)
Returns TRUE iff data points to the start of a table block.
size_t isNewline(std::string_view data)
Definition markdown.cpp:207
QCString markdownFileNameToId(const QCString &fileName)
processes string s and converts markdown into doxygen/html commands.
#define warn(file, line, fmt,...)
Definition message.h:59
bool isAbsolutePath(const QCString &fileName)
Definition portable.cpp:514
const char * strnstr(const char *haystack, const char *needle, size_t haystack_len)
Definition portable.cpp:617
QCString trunc(const QCString &s, size_t numChars=15)
Definition trace.h:56
Definition trace.h:153
bool search(std::string_view str, Match &match, const Ex &re, size_t pos)
Search in a given string str starting at position pos for a match against regular expression re.
Definition regex.cpp:748
Portable versions of functions that are platform dependent.
static void decrLevel(yyscan_t yyscanner)
Definition pre.l:2176
QCString substitute(const QCString &s, const QCString &src, const QCString &dst)
substitute all occurrences of src in s by dst
Definition qcstring.cpp:477
int qstrncmp(const char *str1, const char *str2, size_t len)
Definition qcstring.h:75
bool qisspace(char c)
Definition qcstring.h:81
const char * qPrint(const char *s)
Definition qcstring.h:661
#define TRUE
Definition qcstring.h:37
#define FALSE
Definition qcstring.h:34
Some helper functions for std::string.
std::string_view stripWhiteSpace(std::string_view s)
Given a string view s, returns a new, narrower view on that string, skipping over any leading or trai...
Definition stringutil.h:72
int processEmphasis1(std::string_view data, char c)
process single emphasis
Definition markdown.cpp:770
int processQuoted(std::string_view data, size_t offset)
Process quoted section "...", can contain one embedded newline.
Definition markdown.cpp:940
void writeMarkdownImage(std::string_view fmt, bool inline_img, bool explicitTitle, const QCString &title, const QCString &content, const QCString &link, const QCString &attributes, const FileDef *fd)
size_t writeTableBlock(std::string_view data)
size_t writeBlockQuote(std::string_view data)
size_t isSpecialCommand(std::string_view data, size_t offset)
Definition markdown.cpp:426
std::function< int(std::string_view, size_t)> Action_t
Definition markdown.cpp:172
int processEmphasis3(std::string_view data, char c)
Parsing triple emphasis.
Definition markdown.cpp:836
int processCodeSpan(std::string_view data, size_t offset)
` parsing a code span (assuming codespan != 0)
int processSpecialCommand(std::string_view data, size_t offset)
QCString extractTitleId(QCString &title, int level, bool *pIsIdGenerated=nullptr)
void writeFencedCodeBlock(std::string_view data, std::string_view lang, size_t blockStart, size_t blockEnd)
int isHeaderline(std::string_view data, bool allowAdjustLevel)
returns whether the line is a setext-style hdr underline
size_t findEmphasisChar(std::string_view, char c, size_t c_size)
looks for the next emph char, skipping other constructs, and stopping when either it is found,...
Definition markdown.cpp:657
std::unordered_map< std::string, LinkRef > linkRefs
Definition markdown.cpp:174
void addStrEscapeUtf8Nbsp(std::string_view data)
QCString isBlockCommand(std::string_view data, size_t offset)
Definition markdown.cpp:357
size_t writeCodeBlock(std::string_view, size_t refIndent)
int processHtmlTag(std::string_view data, size_t offset)
QCString processQuotations(std::string_view data, size_t refIndent)
QCString processBlocks(std::string_view data, size_t indent)
int processEmphasis(std::string_view data, size_t offset)
int processLink(std::string_view data, size_t offset)
int processHtmlTagWrite(std::string_view data, size_t offset, bool doWrite)
Process a HTML tag.
Definition markdown.cpp:964
int isAtxHeader(std::string_view data, QCString &header, QCString &id, bool allowAdjustLevel, bool *pIsIdGenerated=nullptr)
size_t findEndOfLine(std::string_view data, size_t offset)
int processEmphasis2(std::string_view data, char c)
process double emphasis
Definition markdown.cpp:804
void processInline(std::string_view data)
int processNmdash(std::string_view data, size_t offset)
Process ndash and mdashes.
Definition markdown.cpp:898
void writeOneLineHeaderOrRuler(std::string_view data)
std::array< Action_t, 256 > actions
Definition markdown.cpp:179
Protection
Protection level of members.
Definition types.h:26
@ Public
Definition types.h:26
SrcLangExt
Language as given by extension.
Definition types.h:42
@ Markdown
Definition types.h:57
SrcLangExt getLanguageFromFileName(const QCString &fileName, SrcLangExt defLang)
Definition util.cpp:5549
QCString escapeCharsInString(const QCString &name, bool allowDots, bool allowUnderscore)
Definition util.cpp:3684
bool found
Definition util.cpp:984
QCString stripExtensionGeneral(const QCString &fName, const QCString &ext)
Definition util.cpp:5255
bool isURL(const QCString &url)
Checks whether the given url starts with a supported protocol.
Definition util.cpp:6245
static QCString stripFromPath(const QCString &p, const StringVector &l)
Definition util.cpp:309
QCString detab(const QCString &s, size_t &refIndent)
Definition util.cpp:7047
StringVector split(const std::string &s, const std::string &delimiter)
split input string s by string delimiter delimiter.
Definition util.cpp:6945
QCString externalLinkTarget(const bool parent)
Definition util.cpp:6021
QCString getFileNameExtension(const QCString &fn)
Definition util.cpp:5591
FileDef * findFileDef(const FileNameLinkedMap *fnMap, const QCString &n, bool &ambig)
Definition util.cpp:3262
A bunch of utility functions.