Doxygen
Loading...
Searching...
No Matches
dotrunner.cpp
Go to the documentation of this file.
1/******************************************************************************
2*
3* Copyright (C) 1997-2019 by Dimitri van Heesch.
4*
5* Permission to use, copy, modify, and distribute this software and its
6* documentation under the terms of the GNU General Public License is hereby
7* granted. No representations are made about the suitability of this software
8* for any purpose. It is provided "as is" without express or implied warranty.
9* See the GNU General Public License for more details.
10*
11* Documents produced by Doxygen are derivative works derived from the
12* input used in their production; they are not affected by this license.
13*
14*/
15
16#include <cassert>
17#include <cmath>
18#include <map>
19#include <set>
20#include <string>
21#include <algorithm>
22#include <numeric>
23#include <random>
24#include "threadpool.h"
25
26#ifdef _MSC_VER
27#pragma warning( push )
28#pragma warning( disable : 4242 )
29#pragma warning( disable : 4244 )
30#pragma warning( disable : 4996 )
31#pragma warning( disable : 4456 )
32#pragma warning( disable : 4805 )
33#endif
34#if defined(__clang__)
35#pragma clang diagnostic push
36#pragma clang diagnostic ignored "-Wdeprecated-declarations"
37#pragma clang diagnostic ignored "-Wshadow"
38#endif
39#if defined(__GNUC__)
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wshadow"
42#endif
43#include <gunzip.hh>
44#if defined(__GNUC__)
45#pragma GCC diagnostic pop
46#endif
47#if defined(__clang__)
48#pragma clang diagnostic pop
49#endif
50#ifdef _MSC_VER
51#pragma warning( pop )
52#endif
53
54#include "dotrunner.h"
55#include "util.h"
56#include "portable.h"
57#include "dot.h"
58#include "message.h"
59#include "config.h"
60#include "dir.h"
61#include "doxygen.h"
62
63// The LaTeX graphicx package has a maximum size limit of 16384.
64// To be on the safe side we use a slightly smaller limit, i.e. 150 inch * 72 dpi.
65// Images of this size are hard to view anyway.
66#define MAX_LATEX_GRAPH_INCH 150
67#define MAX_LATEX_GRAPH_SIZE (MAX_LATEX_GRAPH_INCH * 72)
68
69//#define DBG(x) printf x
70#define DBG(x) do {} while(0)
71
72//-----------------------------------------------------------------------------------------
73
74// since dot silently reproduces the input file when it does not
75// support the PNG format, we need to check the result.
76static void checkPngResult(const QCString &imgName)
77{
78 FILE *f = Portable::fopen(imgName,"rb");
79 if (!f)
80 {
81 err("Could not read image '{}' generated by dot!\n",imgName);
82 return;
83 }
84
85 char data[4];
86 if (fread(data, 1, 4, f) != 4)
87 {
88 err("Could not read image '{}' generated by dot!\n",imgName);
89 fclose(f);
90 return;
91 }
92
93 if (!(data[1] == 'P' && data[2] == 'N' && data[3] == 'G'))
94 {
95 err("Image '{}' produced by dot is not a valid PNG!\n"
96 "You should either select a different format "
97 "(DOT_IMAGE_FORMAT in the config file) or install a more "
98 "recent version of graphviz (1.7+)\n", imgName);
99 }
100
101 fclose(f);
102}
103
104static bool resetPDFSize(const int width,const int height, const QCString &base)
105{
106 QCString tmpName = base+".tmp";
107 QCString patchFile = base+".dot";
108 Dir thisDir;
109 if (!thisDir.rename(patchFile.str(),tmpName.str()))
110 {
111 err("Failed to rename file {} to {}!\n",patchFile,tmpName);
112 return FALSE;
113 }
114 std::ifstream fi = Portable::openInputStream(tmpName);
115 std::ofstream t = Portable::openOutputStream(patchFile);
116 if (!fi.is_open())
117 {
118 err("problem opening file {} for patching!\n",tmpName);
119 thisDir.rename(tmpName.str(),patchFile.str());
120 return FALSE;
121 }
122 if (!t.is_open())
123 {
124 err("problem opening file {} for patching!\n",patchFile);
125 thisDir.rename(tmpName.str(),patchFile.str());
126 return FALSE;
127 }
128 std::string line;
129 while (getline(fi,line)) // foreach line
130 {
131 if (line.find("LATEX_PDF_SIZE") != std::string::npos)
132 {
133 double scale = (width > height ? width : height)/double(MAX_LATEX_GRAPH_INCH);
134 t << " size=\""<<width/scale << "," <<height/scale << "\";\n";
135 }
136 else
137 t << line << "\n";
138 }
139 fi.close();
140 t.close();
141 // remove temporary file
142 thisDir.remove(tmpName.str());
143 return TRUE;
144}
145
146bool DotRunner::readBoundingBox(const QCString &fileName,int *width,int *height,bool isEps)
147{
148 std::ifstream f = Portable::openInputStream(fileName);
149 if (!f.is_open())
150 {
151 err("Failed to open file {} for extracting bounding box\n",fileName);
152 return false;
153 }
154
155 // read file contents into string 'contents'
156 std::stringstream buffer;
157 buffer << f.rdbuf();
158 std::string contents = buffer.str();
159
160 // start of bounding box marker we are looking for
161 const std::string boundingBox = isEps ? "%%PageBoundingBox:" : "/MediaBox [";
162
163 // helper routine to extract the bounding boxes width and height
164 auto extractBoundingBox = [&fileName,&boundingBox,&width,&height](const char *s) -> bool
165 {
166 int x=0, y=0;
167 double w=0, h=0;
168 if (sscanf(s+boundingBox.length(),"%d %d %lf %lf",&x,&y,&w,&h)==4)
169 {
170 *width = static_cast<int>(std::ceil(w));
171 *height = static_cast<int>(std::ceil(h));
172 return true;
173 }
174 err("Failed to extract bounding box from generated diagram file {}\n",fileName);
175 return false;
176 };
177
178 // compressed segment start and end markers
179 const std::string streamStart = "stream\n";
180 const std::string streamEnd = "\nendstream";
181
182 auto detectDeflateStreamStart = [&streamStart](const char *s)
183 {
184 size_t len = streamStart.length();
185 bool streamOK = strncmp(s,streamStart.c_str(),len)==0;
186 if (streamOK) // ASCII marker matches, check stream header bytes as well
187 {
188 unsigned short header1 = static_cast<unsigned char>(s[len])<<8; // CMF byte
189 if (header1) // not end of string
190 {
191 unsigned short header = (static_cast<unsigned char>(s[len+1])) | header1; // FLG byte
192 // check for correct header (see https://www.rfc-editor.org/rfc/rfc1950)
193 return ((header&0x8F20)==0x0800) && (header%31)==0;
194 }
195 }
196 return false;
197 };
198
199 const size_t l = contents.length();
200 size_t i=0;
201 while (i<l)
202 {
203 if (!isEps && contents[i]=='s' && detectDeflateStreamStart(&contents[i]))
204 { // compressed stream start
205 int col=17;
206 i+=streamStart.length();
207 const size_t start=i;
208 DBG(("---- start stream at offset %08x\n",(int)i));
209 while (i<l)
210 {
211 if (contents[i]=='\n' && strncmp(&contents[i],streamEnd.c_str(),streamEnd.length())==0)
212 { // compressed block found in range [start..i]
213 DBG(("\n---- end stream at offset %08x\n",(int)i));
214 // decompress it into decompressBuf
215 std::vector<char> decompressBuf;
216 const char *source = &contents[start];
217 const size_t sourceLen = i-start;
218 size_t sourcePos = 0;
219 decompressBuf.reserve(sourceLen*2);
220 auto getter = [source,&sourcePos,sourceLen]() -> int {
221 return sourcePos<sourceLen ? static_cast<unsigned char>(source[sourcePos++]) : EOF;
222 };
223 auto putter = [&decompressBuf](const char c) -> int {
224 decompressBuf.push_back(c); return c;
225 };
226 Deflate(getter,putter);
227 // convert decompression buffer to string
228 std::string s(decompressBuf.begin(), decompressBuf.end());
229 DBG(("decompressed_data=[[[\n%s\n]]]\n",s.c_str()));
230 // search for bounding box marker
231 const size_t idx = s.find(boundingBox);
232 if (idx!=std::string::npos) // found bounding box in uncompressed data
233 {
234 return extractBoundingBox(s.c_str()+idx);
235 }
236 // continue searching after end stream marker
237 i+=streamEnd.length();
238 break;
239 }
240 else // compressed stream character
241 {
242 if (col>16) { col=0; DBG(("\n%08x: ",static_cast<int>(i))); }
243 DBG(("%02x ",static_cast<unsigned char>(contents[i])));
244 col++;
245 i++;
246 }
247 }
248 }
249 else if (((isEps && contents[i]=='%') || (!isEps && contents[i]=='/')) &&
250 strncmp(&contents[i],boundingBox.c_str(),boundingBox.length())==0)
251 { // uncompressed bounding box
252 return extractBoundingBox(&contents[i]);
253 }
254 else // uncompressed stream character
255 {
256 i++;
257 }
258 }
259 err("Failed to find bounding box in generated diagram file {}\n",fileName);
260 // nothing found
261 return false;
262}
263
264//---------------------------------------------------------------------------------
265
267{
268 int index = output.findRev('.');
269 if (index < 0) return output;
270 return output.left(index);
271}
272
274 : m_dotExe(Doxygen::verifiedDotPath)
275{
276}
277
278bool DotRunner::run(const DotJobs &dotJobs)
279{
280 if (dotJobs.empty()) return TRUE;
281
282 // Group jobs by format, then by directory so we can cd once per group
283 std::map<std::string, std::map<std::string, std::vector<const DotJob*>>> byFormatAndDir;
284 for (const auto &job : dotJobs)
285 {
286 byFormatAndDir[job.format.str()][job.absPath.str()].push_back(&job);
287 }
288
289 std::mt19937 rng(std::random_device{}());
290 bool ok = true;
291 size_t prev=0;
292 for (const auto &[fmtStr, byDir] : byFormatAndDir)
293 {
294 QCString format = QCString(fmtStr);
295
296 for (const auto &[dirStr, jobs] : byDir)
297 {
298 std::string oldDir = Dir::currentDirPath();
299 Dir::setCurrent(dirStr);
300
301 // settings controlling how to distribute the graphs over threads and batches
302 const size_t numThreads = static_cast<size_t>(Config_getInt(DOT_NUM_THREADS));
303 const size_t batchSize = static_cast<size_t>(Config_getInt(DOT_BATCH_SIZE));
304 const size_t exeLen = m_dotExe.length() + 1; // "exe " prefix
305 const size_t maxArgLen = 32000-exeLen; // Windows CreateProcess limit is 32767; keep safe margin
306
307 // create a pseudo random ordering in which to process the dot files
308 std::vector<size_t> indices(jobs.size());
309 std::iota(indices.begin(), indices.end(), 0);
310 std::shuffle(indices.begin(), indices.end(), rng);
311
312 // helper to keep track of dot command to run later
313 struct CommandArgument
314 {
315 CommandArgument(const QCString &args) : arguments(args) {}
316 QCString arguments;
317 size_t numDotFiles = 0;
318 const DotJob *firstJob = nullptr;
319 };
320
321 std::vector<CommandArgument> partialCommands;
322 std::vector<CommandArgument> finalCommands;
323
324 bool hasImageMap = std::any_of(jobs.begin(),jobs.end(),[](const auto &j) { return j->generateImageMap; });
325
326 // each dot command has a command arguments of the form: -Tformat -O basename1.dot basename2.dot ...
327 QCString baseArgs = QCString("-T") + format;
328 if (hasImageMap) // if any image needs a map we generate one for all images
329 {
330 baseArgs += " -Tcmapx";
331 }
332 baseArgs += " -O";
333
334 // prepare partial commands for each thread (command is later skipped if numDotFiles==0).
335 for (size_t i=0; i<numThreads; i++)
336 {
337 partialCommands.emplace_back(baseArgs);
338 }
339
340 // split the jobs into batches per thread iterating in pseudo random order to fill each batch with a random selection of graphs
341 size_t index=0;
342 for (size_t i : indices)
343 {
344 const auto &job = jobs[i];
345 QCString fileArg = QCString(" ") + job->relDotName;
346 auto &cmd = partialCommands[index];
347 if (cmd.numDotFiles<batchSize && cmd.arguments.length()+fileArg.length()<maxArgLen) // still room in this batch
348 {
349 cmd.arguments+=fileArg;
350 cmd.numDotFiles++;
351 }
352 else // this batch is full, move to finished commands and start a new one
353 {
354 finalCommands.push_back(cmd);
355 cmd.arguments=baseArgs+fileArg;
356 cmd.numDotFiles=1;
357 }
358 if (cmd.firstJob==nullptr) cmd.firstJob=job;
359 index = (index+1)%numThreads;
360 }
361
362 // append partial commands to the final commands
363 finalCommands.insert(finalCommands.end(),partialCommands.begin(),partialCommands.end());
364
365 // now run the finalCommands.
366 if (Config_getInt(DOT_NUM_THREADS)<=1) // no threads to work with
367 {
368 for (const auto &cmd : finalCommands)
369 {
370 if (cmd.numDotFiles>0) // check if there are graphs to generate first
371 {
372 if (cmd.numDotFiles>1) // batch mode
373 {
374 msg("Running dot for graphs {}-{}/{}\n",prev+1,prev+cmd.numDotFiles,dotJobs.size());
375 }
376 else // single graph mode
377 {
378 msg("Running dot for graph {}/{}\n",prev+1,dotJobs.size());
379 }
380 prev+=cmd.numDotFiles;
381 int exitCode;
382 if ((exitCode = Portable::system(m_dotExe, cmd.arguments, FALSE)) != 0)
383 {
384 err_full(cmd.firstJob->srcFile, 1,
385 "Problems running dot: exit code={}, command='{}', dir='{}', arguments='{}'",
386 exitCode, m_dotExe, dirStr, cmd.arguments);
387 ok = false;
388 }
389 }
390 }
391 }
392 else // use multiple threads to run instances of dot in parallel
393 {
394 ThreadPool workers(numThreads);
395 std::vector< std::future<size_t> > results;
396 for (auto & cmd: finalCommands)
397 {
398 if (cmd.numDotFiles>0)
399 {
400 auto process = [this,cmd,dirStr]() -> size_t
401 {
402 int exitCode;
403 if ((exitCode = Portable::system(m_dotExe, cmd.arguments, FALSE)) != 0)
404 {
405 err_full(cmd.firstJob->srcFile, 1,
406 "Problems running dot: exit code={}, command='{}', dir='{}', arguments='{}'",
407 exitCode, m_dotExe, dirStr, cmd.arguments);
408 }
409 return cmd.numDotFiles;
410 };
411 results.emplace_back(workers.queue(process));
412 }
413 }
414 for (auto &f : results)
415 {
416 size_t numDotFiles = f.get();
417 if (numDotFiles>1) // batch mode
418 {
419 msg("Finished running dot for graphs {}-{}/{}\n",prev+1,prev+numDotFiles,dotJobs.size());
420 }
421 else // single graph mode
422 {
423 msg("Finished running dot for graph {}/{}\n",prev+1,dotJobs.size());
424 }
425 prev+=numDotFiles;
426 }
427 }
428
429 // Post-process each output file. dot -O appends the format suffix to the
430 // full input filename, so the output is absPath + relDotName + "." + format.
431 // Rename to remove the .dot infix, producing absPath + baseName + "." + format.
432 for (const auto *job : jobs)
433 {
434 QCString base = job->absPath + getBaseNameOfOutput(job->relDotName);
435 QCString dotOutput = job->absPath + job->relDotName + "." + format;
436 QCString output = base + "." + format;
437 Dir d;
438 if (!d.rename(dotOutput.str(), output.str()))
439 {
440 err("Failed to rename {} to {}!\n", dotOutput, output);
441 ok = false;
442 continue;
443 }
444 if (job->generateImageMap)
445 {
446 QCString dotMapOutput = job->absPath + job->relDotName + ".cmapx";
447 QCString mapOutput = base + ".map";
448 if (!d.rename(dotMapOutput.str(), mapOutput.str()))
449 {
450 err("Failed to rename {} to {}!\n", dotMapOutput, mapOutput);
451 ok = false;
452 continue;
453 }
454 }
455
456 if (format.startsWith("pdf"))
457 {
458 int width=0, height=0;
459 if (!readBoundingBox(output, &width, &height, FALSE))
460 {
461 ok = false;
462 continue;
463 }
464 if ((width > MAX_LATEX_GRAPH_SIZE) || (height > MAX_LATEX_GRAPH_SIZE))
465 {
466 if (!resetPDFSize(width, height, base))
467 {
468 ok = false;
469 continue;
470 }
471 // Re-run dot for just this one file
472 QCString rerunArgs = QCString("-T") + format + " -O \"" + job->relDotName + "\"";
473 int exitCode;
474 if ((exitCode = Portable::system(m_dotExe, rerunArgs, FALSE)) != 0)
475 {
476 err_full(job->srcFile, 1,
477 "Problems running dot: exit code={}, command='{}', dir='{}', arguments='{}'",
478 exitCode, m_dotExe, dirStr, rerunArgs);
479 ok = false;
480 }
481 else
482 {
483 Dir d2;
484 if (!d2.rename(dotOutput.str(), output.str()))
485 {
486 err("Failed to rename {} to {}!\n", dotOutput, output);
487 ok = false;
488 }
489 }
490 }
491 }
492 else if (format.startsWith("png"))
493 {
494 checkPngResult(output);
495 }
496 }
497 Dir::setCurrent(oldDir);
498 }
499 }
500
501 // Write .md5 files and clean up .dot files (once per unique dotFile)
502 std::set<std::string> processed;
503 for (const auto &job : dotJobs)
504 {
505 if (!processed.insert((job.absPath + job.relDotName).str()).second) continue;
506
507 if (!job.md5Hash.isEmpty())
508 {
509 QCString md5Name = job.absPath + getBaseNameOfOutput(job.relDotName) + ".md5";
510 FILE *f = Portable::fopen(md5Name, "w");
511 if (f)
512 {
513 fwrite(job.md5Hash.data(), 1, 32, f);
514 fclose(f);
515 }
516 }
517
518 if (Config_getBool(DOT_CLEANUP))
519 {
520 Portable::unlink(job.absPath + job.relDotName);
521 }
522 }
523
524 return ok;
525}
Class representing a directory in the file system.
Definition dir.h:75
static std::string currentDirPath()
Definition dir.cpp:342
bool remove(const std::string &path, bool acceptsAbsPath=true) const
Definition dir.cpp:314
bool rename(const std::string &orgName, const std::string &newName, bool acceptsAbsPath=true) const
Definition dir.cpp:321
static bool setCurrent(const std::string &path)
Definition dir.cpp:350
bool run(const DotJobs &jobs)
Runs dot for all given jobs.
static bool readBoundingBox(const QCString &fileName, int *width, int *height, bool isEps)
QCString m_dotExe
Definition dotrunner.h:42
This class serves as a namespace for global variables used by doxygen.
Definition doxygen.h:93
This is an alternative implementation of QCString.
Definition qcstring.h:101
size_t length() const
Returns the length of the string, not counting the 0-terminator.
Definition qcstring.h:166
const std::string & str() const
Definition qcstring.h:552
int findRev(char c, int index=-1, bool cs=TRUE) const
Definition qcstring.cpp:96
QCString left(size_t len) const
Definition qcstring.h:229
Class managing a pool of worker threads.
Definition threadpool.h:48
auto queue(F &&f, Args &&... args) -> std::future< decltype(f(args...))>
Queue the callable function f for the threads to execute.
Definition threadpool.h:77
#define Config_getInt(name)
Definition config.h:34
#define Config_getBool(name)
Definition config.h:33
std::vector< DotJob > DotJobs
Definition dotjob.h:36
#define MAX_LATEX_GRAPH_INCH
Definition dotrunner.cpp:66
#define DBG(x)
Definition dotrunner.cpp:70
static QCString getBaseNameOfOutput(const QCString &output)
static void checkPngResult(const QCString &imgName)
Definition dotrunner.cpp:76
#define MAX_LATEX_GRAPH_SIZE
Definition dotrunner.cpp:67
static bool resetPDFSize(const int width, const int height, const QCString &base)
static bool extractBoundingBox(const QCString &formBase, int *x1, int *y1, int *x2, int *y2, double *x1hi, double *y1hi, double *x2hi, double *y2hi)
Definition formula.cpp:298
#define msg(fmt,...)
Definition message.h:94
#define err(fmt,...)
Definition message.h:127
#define err_full(file, line, fmt,...)
Definition message.h:132
std::ifstream openInputStream(const QCString &name, bool binary=false, bool openAtEnd=false)
Definition portable.cpp:659
std::ofstream openOutputStream(const QCString &name, bool append=false)
Definition portable.cpp:648
void unlink(const QCString &fileName)
Definition portable.cpp:544
FILE * fopen(const QCString &fileName, const QCString &mode)
Definition portable.cpp:349
int system(const QCString &command, const QCString &args, bool commandHasConsole=true)
Definition portable.cpp:105
Portable versions of functions that are platform dependent.
#define TRUE
Definition qcstring.h:37
#define FALSE
Definition qcstring.h:34
A bunch of utility functions.