Doxygen
Loading...
Searching...
No Matches
searchindex.cpp
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * Copyright (C) 1997-2020 by Dimitri van Heesch.
4 *
5 * Permission to use, copy, modify, and distribute this software and its
6 * documentation under the terms of the GNU General Public License is hereby
7 * granted. No representations are made about the suitability of this software
8 * for any purpose. It is provided "as is" without express or implied warranty.
9 * See the GNU General Public License for more details.
10 *
11 * Documents produced by Doxygen are derivative works derived from the
12 * input used in their production; they are not affected by this license.
13 *
14 */
15
16
17#include <ctype.h>
18#include <assert.h>
19#include <mutex>
20#include <map>
21#include <unordered_map>
22
23#include "searchindex.h"
24
25#include "config.h"
26#include "util.h"
27#include "doxygen.h"
28#include "language.h"
29#include "pagedef.h"
30#include "message.h"
31#include "groupdef.h"
32#include "filedef.h"
33#include "portable.h"
34
35
36// file format: (all multi-byte values are stored in big endian format)
37// 4 byte header
38// 256*256*4 byte index (4 bytes)
39// for each index entry: a zero terminated list of words
40// for each word: a \0 terminated string + 4 byte offset to the stats info
41// padding bytes to align at 4 byte boundary
42// for each word: the number of urls (4 bytes)
43// + for each url containing the word 8 bytes statistics
44// (4 bytes index to url string + 4 bytes frequency counter)
45// for each url: a \0 terminated string
46
// Number of slots in the word index table: charsToIndex() maps a word to
// (first byte)*256+(second byte), and write() emits one 4 byte offset per slot.
47const size_t numIndexEntries = 256*256;
48
// Mutex locked by setCurrentDoc() and addWord() of both SearchIndex and
// SearchIndexExternal before they touch their state.
49static std::mutex g_searchIndexMutex;
50
51//--------------------------------------------------------------------
52
53void SearchIndex::IndexWord::addUrlIndex(int idx,bool hiPriority)
54{
55 //printf("IndexWord::addUrlIndex(%d,%d)\n",idx,hiPriority);
56 auto it = m_urls.find(idx);
57 if (it==m_urls.end())
58 {
59 //printf("URLInfo::URLInfo(%d)\n",idx);
60 it = m_urls.emplace(idx,URLInfo(idx,0)).first;
61 }
62 it->second.freq+=2;
63 if (hiPriority) it->second.freq|=1; // mark as high priority document
64}
65
66//--------------------------------------------------------------------
67
72
// Selects the document that following addWord() calls are counted against:
// builds the document's url and display name from ctx, looks the url up in
// m_url2IdMap and leaves the matching id in m_urlIndex (read by addWordRec()).
// Does nothing when ctx is null. Takes g_searchIndexMutex for the rest of the call.
// NOTE(review): the embedded line numbering of this listing skips 85, 95, 102,
// 115, 121, 137, 150 and 163; the statements on those lines are not visible here.
73void SearchIndex::setCurrentDoc(const Definition *ctx,const QCString &anchor,bool isSourceFile)
74{
75 if (ctx==nullptr) return;
76 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
// a source file document is only valid for a file definition
77 assert(!isSourceFile || ctx->definitionType()==Definition::TypeFile);
78 //printf("SearchIndex::setCurrentDoc(%s,%s,%s)\n",name,baseName,anchor);
79 QCString url=isSourceFile ? (toFileDef(ctx))->getSourceFileBase() : ctx->getOutputFileBase();
80 url+=Config_getString(HTML_FILE_EXTENSION);
// baseUrl is the lookup key: for source files it is the page url without the
// anchor, for all other documents it is the full url including the anchor
81 QCString baseUrl = url;
82 if (!anchor.isEmpty()) url+=QCString("#")+anchor;
83 if (!isSourceFile) baseUrl=url;
84 QCString name=ctx->qualifiedName();
// NOTE(review): the condition guarding this block (line 85) is missing from the
// listing; the body treats ctx as a member definition.
86 {
87 const MemberDef *md = toMemberDef(ctx);
// prefix the name with the translated word for "subprogram" (Fortran) or "member"
88 name.prepend((md->getLanguage()==SrcLangExt::Fortran ?
89 theTranslator->trSubprogram(TRUE,TRUE) :
90 theTranslator->trMember(TRUE,TRUE))+" ");
91 }
92 else // compound type
93 {
94 SrcLangExt lang = ctx->getLanguage();
// NOTE(review): the declaration of sep (line 95) is missing from the listing.
// replace the C++ scope separator in the name when sep differs from it
96 if (sep!="::")
97 {
98 name = substitute(name,"::",sep);
99 }
// NOTE(review): the case labels of this switch are missing from the listing;
// each braced group below handles one definition type.
100 switch (ctx->definitionType())
101 {
103 {
// pages are listed under their title when they have one, otherwise under their name
104 const PageDef *pd = toPageDef(ctx);
105 if (pd->hasTitle())
106 {
107 name = theTranslator->trPage(TRUE,TRUE)+" "+pd->title();
108 }
109 else
110 {
111 name = theTranslator->trPage(TRUE,TRUE)+" "+pd->name();
112 }
113 }
114 break;
116 {
// prefix with the compound type string of the class
117 const ClassDef *cd = toClassDef(ctx);
118 name.prepend(cd->compoundTypeString()+" ");
119 }
120 break;
122 {
// the prefix depends on the language: package (Java/C#), module (Fortran),
// namespace (everything else)
123 if (lang==SrcLangExt::Java || lang==SrcLangExt::CSharp)
124 {
125 name = theTranslator->trPackage(name);
126 }
127 else if (lang==SrcLangExt::Fortran)
128 {
129 name.prepend(theTranslator->trModule(TRUE,TRUE)+" ");
130 }
131 else
132 {
133 name.prepend(theTranslator->trNamespace(TRUE,TRUE)+" ");
134 }
135 }
136 break;
138 {
// groups are listed under their title when set, otherwise under their name
139 const GroupDef *gd = toGroupDef(ctx);
140 if (!gd->groupTitle().isEmpty())
141 {
142 name = theTranslator->trGroup(TRUE,TRUE)+" "+gd->groupTitle();
143 }
144 else
145 {
146 name.prepend(theTranslator->trGroup(TRUE,TRUE)+" ");
147 }
148 }
149 break;
151 {
152 name.prepend(theTranslator->trModule(TRUE,TRUE)+" ");
153 }
154 break;
155 default:
156 break;
157 }
158 }
159
160 auto it = m_url2IdMap.find(baseUrl.str());
161 if (it == m_url2IdMap.end()) // new entry
162 {
// NOTE(review): line 163 is missing from the listing; presumably it advances
// m_urlIndex to a fresh id before it is registered below - confirm.
164 m_url2IdMap.emplace(baseUrl.str(),m_urlIndex);
165 m_urls.emplace(m_urlIndex,URL(name,url));
166 }
167 else // existing entry
168 {
169 m_urlIndex=it->second;
// NOTE(review): emplace() on a std::map does not replace an element whose key
// is already present, so name/url of a known id are presumably left unchanged.
170 m_urls.emplace(it->second,URL(name,url));
171 }
172}
173
174static int charsToIndex(const QCString &word)
175{
176 if (word.length()<2) return -1;
177
178 // Fast string hashing algorithm
179 //register uint16_t h=0;
180 //const char *k = word;
181 //uint16_t mask=0xfc00;
182 //while ( *k )
183 //{
184 // h = (h&mask)^(h<<6)^(*k++);
185 //}
186 //return h;
187
188 // Simple hashing that allows for substring searching
189 uint32_t c1=static_cast<uint8_t>(word[0]);
190 uint32_t c2=static_cast<uint8_t>(word[1]);
191 return c1*256+c2;
192}
193
194void SearchIndex::addWordRec(const QCString &word,bool hiPriority,bool recurse)
195{
196 if (word.isEmpty()) return;
197 QCString wStr = QCString(word).lower();
198 //printf("SearchIndex::addWord(%s,%d) wStr=%s\n",word,hiPriority,qPrint(wStr));
199 int idx=charsToIndex(wStr);
200 if (idx<0 || idx>=static_cast<int>(m_index.size())) return;
201 auto it = m_words.find(wStr.str());
202 if (it==m_words.end())
203 {
204 //fprintf(stderr,"addWord(%s) at index %d\n",word,idx);
205 m_index[idx].emplace_back(wStr);
206 it = m_words.emplace( wStr.str(), static_cast<int>(m_index[idx].size())-1 ).first;
207 }
208 m_index[idx][it->second].addUrlIndex(m_urlIndex,hiPriority);
209 bool found=FALSE;
210 if (!recurse) // the first time we check if we can strip the prefix
211 {
212 int i=getPrefixIndex(word);
213 if (i>0)
214 {
215 addWordRec(word.data()+i,hiPriority,TRUE);
216 found=TRUE;
217 }
218 }
219 if (!found) // no prefix stripped
220 {
221 int i=0;
222 while (word[i]!=0 &&
223 !((word[i]=='_' || word[i]==':' || (word[i]>='a' && word[i]<='z')) && // [_a-z:]
224 (word[i+1]>='A' && word[i+1]<='Z'))) // [A-Z]
225 {
226 i++;
227 }
228 if (word[i]!=0 && i>=1)
229 {
230 addWordRec(word.data()+i+1,hiPriority,TRUE);
231 }
232 }
233}
234
235void SearchIndex::addWord(const QCString &word,bool hiPriority)
236{
237 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
238 addWordRec(word,hiPriority,FALSE);
239}
240
// Writes the low 32 bits of index to f as four bytes, most significant byte first.
static void writeInt(std::ostream &f,size_t index)
{
  for (int shift=24; shift>=0; shift-=8)
  {
    f.put(static_cast<int>((index>>shift)&0xff));
  }
}
248
249static void writeString(std::ostream &f,const QCString &s)
250{
251 size_t l = s.length();
252 for (size_t i=0;i<l;i++) f.put(s[i]);
253 f.put(0);
254}
255
256void SearchIndex::write(const QCString &fileName)
257{
258 size_t size=4; // for the header
259 size+=4*numIndexEntries; // for the index
260 size_t wordsOffset = size;
261 // first pass: compute the size of the wordlist
262 for (size_t i=0;i<numIndexEntries;i++)
263 {
264 const auto &wlist = m_index[i];
265 if (!wlist.empty())
266 {
267 for (const auto &iw : wlist)
268 {
269 size_t ws = iw.word().length()+1;
270 size+=ws+4; // word + url info list offset
271 }
272 size+=1; // zero list terminator
273 }
274 }
275
276 // second pass: compute the offsets in the index
277 size_t indexOffsets[numIndexEntries];
278 size_t offset=wordsOffset;
279 for (size_t i=0;i<numIndexEntries;i++)
280 {
281 const auto &wlist = m_index[i];
282 if (!wlist.empty())
283 {
284 indexOffsets[i]=offset;
285 for (const auto &iw : wlist)
286 {
287 offset+= iw.word().length()+1;
288 offset+=4; // word + offset to url info array
289 }
290 offset+=1; // zero list terminator
291 }
292 else
293 {
294 indexOffsets[i]=0;
295 }
296 }
297 size_t padding = size;
298 size = (size+3)&~3; // round up to 4 byte boundary
299 padding = size - padding;
300
301 std::vector<size_t> wordStatOffsets(m_words.size());
302
303 int count=0;
304
305 // third pass: compute offset to stats info for each word
306 for (size_t i=0;i<numIndexEntries;i++)
307 {
308 const auto &wlist = m_index[i];
309 if (!wlist.empty())
310 {
311 for (const auto &iw : wlist)
312 {
313 //printf("wordStatOffsets[%d]=%d\n",count,size);
314 wordStatOffsets[count++] = size;
315 size+=4 + iw.urls().size() * 8; // count + (url_index,freq) per url
316 }
317 }
318 }
319 std::vector<size_t> urlOffsets(m_urls.size());
320 for (const auto &udi : m_urls)
321 {
322 urlOffsets[udi.first]=size;
323 size+=udi.second.name.length()+1+
324 udi.second.url.length()+1;
325 }
326
327 //printf("Total size %x bytes (word=%x stats=%x urls=%x)\n",size,wordsOffset,statsOffset,urlsOffset);
328 std::ofstream f = Portable::openOutputStream(fileName);
329 if (f.is_open())
330 {
331 // write header
332 f.put('D'); f.put('O'); f.put('X'); f.put('S');
333 // write index
334 for (size_t i=0;i<numIndexEntries;i++)
335 {
336 writeInt(f,indexOffsets[i]);
337 }
338 // write word lists
339 count=0;
340 for (size_t i=0;i<numIndexEntries;i++)
341 {
342 const auto &wlist = m_index[i];
343 if (!wlist.empty())
344 {
345 for (const auto &iw : wlist)
346 {
347 writeString(f,iw.word());
348 writeInt(f,wordStatOffsets[count++]);
349 }
350 f.put(0);
351 }
352 }
353 // write extra padding bytes
354 for (size_t i=0;i<padding;i++) f.put(0);
355 // write word statistics
356 for (size_t i=0;i<numIndexEntries;i++)
357 {
358 const auto &wlist = m_index[i];
359 if (!wlist.empty())
360 {
361 for (const auto &iw : wlist)
362 {
363 size_t numUrls = iw.urls().size();
364 writeInt(f,numUrls);
365 for (const auto &ui : iw.urls())
366 {
367 writeInt(f,urlOffsets[ui.second.urlIdx]);
368 writeInt(f,ui.second.freq);
369 }
370 }
371 }
372 }
373 // write urls
374 for (const auto &udi : m_urls)
375 {
376 writeString(f,udi.second.name);
377 writeString(f,udi.second.url);
378 }
379 }
380
381}
382
383//---------------------------------------------------------------------------
384// the following part is for writing an external search index
385
389
// Body of the helper that yields the type string of a definition for the
// external search index; the tooltip index of this listing gives its signature
// as "static QCString definitionToName(const Definition *ctx)", the signature
// line itself is not part of the listing.
// Returns "unknown" for a null ctx, for a member that matches none of the
// predicates below and for a definition type not handled by the switch.
391{
392 if (ctx && ctx->definitionType()==Definition::TypeMember)
393 {
// members: the first matching predicate decides, so the order of the tests matters
394 const MemberDef *md = toMemberDef(ctx);
395 if (md->isFunction())
396 return "function";
397 else if (md->isSlot())
398 return "slot";
399 else if (md->isSignal())
400 return "signal";
401 else if (md->isVariable())
402 return "variable";
403 else if (md->isTypedef())
404 return "typedef";
405 else if (md->isEnumerate())
406 return "enum";
407 else if (md->isEnumValue())
408 return "enumvalue";
409 else if (md->isProperty())
410 return "property";
411 else if (md->isEvent())
412 return "event";
413 else if (md->isRelated() || md->isForeign())
414 return "related";
415 else if (md->isFriend())
416 return "friend";
417 else if (md->isDefine())
418 return "define";
419 }
420 else if (ctx)
421 {
// NOTE(review): the case labels of this switch (every second line of the
// embedded numbering) are missing from the listing; each return below belongs
// to one definition type.
422 switch(ctx->definitionType())
423 {
425 return (toClassDef(ctx))->compoundTypeString();
427 return "file";
429 return "namespace";
431 return "concept";
433 return "group";
435 return "package";
437 return "page";
439 return "dir";
441 return "module";
442 default:
443 break;
444 }
445 }
446 return "unknown";
447}
448
// Selects the document entry that following addWord() calls append text to.
// Entries are keyed by "<external search id>;<url>"; a new entry is created and
// filled in the first time a key is seen. Holds g_searchIndexMutex throughout.
// NOTE(review): unlike SearchIndex::setCurrentDoc() there is no null check
// before ctx is dereferenced - confirm that callers never pass nullptr.
// NOTE(review): the embedded line numbering skips 454, 461 and 464; the
// statements on those lines are not visible in this listing.
449void SearchIndexExternal::setCurrentDoc(const Definition *ctx,const QCString &anchor,bool isSourceFile)
450{
451 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
452 QCString extId = stripPath(Config_getString(EXTERNAL_SEARCH_ID));
453 QCString url = isSourceFile ? (toFileDef(ctx))->getSourceFileBase() : ctx->getOutputFileBase();
455 if (!anchor.isEmpty()) url+=QCString("#")+anchor;
456 QCString key = extId+";"+url;
457
458 auto it = m_docEntries.find(key.str());
459 if (it == m_docEntries.end())
460 {
// NOTE(review): the declaration of e (line 461) is missing from the listing.
462 e.type = isSourceFile ? QCString("source") : definitionToName(ctx);
463 e.name = ctx->qualifiedName();
// NOTE(review): the condition guarding this block (line 464) is missing from
// the listing; the body treats ctx as a member definition.
465 {
466 e.args = (toMemberDef(ctx))->argsString();
467 }
468 else if (ctx->definitionType()==Definition::TypeGroup)
469 {
// a group is listed under its filtered title when it has one
470 const GroupDef *gd = toGroupDef(ctx);
471 if (!gd->groupTitle().isEmpty())
472 {
473 e.name = filterTitle(gd->groupTitle());
474 }
475 }
476 else if (ctx->definitionType()==Definition::TypePage)
477 {
// a page is listed under its filtered title when it has one
478 const PageDef *pd = toPageDef(ctx);
479 if (pd->hasTitle())
480 {
481 e.name = filterTitle(pd->title());
482 }
483 }
484 e.extId = extId;
485 e.url = url;
486 it = m_docEntries.emplace(key.str(),e).first;
487 //printf("searchIndexExt %s : %s\n",qPrint(e->name),qPrint(e->url));
488 }
// remember the entry for addWord(); m_docEntries is listed as a std::map in
// this page's index, so the pointer presumably stays valid across later insertions
489 m_current = &it->second;
490}
491
492void SearchIndexExternal::addWord(const QCString &word,bool hiPriority)
493{
494 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
495 if (word.isEmpty() || !isId(word[0]) || m_current==nullptr) return;
496 QCString &text = hiPriority ? m_current->importantText : m_current->normalText;
497 if (!text.isEmpty()) text+=' ';
498 text+=word;
499 //printf("addWord %s\n",word);
500}
501
// Body of the function that writes all collected document entries to fileName
// as an XML file with one <doc> element per entry inside a single <add>
// element; the page index lists it as "void write(const QCString &file)".
// The signature line (502) is not part of this listing.
// Reports an error when the file cannot be opened.
503{
504 std::ofstream t = Portable::openOutputStream(fileName);
505 if (t.is_open())
506 {
507 t << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
508 t << "<add>\n";
509 for (auto &[name,doc] : m_docEntries)
510 {
511 t << " <doc>\n";
// NOTE(review): type is written as is, every other field goes through convertToXML()
512 t << " <field name=\"type\">" << doc.type << "</field>\n";
513 t << " <field name=\"name\">" << convertToXML(doc.name) << "</field>\n";
// args and tag are optional fields, only written when non-empty
514 if (!doc.args.isEmpty())
515 {
516 t << " <field name=\"args\">" << convertToXML(doc.args) << "</field>\n";
517 }
518 if (!doc.extId.isEmpty())
519 {
520 t << " <field name=\"tag\">" << convertToXML(doc.extId) << "</field>\n";
521 }
522 t << " <field name=\"url\">" << convertToXML(doc.url) << "</field>\n";
// keywords holds the high priority words, text the remaining ones (see addWord())
523 t << " <field name=\"keywords\">" << convertToXML(doc.importantText) << "</field>\n";
524 t << " <field name=\"text\">" << convertToXML(doc.normalText) << "</field>\n";
525 t << " </doc>\n";
526 }
527 t << "</add>\n";
528 }
529 else
530 {
531 err("Failed to open file {} for writing!\n",fileName);
532 }
533}
534
535//---------------------------------------------------------------------------------------------
536
// Body of a function whose signature line (537) is missing from this listing;
// presumably initSearchIndexer(), which the page index lists - confirm.
// Reads the three search related configuration flags and acts only when both
// SEARCHENGINE and SERVER_BASED_SEARCH are enabled.
538{
539 bool searchEngine = Config_getBool(SEARCHENGINE);
540 bool serverBasedSearch = Config_getBool(SERVER_BASED_SEARCH);
541 bool externalSearch = Config_getBool(EXTERNAL_SEARCH);
542 if (searchEngine && serverBasedSearch)
543 {
// NOTE(review): the statement of this branch (line 544) is missing from the
// listing; externalSearch is presumably used there - confirm.
545 }
546}
547
552
553
An abstract class representing a compound symbol.
Definition classdef.h:104
virtual QCString compoundTypeString() const =0
Returns the type of compound as a string.
The common base class of all entity definitions found in the sources.
Definition definition.h:76
virtual SrcLangExt getLanguage() const =0
Returns the programming language this definition was written in.
virtual DefType definitionType() const =0
virtual QCString qualifiedName() const =0
virtual QCString getOutputFileBase() const =0
virtual const QCString & name() const =0
static SearchIndexIntf searchIndex
Definition doxygen.h:123
A model of a group of symbols.
Definition groupdef.h:52
virtual QCString groupTitle() const =0
A model of a class/file/namespace member symbol.
Definition memberdef.h:48
virtual bool isSignal() const =0
virtual bool isFriend() const =0
virtual bool isForeign() const =0
virtual bool isRelated() const =0
virtual bool isTypedef() const =0
virtual bool isSlot() const =0
virtual bool isEvent() const =0
virtual bool isFunction() const =0
virtual bool isDefine() const =0
virtual bool isEnumerate() const =0
virtual bool isVariable() const =0
virtual bool isEnumValue() const =0
virtual bool isProperty() const =0
A model of a page symbol.
Definition pagedef.h:26
virtual bool hasTitle() const =0
virtual QCString title() const =0
This is an alternative implementation of QCString.
Definition qcstring.h:101
QCString & prepend(const char *s)
Definition qcstring.h:422
size_t length() const
Returns the length of the string, not counting the 0-terminator.
Definition qcstring.h:166
QCString lower() const
Definition qcstring.h:249
bool isEmpty() const
Returns TRUE iff the string is empty.
Definition qcstring.h:163
const std::string & str() const
Definition qcstring.h:552
const char * data() const
Returns a pointer to the contents of the string in the form of a 0-terminated C string.
Definition qcstring.h:172
void addUrlIndex(int, bool)
void addWord(const QCString &word, bool hiPriority)
SearchDocEntry * m_current
void write(const QCString &file)
std::map< std::string, SearchDocEntry > m_docEntries
void setCurrentDoc(const Definition *ctx, const QCString &anchor, bool isSourceFile)
std::map< int, URL > m_urls
void addWordRec(const QCString &word, bool hiPrio, bool recurse)
void addWord(const QCString &word, bool hiPriority)
std::vector< std::vector< IndexWord > > m_index
std::unordered_map< std::string, int > m_words
std::unordered_map< std::string, int > m_url2IdMap
void write(const QCString &file)
void setCurrentDoc(const Definition *ctx, const QCString &anchor, bool isSourceFile)
ClassDef * toClassDef(Definition *d)
#define Config_getBool(name)
Definition config.h:33
#define Config_getString(name)
Definition config.h:32
FileDef * toFileDef(Definition *d)
Definition filedef.cpp:1956
GroupDef * toGroupDef(Definition *d)
Translator * theTranslator
Definition language.cpp:71
MemberDef * toMemberDef(Definition *d)
#define err(fmt,...)
Definition message.h:127
std::ofstream openOutputStream(const QCString &name, bool append=false)
Definition portable.cpp:649
PageDef * toPageDef(Definition *d)
Definition pagedef.cpp:481
Portable versions of functions that are platform dependent.
QCString substitute(const QCString &s, const QCString &src, const QCString &dst)
substitute all occurrences of src in s by dst
Definition qcstring.cpp:571
#define TRUE
Definition qcstring.h:37
#define FALSE
Definition qcstring.h:34
static void writeString(std::ostream &f, const QCString &s)
static std::mutex g_searchIndexMutex
static void writeInt(std::ostream &f, size_t index)
static int charsToIndex(const QCString &word)
const size_t numIndexEntries
static QCString definitionToName(const Definition *ctx)
Web server based search engine.
void initSearchIndexer()
void finalizeSearchIndexer()
QCString type
QCString extId
QCString name
QCString args
QCString url
SrcLangExt
Definition types.h:207
QCString stripPath(const QCString &s)
Definition util.cpp:4890
QCString filterTitle(const QCString &title)
Definition util.cpp:5566
QCString convertToXML(const QCString &s, bool keepEntities)
Definition util.cpp:3854
QCString getLanguageSpecificSeparator(SrcLangExt lang, bool classScope)
Returns the scope separator to use given the programming language lang.
Definition util.cpp:5849
int getPrefixIndex(const QCString &name)
Definition util.cpp:3173
void addHtmlExtensionIfMissing(QCString &fName)
Definition util.cpp:4863
A bunch of utility functions.
bool isId(int c)
Definition util.h:207