Doxygen
Loading...
Searching...
No Matches
SearchIndex Class Reference

Writes the search index for the server-based, PHP-driven search engine provided by doxygen. More...

#include <src/searchindex.h>

Classes

struct  URL
struct  URLInfo
class  IndexWord

Public Member Functions

 SearchIndex ()
void setCurrentDoc (const Definition *ctx, const QCString &anchor, bool isSourceFile)
void addWord (const QCString &word, bool hiPriority)
void write (const QCString &file)

Private Member Functions

void addWordRec (const QCString &word, bool hiPrio, bool recurse)

Private Attributes

std::unordered_map< std::string, int > m_words
std::vector< std::vector< IndexWord > > m_index
std::unordered_map< std::string, int > m_url2IdMap
std::map< int, URL > m_urls
int m_urlIndex = -1
int m_urlMaxIndex = 0

Detailed Description

Writes the search index for the server-based, PHP-driven search engine provided by doxygen.

Definition at line 64 of file searchindex.h.

Constructor & Destructor Documentation

◆ SearchIndex()

SearchIndex::SearchIndex ( )

Definition at line 68 of file searchindex.cpp.

69{
71}
std::vector< std::vector< IndexWord > > m_index
const size_t numIndexEntries

References m_index, and numIndexEntries.

Member Function Documentation

◆ addWord()

void SearchIndex::addWord ( const QCString & word,
bool hiPriority )

Definition at line 235 of file searchindex.cpp.

236{
237 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
238 addWordRec(word,hiPriority,FALSE);
239}
void addWordRec(const QCString &word, bool hiPrio, bool recurse)
#define FALSE
Definition qcstring.h:34
static std::mutex g_searchIndexMutex

References addWordRec(), FALSE, and g_searchIndexMutex.

◆ addWordRec()

void SearchIndex::addWordRec ( const QCString & word,
bool hiPrio,
bool recurse )
private

Definition at line 194 of file searchindex.cpp.

195{
196 if (word.isEmpty()) return;
197 QCString wStr = QCString(word).lower();
198 //printf("SearchIndex::addWord(%s,%d) wStr=%s\n",word,hiPriority,qPrint(wStr));
199 int idx=charsToIndex(wStr);
200 if (idx<0 || idx>=static_cast<int>(m_index.size())) return;
201 auto it = m_words.find(wStr.str());
202 if (it==m_words.end())
203 {
204 //fprintf(stderr,"addWord(%s) at index %d\n",word,idx);
205 m_index[idx].emplace_back(wStr);
206 it = m_words.emplace( wStr.str(), static_cast<int>(m_index[idx].size())-1 ).first;
207 }
208 m_index[idx][it->second].addUrlIndex(m_urlIndex,hiPriority);
209 bool found=FALSE;
210 if (!recurse) // the first time we check if we can strip the prefix
211 {
212 int i=getPrefixIndex(word);
213 if (i>0)
214 {
215 addWordRec(word.data()+i,hiPriority,TRUE);
216 found=TRUE;
217 }
218 }
219 if (!found) // no prefix stripped
220 {
221 int i=0;
222 while (word[i]!=0 &&
223 !((word[i]=='_' || word[i]==':' || (word[i]>='a' && word[i]<='z')) && // [_a-z:]
224 (word[i+1]>='A' && word[i+1]<='Z'))) // [A-Z]
225 {
226 i++;
227 }
228 if (word[i]!=0 && i>=1)
229 {
230 addWordRec(word.data()+i+1,hiPriority,TRUE);
231 }
232 }
233}
bool isEmpty() const
Returns TRUE iff the string is empty.
Definition qcstring.h:163
const std::string & str() const
Definition qcstring.h:552
const char * data() const
Returns a pointer to the contents of the string in the form of a 0-terminated C string.
Definition qcstring.h:172
std::unordered_map< std::string, int > m_words
#define TRUE
Definition qcstring.h:37
static int charsToIndex(const QCString &word)
int getPrefixIndex(const QCString &name)
Definition util.cpp:3173

References addWordRec(), charsToIndex(), QCString::data(), FALSE, getPrefixIndex(), QCString::isEmpty(), QCString::lower(), m_index, m_urlIndex, m_words, QCString::str(), and TRUE.

Referenced by addWord(), and addWordRec().

◆ setCurrentDoc()

void SearchIndex::setCurrentDoc ( const Definition * ctx,
const QCString & anchor,
bool isSourceFile )

Definition at line 73 of file searchindex.cpp.

74{
75 if (ctx==nullptr) return;
76 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
77 assert(!isSourceFile || ctx->definitionType()==Definition::TypeFile);
78 //printf("SearchIndex::setCurrentDoc(%s,%s,%s)\n",name,baseName,anchor);
79 QCString url=isSourceFile ? (toFileDef(ctx))->getSourceFileBase() : ctx->getOutputFileBase();
80 url+=Config_getString(HTML_FILE_EXTENSION);
81 QCString baseUrl = url;
82 if (!anchor.isEmpty()) url+=QCString("#")+anchor;
83 if (!isSourceFile) baseUrl=url;
84 QCString name=ctx->qualifiedName();
86 {
87 const MemberDef *md = toMemberDef(ctx);
88 name.prepend((md->getLanguage()==SrcLangExt::Fortran ?
91 }
92 else // compound type
93 {
94 SrcLangExt lang = ctx->getLanguage();
95 QCString sep = getLanguageSpecificSeparator(lang);
96 if (sep!="::")
97 {
98 name = substitute(name,"::",sep);
99 }
100 switch (ctx->definitionType())
101 {
103 {
104 const PageDef *pd = toPageDef(ctx);
105 if (pd->hasTitle())
106 {
107 name = theTranslator->trPage(TRUE,TRUE)+" "+pd->title();
108 }
109 else
110 {
111 name = theTranslator->trPage(TRUE,TRUE)+" "+pd->name();
112 }
113 }
114 break;
116 {
117 const ClassDef *cd = toClassDef(ctx);
118 name.prepend(cd->compoundTypeString()+" ");
119 }
120 break;
122 {
123 if (lang==SrcLangExt::Java || lang==SrcLangExt::CSharp)
124 {
125 name = theTranslator->trPackage(name);
126 }
127 else if (lang==SrcLangExt::Fortran)
128 {
130 }
131 else
132 {
134 }
135 }
136 break;
138 {
139 const GroupDef *gd = toGroupDef(ctx);
140 if (!gd->groupTitle().isEmpty())
141 {
142 name = theTranslator->trGroup(TRUE,TRUE)+" "+gd->groupTitle();
143 }
144 else
145 {
147 }
148 }
149 break;
151 {
153 }
154 break;
155 default:
156 break;
157 }
158 }
159
160 auto it = m_url2IdMap.find(baseUrl.str());
161 if (it == m_url2IdMap.end()) // new entry
162 {
164 m_url2IdMap.emplace(baseUrl.str(),m_urlIndex);
165 m_urls.emplace(m_urlIndex,URL(name,url));
166 }
167 else // existing entry
168 {
169 m_urlIndex=it->second;
170 m_urls.emplace(it->second,URL(name,url));
171 }
172}
virtual QCString compoundTypeString() const =0
Returns the type of compound as a string.
virtual SrcLangExt getLanguage() const =0
Returns the programming language this definition was written in.
virtual DefType definitionType() const =0
virtual QCString qualifiedName() const =0
virtual const QCString & name() const =0
virtual QCString groupTitle() const =0
virtual bool hasTitle() const =0
virtual QCString title() const =0
QCString & prepend(const char *s)
Definition qcstring.h:422
std::map< int, URL > m_urls
std::unordered_map< std::string, int > m_url2IdMap
virtual QCString trPackage(const QCString &name)=0
virtual QCString trGroup(bool first_capital, bool singular)=0
virtual QCString trNamespace(bool first_capital, bool singular)=0
virtual QCString trPage(bool first_capital, bool singular)=0
virtual QCString trSubprogram(bool first_capital, bool singular)=0
virtual QCString trMember(bool first_capital, bool singular)=0
virtual QCString trModule(bool first_capital, bool singular)=0
ClassDef * toClassDef(Definition *d)
#define Config_getString(name)
Definition config.h:32
FileDef * toFileDef(Definition *d)
Definition filedef.cpp:1956
GroupDef * toGroupDef(Definition *d)
Translator * theTranslator
Definition language.cpp:71
MemberDef * toMemberDef(Definition *d)
PageDef * toPageDef(Definition *d)
Definition pagedef.cpp:481
QCString substitute(const QCString &s, const QCString &src, const QCString &dst)
substitute all occurrences of src in s by dst
Definition qcstring.cpp:571
SrcLangExt
Definition types.h:207
QCString getLanguageSpecificSeparator(SrcLangExt lang, bool classScope)
Returns the scope separator to use given the programming language lang.
Definition util.cpp:5849

References ClassDef::compoundTypeString(), Config_getString, Definition::definitionType(), g_searchIndexMutex, Definition::getLanguage(), getLanguageSpecificSeparator(), Definition::getOutputFileBase(), GroupDef::groupTitle(), PageDef::hasTitle(), QCString::isEmpty(), m_url2IdMap, m_urlIndex, m_urlMaxIndex, m_urls, Definition::name(), QCString::prepend(), Definition::qualifiedName(), QCString::str(), substitute(), theTranslator, PageDef::title(), toClassDef(), toFileDef(), toGroupDef(), toMemberDef(), toPageDef(), TRUE, Definition::TypeClass, Definition::TypeFile, Definition::TypeGroup, Definition::TypeMember, Definition::TypeModule, Definition::TypeNamespace, and Definition::TypePage.

◆ write()

void SearchIndex::write ( const QCString & file)

Definition at line 256 of file searchindex.cpp.

257{
258 size_t size=4; // for the header
259 size+=4*numIndexEntries; // for the index
260 size_t wordsOffset = size;
261 // first pass: compute the size of the wordlist
262 for (size_t i=0;i<numIndexEntries;i++)
263 {
264 const auto &wlist = m_index[i];
265 if (!wlist.empty())
266 {
267 for (const auto &iw : wlist)
268 {
269 size_t ws = iw.word().length()+1;
270 size+=ws+4; // word + url info list offset
271 }
272 size+=1; // zero list terminator
273 }
274 }
275
276 // second pass: compute the offsets in the index
277 size_t indexOffsets[numIndexEntries];
278 size_t offset=wordsOffset;
279 for (size_t i=0;i<numIndexEntries;i++)
280 {
281 const auto &wlist = m_index[i];
282 if (!wlist.empty())
283 {
284 indexOffsets[i]=offset;
285 for (const auto &iw : wlist)
286 {
287 offset+= iw.word().length()+1;
288 offset+=4; // word + offset to url info array
289 }
290 offset+=1; // zero list terminator
291 }
292 else
293 {
294 indexOffsets[i]=0;
295 }
296 }
297 size_t padding = size;
298 size = (size+3)&~3; // round up to 4 byte boundary
299 padding = size - padding;
300
301 std::vector<size_t> wordStatOffsets(m_words.size());
302
303 int count=0;
304
305 // third pass: compute offset to stats info for each word
306 for (size_t i=0;i<numIndexEntries;i++)
307 {
308 const auto &wlist = m_index[i];
309 if (!wlist.empty())
310 {
311 for (const auto &iw : wlist)
312 {
313 //printf("wordStatOffsets[%d]=%d\n",count,size);
314 wordStatOffsets[count++] = size;
315 size+=4 + iw.urls().size() * 8; // count + (url_index,freq) per url
316 }
317 }
318 }
319 std::vector<size_t> urlOffsets(m_urls.size());
320 for (const auto &udi : m_urls)
321 {
322 urlOffsets[udi.first]=size;
323 size+=udi.second.name.length()+1+
324 udi.second.url.length()+1;
325 }
326
327 //printf("Total size %x bytes (word=%x stats=%x urls=%x)\n",size,wordsOffset,statsOffset,urlsOffset);
328 std::ofstream f = Portable::openOutputStream(fileName);
329 if (f.is_open())
330 {
331 // write header
332 f.put('D'); f.put('O'); f.put('X'); f.put('S');
333 // write index
334 for (size_t i=0;i<numIndexEntries;i++)
335 {
336 writeInt(f,indexOffsets[i]);
337 }
338 // write word lists
339 count=0;
340 for (size_t i=0;i<numIndexEntries;i++)
341 {
342 const auto &wlist = m_index[i];
343 if (!wlist.empty())
344 {
345 for (const auto &iw : wlist)
346 {
347 writeString(f,iw.word());
348 writeInt(f,wordStatOffsets[count++]);
349 }
350 f.put(0);
351 }
352 }
353 // write extra padding bytes
354 for (size_t i=0;i<padding;i++) f.put(0);
355 // write word statistics
356 for (size_t i=0;i<numIndexEntries;i++)
357 {
358 const auto &wlist = m_index[i];
359 if (!wlist.empty())
360 {
361 for (const auto &iw : wlist)
362 {
363 size_t numUrls = iw.urls().size();
364 writeInt(f,numUrls);
365 for (const auto &ui : iw.urls())
366 {
367 writeInt(f,urlOffsets[ui.second.urlIdx]);
368 writeInt(f,ui.second.freq);
369 }
370 }
371 }
372 }
373 // write urls
374 for (const auto &udi : m_urls)
375 {
376 writeString(f,udi.second.name);
377 writeString(f,udi.second.url);
378 }
379 }
380
381}
std::ofstream openOutputStream(const QCString &name, bool append=false)
Definition portable.cpp:649
static void writeString(std::ostream &f, const QCString &s)
static void writeInt(std::ostream &f, size_t index)

References m_index, m_urls, m_words, numIndexEntries, Portable::openOutputStream(), writeInt(), and writeString().

Member Data Documentation

◆ m_index

std::vector< std::vector< IndexWord> > SearchIndex::m_index
private

Definition at line 102 of file searchindex.h.

Referenced by addWordRec(), SearchIndex(), and write().

◆ m_url2IdMap

std::unordered_map<std::string,int> SearchIndex::m_url2IdMap
private

Definition at line 103 of file searchindex.h.

Referenced by setCurrentDoc().

◆ m_urlIndex

int SearchIndex::m_urlIndex = -1
private

Definition at line 105 of file searchindex.h.

Referenced by addWordRec(), and setCurrentDoc().

◆ m_urlMaxIndex

int SearchIndex::m_urlMaxIndex = 0
private

Definition at line 106 of file searchindex.h.

Referenced by setCurrentDoc().

◆ m_urls

std::map<int,URL> SearchIndex::m_urls
private

Definition at line 104 of file searchindex.h.

Referenced by setCurrentDoc(), and write().

◆ m_words

std::unordered_map<std::string,int> SearchIndex::m_words
private

Definition at line 101 of file searchindex.h.

Referenced by addWordRec(), and write().


The documentation for this class was generated from the following files: