Doxygen
Loading...
Searching...
No Matches
SearchIndex Class Reference

Writes the search index for the doxygen-provided, server-based search engine that uses PHP. More...

#include <src/searchindex.h>

Classes

class  IndexWord
 
struct  URL
 
struct  URLInfo
 

Public Member Functions

 SearchIndex ()
 
void setCurrentDoc (const Definition *ctx, const QCString &anchor, bool isSourceFile)
 
void addWord (const QCString &word, bool hiPriority)
 
void write (const QCString &file)
 

Private Member Functions

void addWordRec (const QCString &word, bool hiPrio, bool recurse)
 

Private Attributes

std::unordered_map< std::string, int > m_words
 
std::vector< std::vector< IndexWord > > m_index
 
std::unordered_map< std::string, int > m_url2IdMap
 
std::map< int, URL > m_urls
 
int m_urlIndex = -1
 
int m_urlMaxIndex = 0
 

Detailed Description

Writes the search index for the doxygen-provided, server-based search engine that uses PHP.

Definition at line 65 of file searchindex.h.

Constructor & Destructor Documentation

◆ SearchIndex()

SearchIndex::SearchIndex ( )

Definition at line 69 of file searchindex.cpp.

70{
72}
std::vector< std::vector< IndexWord > > m_index
const size_t numIndexEntries

References m_index, and numIndexEntries.

Member Function Documentation

◆ addWord()

void SearchIndex::addWord ( const QCString & word,
bool hiPriority )

Definition at line 236 of file searchindex.cpp.

237{
238 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
239 addWordRec(word,hiPriority,FALSE);
240}
void addWordRec(const QCString &word, bool hiPrio, bool recurse)
#define FALSE
Definition qcstring.h:34
static std::mutex g_searchIndexMutex
std::string_view word
Definition util.cpp:980

References addWordRec(), FALSE, g_searchIndexMutex, and word.

◆ addWordRec()

void SearchIndex::addWordRec ( const QCString & word,
bool hiPrio,
bool recurse )
private

Definition at line 195 of file searchindex.cpp.

196{
197 if (word.isEmpty()) return;
198 QCString wStr = QCString(word).lower();
199 //printf("SearchIndex::addWord(%s,%d) wStr=%s\n",word,hiPriority,qPrint(wStr));
200 int idx=charsToIndex(wStr);
201 if (idx<0 || idx>=static_cast<int>(m_index.size())) return;
202 auto it = m_words.find(wStr.str());
203 if (it==m_words.end())
204 {
205 //fprintf(stderr,"addWord(%s) at index %d\n",word,idx);
206 m_index[idx].emplace_back(wStr);
207 it = m_words.emplace( wStr.str(), static_cast<int>(m_index[idx].size())-1 ).first;
208 }
209 m_index[idx][it->second].addUrlIndex(m_urlIndex,hiPriority);
210 bool found=FALSE;
211 if (!recurse) // the first time we check if we can strip the prefix
212 {
213 int i=getPrefixIndex(word);
214 if (i>0)
215 {
216 addWordRec(word.data()+i,hiPriority,TRUE);
217 found=TRUE;
218 }
219 }
220 if (!found) // no prefix stripped
221 {
222 int i=0;
223 while (word[i]!=0 &&
224 !((word[i]=='_' || word[i]==':' || (word[i]>='a' && word[i]<='z')) && // [_a-z:]
225 (word[i+1]>='A' && word[i+1]<='Z'))) // [A-Z]
226 {
227 i++;
228 }
229 if (word[i]!=0 && i>=1)
230 {
231 addWordRec(word.data()+i+1,hiPriority,TRUE);
232 }
233 }
234}
const std::string & str() const
Definition qcstring.h:537
std::unordered_map< std::string, int > m_words
#define TRUE
Definition qcstring.h:37
static int charsToIndex(const QCString &word)
bool found
Definition util.cpp:984
int getPrefixIndex(const QCString &name)
Definition util.cpp:3590

References addWordRec(), charsToIndex(), FALSE, found, getPrefixIndex(), QCString::lower(), m_index, m_urlIndex, m_words, QCString::str(), TRUE, and word.

Referenced by addWord(), and addWordRec().

◆ setCurrentDoc()

void SearchIndex::setCurrentDoc ( const Definition * ctx,
const QCString & anchor,
bool isSourceFile )

Definition at line 74 of file searchindex.cpp.

75{
76 if (ctx==nullptr) return;
77 std::lock_guard<std::mutex> lock(g_searchIndexMutex);
78 assert(!isSourceFile || ctx->definitionType()==Definition::TypeFile);
79 //printf("SearchIndex::setCurrentDoc(%s,%s,%s)\n",name,baseName,anchor);
80 QCString url=isSourceFile ? (toFileDef(ctx))->getSourceFileBase() : ctx->getOutputFileBase();
81 url+=Config_getString(HTML_FILE_EXTENSION);
82 QCString baseUrl = url;
83 if (!anchor.isEmpty()) url+=QCString("#")+anchor;
84 if (!isSourceFile) baseUrl=url;
85 QCString name=ctx->qualifiedName();
87 {
88 const MemberDef *md = toMemberDef(ctx);
92 }
93 else // compound type
94 {
95 SrcLangExt lang = ctx->getLanguage();
96 QCString sep = getLanguageSpecificSeparator(lang);
97 if (sep!="::")
98 {
99 name = substitute(name,"::",sep);
100 }
101 switch (ctx->definitionType())
102 {
104 {
105 const PageDef *pd = toPageDef(ctx);
106 if (pd->hasTitle())
107 {
108 name = theTranslator->trPage(TRUE,TRUE)+" "+pd->title();
109 }
110 else
111 {
112 name = theTranslator->trPage(TRUE,TRUE)+" "+pd->name();
113 }
114 }
115 break;
117 {
118 const ClassDef *cd = toClassDef(ctx);
119 name.prepend(cd->compoundTypeString()+" ");
120 }
121 break;
123 {
124 if (lang==SrcLangExt::Java || lang==SrcLangExt::CSharp)
125 {
126 name = theTranslator->trPackage(name);
127 }
128 else if (lang==SrcLangExt::Fortran)
129 {
131 }
132 else
133 {
135 }
136 }
137 break;
139 {
140 const GroupDef *gd = toGroupDef(ctx);
141 if (!gd->groupTitle().isEmpty())
142 {
143 name = theTranslator->trGroup(TRUE,TRUE)+" "+gd->groupTitle();
144 }
145 else
146 {
148 }
149 }
150 break;
152 {
154 }
155 break;
156 default:
157 break;
158 }
159 }
160
161 auto it = m_url2IdMap.find(baseUrl.str());
162 if (it == m_url2IdMap.end()) // new entry
163 {
165 m_url2IdMap.emplace(baseUrl.str(),m_urlIndex);
166 m_urls.emplace(m_urlIndex,URL(name,url));
167 }
168 else // existing entry
169 {
170 m_urlIndex=it->second;
171 m_urls.emplace(it->second,URL(name,url));
172 }
173}
virtual QCString compoundTypeString() const =0
Returns the type of compound as a string.
virtual SrcLangExt getLanguage() const =0
Returns the programming language this definition was written in.
virtual DefType definitionType() const =0
virtual QCString qualifiedName() const =0
virtual const QCString & name() const =0
virtual QCString groupTitle() const =0
virtual bool hasTitle() const =0
virtual QCString title() const =0
QCString & prepend(const char *s)
Definition qcstring.h:407
bool isEmpty() const
Returns TRUE iff the string is empty.
Definition qcstring.h:150
std::map< int, URL > m_urls
std::unordered_map< std::string, int > m_url2IdMap
virtual QCString trPackage(const QCString &name)=0
virtual QCString trGroup(bool first_capital, bool singular)=0
virtual QCString trNamespace(bool first_capital, bool singular)=0
virtual QCString trPage(bool first_capital, bool singular)=0
virtual QCString trSubprogram(bool first_capital, bool singular)=0
virtual QCString trMember(bool first_capital, bool singular)=0
virtual QCString trModule(bool first_capital, bool singular)=0
ClassDef * toClassDef(Definition *d)
#define Config_getString(name)
Definition config.h:32
FileDef * toFileDef(Definition *d)
Definition filedef.cpp:1894
GroupDef * toGroupDef(Definition *d)
Translator * theTranslator
Definition language.cpp:71
MemberDef * toMemberDef(Definition *d)
PageDef * toPageDef(Definition *d)
Definition pagedef.cpp:467
QCString substitute(const QCString &s, const QCString &src, const QCString &dst)
substitute all occurrences of src in s by dst
Definition qcstring.cpp:477
SrcLangExt
Language as given by extension.
Definition types.h:42
@ CSharp
Definition types.h:46
@ Fortran
Definition types.h:53
QCString getLanguageSpecificSeparator(SrcLangExt lang, bool classScope)
Returns the scope separator to use given the programming language lang.
Definition util.cpp:6230

References ClassDef::compoundTypeString(), Config_getString, CSharp, Definition::definitionType(), Fortran, g_searchIndexMutex, Definition::getLanguage(), getLanguageSpecificSeparator(), Definition::getOutputFileBase(), GroupDef::groupTitle(), PageDef::hasTitle(), QCString::isEmpty(), Java, m_url2IdMap, m_urlIndex, m_urlMaxIndex, m_urls, Definition::name(), QCString::prepend(), Definition::qualifiedName(), QCString::str(), substitute(), theTranslator, PageDef::title(), toClassDef(), toFileDef(), toGroupDef(), toMemberDef(), toPageDef(), TRUE, Definition::TypeClass, Definition::TypeFile, Definition::TypeGroup, Definition::TypeMember, Definition::TypeModule, Definition::TypeNamespace, and Definition::TypePage.

◆ write()

void SearchIndex::write ( const QCString & file)

Definition at line 257 of file searchindex.cpp.

258{
259 size_t size=4; // for the header
260 size+=4*numIndexEntries; // for the index
261 size_t wordsOffset = size;
262 // first pass: compute the size of the wordlist
263 for (size_t i=0;i<numIndexEntries;i++)
264 {
265 const auto &wlist = m_index[i];
266 if (!wlist.empty())
267 {
268 for (const auto &iw : wlist)
269 {
270 size_t ws = iw.word().length()+1;
271 size+=ws+4; // word + url info list offset
272 }
273 size+=1; // zero list terminator
274 }
275 }
276
277 // second pass: compute the offsets in the index
278 size_t indexOffsets[numIndexEntries];
279 size_t offset=wordsOffset;
280 for (size_t i=0;i<numIndexEntries;i++)
281 {
282 const auto &wlist = m_index[i];
283 if (!wlist.empty())
284 {
285 indexOffsets[i]=offset;
286 for (const auto &iw : wlist)
287 {
288 offset+= iw.word().length()+1;
289 offset+=4; // word + offset to url info array
290 }
291 offset+=1; // zero list terminator
292 }
293 else
294 {
295 indexOffsets[i]=0;
296 }
297 }
298 size_t padding = size;
299 size = (size+3)&~3; // round up to 4 byte boundary
300 padding = size - padding;
301
302 std::vector<size_t> wordStatOffsets(m_words.size());
303
304 int count=0;
305
306 // third pass: compute offset to stats info for each word
307 for (size_t i=0;i<numIndexEntries;i++)
308 {
309 const auto &wlist = m_index[i];
310 if (!wlist.empty())
311 {
312 for (const auto &iw : wlist)
313 {
314 //printf("wordStatOffsets[%d]=%d\n",count,size);
315 wordStatOffsets[count++] = size;
316 size+=4 + iw.urls().size() * 8; // count + (url_index,freq) per url
317 }
318 }
319 }
320 std::vector<size_t> urlOffsets(m_urls.size());
321 for (const auto &udi : m_urls)
322 {
323 urlOffsets[udi.first]=size;
324 size+=udi.second.name.length()+1+
325 udi.second.url.length()+1;
326 }
327
328 //printf("Total size %x bytes (word=%x stats=%x urls=%x)\n",size,wordsOffset,statsOffset,urlsOffset);
329 std::ofstream f = Portable::openOutputStream(fileName);
330 if (f.is_open())
331 {
332 // write header
333 f.put('D'); f.put('O'); f.put('X'); f.put('S');
334 // write index
335 for (size_t i=0;i<numIndexEntries;i++)
336 {
337 writeInt(f,indexOffsets[i]);
338 }
339 // write word lists
340 count=0;
341 for (size_t i=0;i<numIndexEntries;i++)
342 {
343 const auto &wlist = m_index[i];
344 if (!wlist.empty())
345 {
346 for (const auto &iw : wlist)
347 {
348 writeString(f,iw.word());
349 writeInt(f,wordStatOffsets[count++]);
350 }
351 f.put(0);
352 }
353 }
354 // write extra padding bytes
355 for (size_t i=0;i<padding;i++) f.put(0);
356 // write word statistics
357 for (size_t i=0;i<numIndexEntries;i++)
358 {
359 const auto &wlist = m_index[i];
360 if (!wlist.empty())
361 {
362 for (const auto &iw : wlist)
363 {
364 size_t numUrls = iw.urls().size();
365 writeInt(f,numUrls);
366 for (const auto &ui : iw.urls())
367 {
368 writeInt(f,urlOffsets[ui.second.urlIdx]);
369 writeInt(f,ui.second.freq);
370 }
371 }
372 }
373 }
374 // write urls
375 for (const auto &udi : m_urls)
376 {
377 writeString(f,udi.second.name);
378 writeString(f,udi.second.url);
379 }
380 }
381
382}
std::ofstream openOutputStream(const QCString &name, bool append=false)
Definition portable.cpp:665
static void writeString(std::ostream &f, const QCString &s)
static void writeInt(std::ostream &f, size_t index)

References m_index, m_urls, m_words, numIndexEntries, Portable::openOutputStream(), writeInt(), and writeString().

Member Data Documentation

◆ m_index

std::vector< std::vector< IndexWord> > SearchIndex::m_index
private

Definition at line 103 of file searchindex.h.

Referenced by addWordRec(), SearchIndex(), and write().

◆ m_url2IdMap

std::unordered_map<std::string,int> SearchIndex::m_url2IdMap
private

Definition at line 104 of file searchindex.h.

Referenced by setCurrentDoc().

◆ m_urlIndex

int SearchIndex::m_urlIndex = -1
private

Definition at line 106 of file searchindex.h.

Referenced by addWordRec(), and setCurrentDoc().

◆ m_urlMaxIndex

int SearchIndex::m_urlMaxIndex = 0
private

Definition at line 107 of file searchindex.h.

Referenced by setCurrentDoc().

◆ m_urls

std::map<int,URL> SearchIndex::m_urls
private

Definition at line 105 of file searchindex.h.

Referenced by setCurrentDoc(), and write().

◆ m_words

std::unordered_map<std::string,int> SearchIndex::m_words
private

Definition at line 102 of file searchindex.h.

Referenced by addWordRec(), and write().


The documentation for this class was generated from the following files: