head 1.9; access; symbols OPENPKG_E1_MP_HEAD:1.9 OPENPKG_E1_MP:1.9 OPENPKG_E1_MP_2_STABLE:1.9 OPENPKG_E1_FP:1.9 OPENPKG_2_STABLE_MP:1.9 OPENPKG_2_STABLE_20061018:1.9 OPENPKG_2_STABLE_20060622:1.9 OPENPKG_2_STABLE:1.9.0.2 OPENPKG_2_STABLE_BP:1.9 OPENPKG_2_5_SOLID:1.5.0.12 OPENPKG_2_5_SOLID_BP:1.5 OPENPKG_2_4_RELEASE:1.5 OPENPKG_2_4_SOLID:1.5.0.10 OPENPKG_2_4_SOLID_BP:1.5 OPENPKG_2_3_RELEASE:1.5 OPENPKG_2_3_SOLID:1.5.0.8 OPENPKG_2_3_SOLID_BP:1.5 OPENPKG_2_2_RELEASE:1.5 OPENPKG_2_2_SOLID:1.5.0.6 OPENPKG_2_2_SOLID_BP:1.5 OPENPKG_2_1_RELEASE:1.5 OPENPKG_2_1_SOLID:1.5.0.4 OPENPKG_2_1_SOLID_BP:1.5 OPENPKG_2_0_RELEASE:1.5 OPENPKG_2_0_SOLID:1.5.0.2 OPENPKG_2_0_SOLID_BP:1.5 OPENPKG_1_3_RELEASE:1.3.2.1 OPENPKG_1_3_SOLID:1.3.2.1.0.2 OPENPKG_1_3_SOLID_BP:1.3.2.1 OPENPKG_1_STABLE_MP:1.4 OPENPKG_1_2_RELEASE:1.3 OPENPKG_1_2_SOLID:1.3.0.4 OPENPKG_1_2_SOLID_BP:1.3 OPENPKG_1_STABLE:1.3.0.2 OPENPKG_1_STABLE_BP:1.3; locks; strict; comment @# @; 1.9 date 2006.06.17.20.46.29; author rse; state Exp; branches; next 1.8; commitid S5GZvilExUt9SoBr; 1.8 date 2006.03.24.19.21.33; author rse; state Exp; branches; next 1.7; commitid 0AM5RmPKDwnq8tqr; 1.7 date 2006.03.24.09.32.31; author rse; state Exp; branches; next 1.6; commitid mZdwQ9muxZLkSpqr; 1.6 date 2006.03.23.19.02.35; author rse; state Exp; branches; next 1.5; commitid XxCWJ1I1kaHT3lqr; 1.5 date 2003.11.08.21.36.34; author rse; state dead; branches; next 1.4; 1.4 date 2003.06.13.07.22.10; author rse; state Exp; branches; next 1.3; 1.3 date 2003.01.14.15.42.06; author rse; state Exp; branches 1.3.2.1; next 1.2; 1.2 date 2002.12.16.09.55.55; author cs; state Exp; branches; next 1.1; 1.1 date 2002.12.13.13.58.57; author cs; state Exp; branches; next ; 1.3.2.1 date 2003.07.24.20.49.44; author rse; state Exp; branches; next ; desc @@ 1.9 log @fix building under new GCC 4.1 C++ world order @ text @Index: htdig/ExternalParser.cc --- htdig/ExternalParser.cc.orig 2004-05-28 15:15:14 +0200 +++ htdig/ExternalParser.cc 2006-06-17 22:25:21 +0200 @@@@ -227,7 +227,7 @@@@ int get_file = (convertToType.length() != 0); String newcontent; - StringList cpargs(currentParser); + StringList cpargs(currentParser, " \t"); char **parsargs = new char * [cpargs.Count() + 5]; int argi; for (argi = 0; argi < cpargs.Count(); argi++) @@@@ -424,7 +424,7 @@@@ { metadatetags = new StringMatch(); metadatetags->IgnoreCase(); - metadatetags->Pattern("date|dc.date|dc.date.created|dc.data.modified"); + metadatetags->Pattern("date|dc.date|dc.date.created|dc.date.modified"); } // Index: htnet/HtHTTP.cc --- htnet/HtHTTP.cc.orig 2004-05-28 15:15:23 +0200 +++ htnet/HtHTTP.cc 2006-06-17 22:25:21 +0200 @@@@ -643,6 +643,8 @@@@ String line = 0; int inHeader = 1; + _needUTF8Convert = 0; + if (_response._modification_time) { delete _response._modification_time; @@@@ -731,7 +733,15 @@@@ token = strtok(token, "\n\t"); if (token && *token) + { _response._content_type = token; + if ((_response._content_type.indexOf("text/html") != -1) && (_response._content_type.indexOf("UTF-8") != -1)) + { + if ( debug > 4 ) + cout << "needUTF8Convert flagged" << endl; + _needUTF8Convert = 1; + } + } } else if( ! mystrncasecmp((char*)line, "content-length:", 15)) @@@@ -970,6 +980,31 @@@@ } + if ( _needUTF8Convert ) + { + if ( debug > 4 ) + cout << "Converting UTF-8 characters" << endl; + + char *srcPtr, *dstPtr; + srcPtr = dstPtr = _response._contents.get(); + while ( *srcPtr ) + { + if ( ( *srcPtr & 0x80 ) == 0 ) + *dstPtr++ = *srcPtr++; + else if ( ( *srcPtr & 0xE0 ) == 0xC0 ) { + *dstPtr++ = (((*srcPtr & 0x03) << 6) | (*(srcPtr+1) & 0x3F) ) & 0xFF; + srcPtr += 2; + } else if ( ( *srcPtr & 0xF0 ) == 0xE0 ) { + *dstPtr++ = '?'; + srcPtr += 3; + } else { + *dstPtr++ = '?'; + srcPtr += 4; + } + } + *dstPtr = 0; + } + // Set document length _response._document_length = _response._contents.length(); Index: htnet/HtHTTP.h --- htnet/HtHTTP.h.orig 2004-05-28 15:15:23 +0200 +++ htnet/HtHTTP.h 2006-06-17 22:25:21 +0200 @@@@ -316,6 +316,7 @@@@ int _bytes_read; // Bytes read URL _url; // URL to retrieve URL _referer; // Referring URL + int _needUTF8Convert; // Flag for simple UTF-8 convert String _accept_language; // accept-language directive Index: htnet/SSLConnection.cc --- htnet/SSLConnection.cc.orig 2004-05-28 15:15:23 +0200 +++ htnet/SSLConnection.cc 2006-06-17 22:25:21 +0200 @@@@ -131,6 +131,7 @@@@ { errno = 0; + if (!SSL_pending(ssl)) { if (timeout_value > 0) { FD_SET_T fds; FD_ZERO(&fds); @@@@ -144,6 +145,7 @@@@ if (selected <= 0) need_io_stop++; } + } if (!need_io_stop) count = SSL_read(ssl, buffer, maxlength); Index: htsearch/Collection.h --- htsearch/Collection.h.orig 2004-05-28 15:15:24 +0200 +++ htsearch/Collection.h 2006-06-17 22:28:24 +0200 @@@@ -36,9 +36,9 @@@@ const char *docExcerpt); ~Collection(); - void Collection::Open(); + void Open(); - void Collection::Close(); + void Close(); char *getWordFile() { return wordFile.get(); } DocumentRef *getDocumentRef(int id); Index: htsearch/Display.cc --- htsearch/Display.cc.orig 2004-05-28 15:15:24 +0200 +++ htsearch/Display.cc 2006-06-17 22:25:21 +0200 @@@@ -362,7 +362,7 @@@@ if (maxScore != 0 && maxScore != minScore) { - int percent = (int)((ref->DocScore() - minScore) * 100 / + int percent = (int)((ref->DocScore() - minScore) * 100.0 / (maxScore - minScore)); if (percent <= 0) percent = 1; @@@@ -694,6 +694,38 @@@@ // if (nPages > 1) { + // Assume number of page links is equal to maximum_page_buttons + // For example, if pageNumber=9, maximum_page_buttons=10, + // and nPages>=13, we get: + // + // [prev] 4 5 6 7 8 9 10 11 12 13 [next] + + int nPageButtons = config->Value("maximum_page_buttons", 10); + + // Initialize indexes of pages links + int first_page_index = 1; + int last_page_index = nPages; + + if (nPages > nPageButtons) + { + // Try to center the current page + int links_on_the_left = nPageButtons/2; + first_page_index = pageNumber - links_on_the_left; + last_page_index = first_page_index + nPageButtons - 1; + + // Adjust if required + if (first_page_index < 1) + { + first_page_index = 1; + last_page_index = nPageButtons; + } + else if (last_page_index > nPages ) + { + last_page_index = nPages; + first_page_index = nPages - nPageButtons + 1; + } + } + if (pageNumber > 1) { str = new String("Find("page_number_text"), " \t\r\n"); QuotedStringList npnt(config->Find("no_page_number_text"), " \t\r\n"); QuotedStringList sep(config->Find("page_number_separator"), " \t\r\n"); - if (nPages > config->Value("maximum_page_buttons", 10)) - nPages = config->Value("maximum_page_buttons", 10); - for (i = 1; i <= nPages; i++) + + for (i = first_page_index; i <= last_page_index; i++) { if (i == pageNumber) { Index: httools/htmerge.cc --- httools/htmerge.cc.orig 2004-05-28 15:15:25 +0200 +++ httools/htmerge.cc 2006-06-17 22:25:21 +0200 @@@@ -191,6 +191,64 @@@@ return 0; } +// Declare a record for storing callback data +class CallbackData : public Object +{ +public: + CallbackData(HtWordList * w, Dictionary * d, int o) + { word_db = w; dup_ids = d; docIDOffset = o; } + + HtWordList * word_db; + Dictionary * dup_ids; + int docIDOffset; +}; + + +//***************************************************************************** +// int OverrideCallback(WordList * wl, WordDBCursor &, +// const WordReference * w, Object & d ) +// +int +OverrideCallback(WordList * wl, + WordDBCursor &, + const WordReference * w, + Object & d) +{ + CallbackData & data = ((CallbackData &)d); + HtWordReference * ht_wr = (HtWordReference *)w; + String docIDKey; + + docIDKey << ht_wr->DocID(); + if (!((data.dup_ids)->Exists(docIDKey))) + { + ht_wr->DocID(ht_wr->DocID() + data.docIDOffset); + (data.word_db)->Override(*ht_wr); + } + + return OK; +} + +//***************************************************************************** +// int DeleteCallback(WordList * wl, WordDBCursor &, +// const WordReference * w, Object & d ) +// +int +DeleteCallback(WordList * wl, + WordDBCursor &, + const WordReference * w, + Object & d) +{ + CallbackData & data = ((CallbackData &)d); + HtWordReference * ht_wr = (HtWordReference *)w; + String docIDKey; + + docIDKey << ht_wr->DocID(); + if ((data.dup_ids)->Exists(docIDKey)) + (data.word_db)->Delete(*ht_wr); + + return OK; +} + //***************************************************************************** // void mergeDB() // @@@@ -316,8 +374,6 @@@@ // OK, after merging the doc DBs, we do the same for the words HtWordList mergeWordDB(*config), wordDB(*config); - List *words; - String docIDKey; if (wordDB.Open(config->Find("word_db"), O_RDWR) < 0) { @@@@ -332,33 +388,24 @@@@ } // Start the merging by going through all the URLs that are in - // the database to be merged - - words = mergeWordDB.WordRefs(); + // the database to be merged + WordCursor *mergeCursor; + WordKey empty; - words->Start_Get(); - HtWordReference *word; - while ((word = (HtWordReference *) words->Get_Next())) { - docIDKey = word->DocID(); - if (merge_dup_ids.Exists(docIDKey)) - continue; - - word->DocID(word->DocID() + docIDOffset); - wordDB.Override(*word); + CallbackData data(&wordDB, &merge_dup_ids, docIDOffset); + mergeCursor = mergeWordDB.Cursor(empty, OverrideCallback, (Object *)&data); + mergeCursor->Walk(); + delete mergeCursor; } - delete words; - words = wordDB.WordRefs(); - words->Start_Get(); - while ((word = (HtWordReference *) words->Get_Next())) { - docIDKey = word->DocID(); - if (db_dup_ids.Exists(docIDKey)) - wordDB.Delete(*word); + CallbackData data(&wordDB, &db_dup_ids, 0); + mergeCursor = wordDB.Cursor(empty,DeleteCallback, (Object *)&data); + mergeCursor->Walk(); + delete mergeCursor; } - delete words; - + // Cleanup--just close the two word databases mergeWordDB.Close(); wordDB.Close(); Index: installdir/htdig.conf --- installdir/htdig.conf.orig 2004-02-08 11:19:33 +0100 +++ installdir/htdig.conf 2006-06-17 22:25:21 +0200 @@@@ -47,7 +47,7 @@@@ # long list of URLs, it may be wise to replace it with something like # http://www. or comment this out and use the compiled-in default. # -common_url_parts: ${limit_urls_to} .html .htm .shtml +common_url_parts: ${limit_urls_to} .html .htm .shtml .php # # If there are particular pages that you definitely do NOT want to index, you @@@@ -70,7 +70,7 @@@@ # actual strings. # bad_extensions: .wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif \ - .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css + .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css .js .png .ico # # The string htdig will send in every request to identify the robot. Change Index: installdir/rundig --- installdir/rundig.orig 2003-12-29 09:49:05 +0100 +++ installdir/rundig 2006-06-17 22:25:21 +0200 @@@@ -30,7 +30,6 @@@@ done # If -a specified, note the database directory to move the temp files correctly -# TODO: Should also check for files relative to COMMONDIR. if [ -f "$conffile" ] then new_db_dir=`awk '/^[^#a-zA-Z]*database_dir/ { print $NF }' < $conffile` @@@@ -38,6 +37,11 @@@@ then DBDIR=$new_db_dir fi + new_dir=`awk '/^[^#a-zA-Z]*common_dir/ { print $NF }' < $conffile` + if [ "$new_dir" != "" ] + then + COMMONDIR=$new_dir + fi else echo "Config file $conffile cannot be found" exit 1 @ 1.8 log @fix syntax and add ICOn files, too @ text @d3 1 a3 1 +++ htdig/ExternalParser.cc 2006-03-23 19:47:16 +0100 d24 1 a24 1 +++ htnet/HtHTTP.cc 2006-03-23 19:46:42 +0100 d84 1 a84 1 +++ htnet/HtHTTP.h 2006-03-23 19:46:42 +0100 d95 1 a95 1 +++ htnet/SSLConnection.cc 2006-03-23 19:46:42 +0100 d112 15 d129 1 a129 1 +++ htsearch/Display.cc 2006-03-23 19:46:37 +0100 d192 1 a192 1 +++ httools/htmerge.cc 2006-03-23 19:46:37 +0100 d313 21 d336 1 a336 1 +++ installdir/rundig 2006-03-23 19:46:37 +0100 a356 21 Index: installdir/htdig.conf --- installdir/htdig.conf.orig 2004-02-08 11:19:33 +0100 +++ installdir/htdig.conf 2006-03-24 10:30:15 +0100 @@@@ -47,7 +47,7 @@@@ # long list of URLs, it may be wise to replace it with something like # http://www. or comment this out and use the compiled-in default. # -common_url_parts: ${limit_urls_to} .html .htm .shtml +common_url_parts: ${limit_urls_to} .html .htm .shtml .php # # If there are particular pages that you definitely do NOT want to index, you @@@@ -70,7 +70,7 @@@@ # actual strings. # bad_extensions: .wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif \ - .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css + .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css .js .png .ico # # The string htdig will send in every request to identify the robot. Change @ 1.7 log @PNG, JavaScript and PHP files are rather common nowadays on websites, too @ text @d338 1 a338 1 + .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css *.js *.png @ 1.6 log @remove an obsolete patch and add a bunch of patches from the vendor patch area @ text @d321 21 @ 1.5 log @upgrading package: htdig 3.2.0b3 -> 3.2.0b5 @ text @d1 98 a98 5 --- htlib/HtRegexReplace.cc.orig 2001-02-23 01:31:32.000000000 +0100 +++ htlib/HtRegexReplace.cc 2002-12-13 14:33:07.000000000 +0100 @@@@ -20,7 +20,7 @@@@ { } d100 72 a171 15 -HtRegexReplace::HtRegexReplace(const char *from, const char *to, int case_sensitive = 0) +HtRegexReplace::HtRegexReplace(const char *from, const char *to, int case_sensitive) : HtRegex(from, case_sensitive) { memset(®s, 0, sizeof(regs)); --- htlib/HtRegexReplaceList.cc.orig 2001-02-23 01:31:32.000000000 +0100 +++ htlib/HtRegexReplaceList.cc 2002-12-13 14:36:42.000000000 +0100 @@@@ -18,7 +18,7 @@@@ #include "HtRegexReplaceList.h" #include -HtRegexReplaceList::HtRegexReplaceList(StringList &list, int case_sensitive = 0) +HtRegexReplaceList::HtRegexReplaceList(StringList &list, int case_sensitive) { if (list.Count() & 1) d173 7 a179 2 @@@@ -49,7 +49,7 @@@@ // replacers gets chucked away d182 60 a241 10 -int HtRegexReplaceList::replace(String &str, int nullpattern = 0, int nullstr = 0) +int HtRegexReplaceList::replace(String &str, int nullpattern, int nullstr) { int repCount = replacers.Count(); int doneCount = 0; --- htcommon/DocumentDB.cc.orig 2001-02-23 01:31:29.000000000 +0100 +++ htcommon/DocumentDB.cc 2002-12-13 14:37:01.000000000 +0100 @@@@ -120,7 +120,7 @@@@ // We will attempt to open up an existing document database, // and accompanying index database and excerpt database d243 6 a248 62 -int DocumentDB::Read(const String& filename, const String& indexfilename = 0, const String& headfilename = 0) +int DocumentDB::Read(const String& filename, const String& indexfilename, const String& headfilename) { // If the database is already open, we'll close it // We might be opening this object with a new filename, so we'll be safe --- htcommon/HtConfiguration.cc.orig 2001-02-23 01:31:29.000000000 +0100 +++ htcommon/HtConfiguration.cc 2002-12-16 09:29:32.000000000 +0100 @@@@ -150,7 +150,7 @@@@ //********************************************************************* int HtConfiguration::Value(const char *blockName, const char *name, - const char *value, int default_value = 0) { + const char *value, int default_value) { int retValue=default_value; String tmpStr=Find(blockName,name,value); if (tmpStr[0]!=0) { @@@@ -161,7 +161,7 @@@@ //********************************************************************* double HtConfiguration::Double(const char *blockName, const char *name, - const char *value, double default_value = 0) { + const char *value, double default_value) { double retValue=default_value; String tmpStr=Find(blockName,name,value); if (tmpStr[0]!=0) { @@@@ -172,7 +172,7 @@@@ //********************************************************************* int HtConfiguration::Boolean(const char *blockName, const char *name, - const char *value, int default_value = 0) { + const char *value, int default_value) { int retValue=default_value; String tmpStr=Find(blockName,name,value); if (tmpStr[0]!=0) { @@@@ -192,7 +192,7 @@@@ //********************************************************************* //********************************************************************* int HtConfiguration::Value(URL *aUrl, const char *value, - int default_value = 0) { + int default_value) { int retValue=default_value; String tmpStr=Find(aUrl,value); if (tmpStr[0]!=0) { @@@@ -203,7 +203,7 @@@@ //********************************************************************* double HtConfiguration::Double(URL *aUrl,const char *value, - double default_value = 0) { + double default_value) { double retValue=default_value; String tmpStr=Find(aUrl,value); if (tmpStr[0]!=0) { @@@@ -214,7 +214,7 @@@@ //********************************************************************* int HtConfiguration::Boolean(URL *aUrl,const char *value, - int default_value = 0) { + int default_value) { int retValue=default_value; String tmpStr=Find(aUrl,value); if (tmpStr[0]!=0) { @@@@ -249,7 +249,6 @@@@ d250 4 a253 84 //******************************************************************** // -inline String HtConfiguration::ParseString(const char *str) const { return ParsedString(str).get(dcGlobalVars); } --- htcommon/HtConfiguration.h.orig 2001-02-23 01:31:29.000000000 +0100 +++ htcommon/HtConfiguration.h 2002-12-16 09:52:42.000000000 +0100 @@@@ -50,7 +50,6 @@@@ int Value(URL *aUrl,const char *value,int default_value = 0); double Double(URL *aUrl,const char *value,double default_value = 0); int Boolean(URL *aUrl,const char *value,int default_value = 0); - inline String ParseString(const char*) const; // parse ${var} string String getFileName() const { return FileName; } --- htlib/HtDateTime.h.orig 2001-02-23 01:31:32.000000000 +0100 +++ htlib/HtDateTime.h 2002-12-16 10:00:25.000000000 +0100 @@@@ -182,8 +182,8 @@@@ void SetDateTime(const time_t *t) { Ht_t = *t; } // by pointer // Set object time_t value from a struct tm - inline void SetDateTime(struct tm *); // by pointer - inline void SetDateTime(struct tm &t) { SetDateTime(&t);} // by reference + void SetDateTime(struct tm *); // by pointer + void SetDateTime(struct tm &t) { SetDateTime(&t);} // by reference // Set GM Time from single values input // Return true if it all went good, false else @@@@ -311,8 +311,8 @@@@ // For comparisons - between objects of the same class - inline bool operator==(const HtDateTime &right) const; - inline bool operator<(const HtDateTime &right) const; + bool operator==(const HtDateTime &right) const; + bool operator<(const HtDateTime &right) const; bool operator!=(const HtDateTime &right) const {return !( *this == right );} @@@@ -350,8 +350,8 @@@@ // For Copy - inline HtDateTime &operator=(const HtDateTime &right); - inline HtDateTime &operator=(const int right); + HtDateTime &operator=(const HtDateTime &right); + HtDateTime &operator=(const int right); --- htdig/ExternalTransport.h.orig 2001-02-23 01:31:30.000000000 +0100 +++ htdig/ExternalTransport.h 2002-12-16 10:11:01.000000000 +0100 @@@@ -26,7 +26,7 @@@@ class ExternalTransport; class ExternalTransport_Response : public Transport_Response { - friend ExternalTransport; + friend class ExternalTransport; // Nothing else... We just want it so we can access the protected fields }; --- htsearch/Display.cc.orig 2001-02-23 01:31:34.000000000 +0100 +++ htsearch/Display.cc 2002-12-16 10:37:22.000000000 +0100 @@@@ -36,6 +36,8 @@@@ #include #include #include +#include +#include #if !defined(DBL_MAX) && defined(MAXFLOAT) # define DBL_MAX MAXFLOAT --- htsearch/htsearch.cc.orig Fri Feb 23 01:31:35 2001 +++ htsearch/htsearch.cc Fri Jun 13 09:09:20 2003 @@@@ -315,8 +315,7 @@@@ String doc_index = config["doc_index"]; if (access((char*)doc_index, R_OK) < 0) { - reportError(form("Unable to read document index file '%s'\nDid you run h -tmerge?", + reportError(form("Unable to read document index file '%s'\nDid you run htmerge?", doc_index.get())); } d255 66 @ 1.4 log @pastime: fix building @ text @@ 1.3 log @upgrade to PLUS class @ text @d174 12 @ 1.3.2.1 log @mass Merge-From-CURRENT (MFC) in preparation for OpenPKG 1.3 [class PLUS only] @ text @a173 12 --- htsearch/htsearch.cc.orig Fri Feb 23 01:31:35 2001 +++ htsearch/htsearch.cc Fri Jun 13 09:09:20 2003 @@@@ -315,8 +315,7 @@@@ String doc_index = config["doc_index"]; if (access((char*)doc_index, R_OK) < 0) { - reportError(form("Unable to read document index file '%s'\nDid you run h -tmerge?", + reportError(form("Unable to read document index file '%s'\nDid you run htmerge?", doc_index.get())); } @ 1.2 log @flush @ text @d165 1 a165 1 @@@@ -36,6 +36,7 @@@@ d169 2 a170 1 +#include @ 1.1 log @flush @ text @d44 1 a44 1 +++ htcommon/HtConfiguration.cc 2002-12-13 14:38:08.000000000 +0100 d99 74 @