src/shepp/SheppStrUtil.H

Go to the documentation of this file.
00001 /* ${copyright}$ */
00002 /* $Id: SheppStrUtil.H 910 2007-03-19 21:29:33Z eduardo $ */
00007 #ifndef __SHEPP_STR_UTIL_H__
00008 #define __SHEPP_STR_UTIL_H__
00009 
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
00012 
00013 #ifndef whitespace
00014 #define whitespace(c) (((c) == ' ') || ((c) == '\t'))
00015 #define LIMITED_READLINE
00016 #endif //whitespace
00017 
00018 using std::string;
00019 using std::vector;
00020 
00022 class SheppStrUtil
00023 {
00024 public:
00026 
00030   static char *trim(char *line)
00031   {
00032     
00033     // remove leading white spaces by incrementing pointer to beginning
00034     // of line
00035     char *head;
00036     for (head = line; whitespace(*head); head++)
00037       ;
00038     
00039     // returns if head points to the end of line
00040     if (*head == '\0') {
00041       return head;
00042     }
00043     
00044     // remove ending white spaces
00045     char *tail;
00046     tail = head + strlen(head) - 1;
00047     while (tail > head && whitespace(*tail)) {
00048       tail--;
00049     }
00050     
00051     // ends the string at position where a white space was last seen
00052     *++tail = '\0';
00053     
00054     return head;
00055   }
00056   
00058 
00063   static vector<string> parse_line(char *line)
00064   {
00065     vector<string> words;
00066     int from;
00067     int to;
00068     char word[MAX_WORD_LENGTH + 1];
00069     bool loop = true;
00070     
00071     do {
00072       from = 0;
00073       to = 1;
00074       
00075       // ignore leading white spaces
00076       while (line[from] != '\0' && whitespace(line[from])) {
00077         line++;
00078       }
00079       
00080       // find end of this word
00081       while (line[to] != '\0' && !whitespace(line[to])) {
00082         to++;
00083       }
00084       
00085       // boundary check
00086       if (to - from > MAX_WORD_LENGTH) {
00087         printf("Error: MAX_WORD_LENGTH is %d.\n", MAX_WORD_LENGTH);
00088         words.clear();
00089         break;
00090       }
00091 
00092       // insert found word in vector
00093       strncpy(word, line, to - from);
00094       word[to - from] = '\0';
00095       if (strlen(word) > 0) {
00096         words.push_back((string) word);
00097       }
00098 
00099       if (line[to] == '\0') {
00100         // this is the last word
00101         loop = false;
00102       } else {
00103         // go to beginning of next word
00104         line += to + 1;
00105       }
00106     } while (loop);
00107     
00108     return words;
00109   }
00110 
00120   static int split(string input, string &first, string &second,
00121                    string splitter, bool relaxed = false)
00122   {
00123     int split_pos = input.find(splitter, 0);
00124     
00125     // first can never be empty
00126     if (split_pos == 0) {
00127       return -1;
00128     }
00129     first = input.substr(0, split_pos);
00130     
00131     // second can be empty if relaxed is true
00132     if (split_pos == -1 ||
00133         split_pos == (int) (input.length() - 1)) {
00134       if (relaxed) {
00135         second = "";
00136       } else {
00137         return -1;
00138       }
00139     } else {
00140       second = input.substr(split_pos + 1);
00141     }
00142     
00143     return 0;  
00144   }
00145 
00147 
00152   static int quote_gathering(vector<string> &words, string &gather)
00153   {
00154     string tmp_str = gather.substr(0, 1);
00155 
00156     if (words.empty() && tmp_str == "\"") {
00157       return -1;
00158     }
00159 
00160     if (tmp_str == "\"") {
00161       gather = gather.substr(1, gather.length() - 1);
00162 
00163       tmp_str = gather.substr(gather.length() - 1);
00164       while (tmp_str != "\"") {
00165         if (words.empty()) {
00166           return -1;
00167         }
00168 
00169         gather += " " + words[0];
00170 
00171         words.erase(words.begin());
00172         tmp_str = gather.substr(gather.length() - 1);
00173       }
00174 
00175       //remove starting and ending quotes
00176       gather = gather.substr(0, gather.length() - 1);
00177     }
00178 
00179     return 0;
00180   }
00181 
00183 
00187   static string doc2id(const string &doc)
00188   {
00189     string numbers = doc;
00190 
00191     StrUtil::gsub(numbers, ".", "");
00192     StrUtil::gsub(numbers, "/", "");
00193     StrUtil::gsub(numbers, "-", "");
00194 
00195     return numbers;
00196   }
00197 };
00198 
00199 #endif //__SHEPP_STR_UTIL_H__

Generated on Tue Mar 17 16:03:07 2009 for libepp_nicbr by  doxygen 1.4.7