00001
00002
00007 #ifndef __SHEPP_STR_UTIL_H__
00008 #define __SHEPP_STR_UTIL_H__
00009
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
00012
// Fallback blank-character classifier: only installed when no `whitespace`
// macro has been defined by an earlier include. It is true for a space or a
// horizontal tab and false for everything else (including '\0').
// Caution: the argument is evaluated twice, so do not pass expressions with
// side effects (e.g. whitespace(*p++)).
00013 #ifndef whitespace
00014 #define whitespace(c) (((c) == ' ') || ((c) == '\t'))
// Defined only when the fallback above is taken; nothing in this header reads
// it. NOTE(review): presumably tells other project files that a full readline
// library is not available -- confirm against the code that tests it.
00015 #define LIMITED_READLINE
00016 #endif //whitespace
00017
00018 using std::string;
00019 using std::vector;
00020
00022 class SheppStrUtil
00023 {
00024 public:
00026
/**
 * Strips leading and trailing blanks from a C string, in place.
 *
 * "Blank" is whatever the `whitespace` macro accepts (space and tab in this
 * header's fallback definition). Nothing is copied or allocated: the result
 * points into the caller's buffer, and a terminator is written right after
 * the last non-blank character. A line made only of blanks is left
 * unmodified and a pointer to its terminator is returned.
 *
 * @param line Writable, NUL-terminated buffer. May be NULL.
 * @return Pointer to the first non-blank character inside @p line, or NULL
 *         when @p line is NULL.
 */
static char *trim(char *line)
{
    // A NULL line is handed back untouched instead of being dereferenced.
    if (line == NULL) {
        return NULL;
    }

    // Advance past the leading blanks.
    char *head = line;
    while (whitespace(*head)) {
        ++head;
    }

    // Nothing but blanks: head already points at the terminator.
    if (*head == '\0') {
        return head;
    }

    // Walk back from the last character over the trailing blanks. The
    // `tail > head` bound is only a safety net: *head is known non-blank.
    char *tail = head + std::strlen(head) - 1;
    while (tail > head && whitespace(*tail)) {
        --tail;
    }

    // Cut the string right after the last non-blank character.
    tail[1] = '\0';

    return head;
}
00056
00058
/**
 * Splits a line into its blank-separated words.
 *
 * Words are maximal runs of characters for which `whitespace()` is false;
 * any number of blanks may precede, separate or follow them. The input
 * buffer is only read, never modified.
 *
 * If any word is longer than MAX_WORD_LENGTH characters, an error message is
 * printed on standard output and an empty vector is returned (words parsed
 * before the offending one are discarded).
 *
 * @param line NUL-terminated text to tokenise. May be NULL.
 * @return The words in order of appearance; empty when @p line is NULL, holds
 *         no words, or contains an over-long word.
 */
static std::vector<std::string> parse_line(char *line)
{
    std::vector<std::string> words;

    if (line == NULL) {
        return words;
    }

    const char *cursor = line;
    while (*cursor != '\0') {
        // Skip the blanks in front of the next word.
        while (*cursor != '\0' && whitespace(*cursor)) {
            ++cursor;
        }
        if (*cursor == '\0') {
            break;  // only trailing blanks were left
        }

        // Find the end of the word. The scan starts AT the current character
        // (not one past it), so it can never step over the terminator of an
        // empty remainder.
        const char *end = cursor;
        while (*end != '\0' && !whitespace(*end)) {
            ++end;
        }

        // Enforce the project-wide word length limit.
        if (end - cursor > MAX_WORD_LENGTH) {
            std::printf("Error: MAX_WORD_LENGTH is %d.\n", MAX_WORD_LENGTH);
            words.clear();
            break;
        }

        words.push_back(
            std::string(cursor,
                        static_cast<std::string::size_type>(end - cursor)));
        cursor = end;
    }

    return words;
}
00110
/**
 * Splits @p input around the first occurrence of @p splitter.
 *
 * On success @p first receives the text before the separator and @p second
 * the text after it; the separator itself (of any length) appears in neither.
 *
 * @param input    Text to split. Taken by value on purpose, so that passing
 *                 the same string as @p input and @p first is safe.
 * @param first    Receives the part before the separator. Note that it is
 *                 also overwritten (with the whole input) when the call fails
 *                 because no separator was found.
 * @param second   Receives the part after the separator, or "" in relaxed
 *                 mode when there is none.
 * @param splitter Separator to look for.
 * @param relaxed  When true, a missing separator or an empty second part is
 *                 accepted and @p second is set to "".
 * @return 0 on success; -1 when the input starts with the separator (this
 *         includes an empty @p splitter, which is found at position 0), or,
 *         unless @p relaxed, when the separator is missing or nothing
 *         follows it.
 */
static int split(std::string input, std::string &first, std::string &second,
                 std::string splitter, bool relaxed = false)
{
    const std::string::size_type split_pos = input.find(splitter);

    // An empty first part is never accepted, relaxed or not.
    if (split_pos == 0) {
        return -1;
    }

    // With split_pos == npos (not found) this copies the whole input.
    first = input.substr(0, split_pos);

    // The second part starts after the WHOLE separator, not one character
    // after its beginning.
    const bool found = (split_pos != std::string::npos);
    if (!found || split_pos + splitter.length() >= input.length()) {
        if (!relaxed) {
            return -1;
        }
        second = "";
    } else {
        second = input.substr(split_pos + splitter.length());
    }

    return 0;
}
00145
00147
/**
 * Reassembles a double-quoted argument that was split on blanks.
 *
 * If @p gather does not start with a double quote, nothing happens. Otherwise
 * the opening quote is removed and words are taken from the front of
 * @p words, each appended to @p gather after a single space, until the text
 * ends with a double quote; that closing quote is then removed as well.
 *
 * A token that already carries its closing quote (e.g. `"foo"`) is accepted
 * even when @p words is empty, and a token consisting of the opening quote
 * alone is handled like any other unterminated token.
 *
 * @param words  Remaining words of the line; the ones consumed are erased
 *               from its front. When the closing quote is never found, every
 *               word ends up consumed.
 * @param gather In: the first word of the argument. Out: the argument without
 *               its quotes. Left untouched when it is unterminated and
 *               @p words is empty.
 * @return 0 on success (including the unquoted case), -1 when no closing
 *         quote was found.
 */
static int quote_gathering(std::vector<std::string> &words, std::string &gather)
{
    // Unquoted (or empty) argument: nothing to do.
    if (gather.empty() || gather[0] != '"') {
        return 0;
    }

    // The closing quote must be a character other than the opening one.
    bool closed = gather.length() >= 2 && gather[gather.length() - 1] == '"';

    // Unterminated with nothing left to append: fail without touching gather.
    if (!closed && words.empty()) {
        return -1;
    }

    // Drop the opening quote.
    gather.erase(0, 1);

    // Append words until one of them brings the closing quote. The consumed
    // words are only counted here and erased in a single call below, rather
    // than shifting the whole vector once per word.
    std::vector<std::string>::size_type used = 0;
    while (!closed && used < words.size()) {
        gather += ' ';
        gather += words[used];
        ++used;
        closed = gather[gather.length() - 1] == '"';
    }
    words.erase(words.begin(), words.begin() + used);

    if (!closed) {
        return -1;
    }

    // Drop the closing quote.
    gather.erase(gather.length() - 1);

    return 0;
}
00181
00183
00187 static string doc2id(const string &doc)
00188 {
00189 string numbers = doc;
00190
00191 StrUtil::gsub(numbers, ".", "");
00192 StrUtil::gsub(numbers, "/", "");
00193 StrUtil::gsub(numbers, "-", "");
00194
00195 return numbers;
00196 }
00197 };
00198
00199 #endif //__SHEPP_STR_UTIL_H__