libepp_nicbr
SheppStrUtil.H
Go to the documentation of this file.
1 /* ${copyright}$ */
2 /* $Id: SheppStrUtil.H 1224 2014-11-06 12:42:42Z rafael $ */
7 #ifndef __SHEPP_STR_UTIL_H__
8 #define __SHEPP_STR_UTIL_H__
9 
10 #include <string>
11 #include <vector>
12 
13 #ifndef whitespace
14 #define whitespace(c) (((c) == ' ') || ((c) == '\t'))
15 #define LIMITED_READLINE
16 #endif //whitespace
17 
18 using std::string;
19 using std::vector;
20 
23 {
24 public:
26 
30  static char *trim(char *line)
31  {
32 
33  // remove leading white spaces by incrementing pointer to beginning
34  // of line
35  char *head;
36  for (head = line; whitespace(*head); head++)
37  ;
38 
39  // returns if head points to the end of line
40  if (*head == '\0') {
41  return head;
42  }
43 
44  // remove ending white spaces
45  char *tail;
46  tail = head + strlen(head) - 1;
47  while (tail > head && whitespace(*tail)) {
48  tail--;
49  }
50 
51  // ends the string at position where a white space was last seen
52  *++tail = '\0';
53 
54  return head;
55  }
56 
58 
63  static vector<string> parse_line(char *line)
64  {
65  vector<string> words;
66  int from;
67  int to;
68  char word[MAX_WORD_LENGTH + 1];
69  bool loop = true;
70 
71  do {
72  from = 0;
73  to = 1;
74 
75  // ignore leading white spaces
76  while (line[from] != '\0' && whitespace(line[from])) {
77  line++;
78  }
79 
80  // find end of this word
81  while (line[to] != '\0' && !whitespace(line[to])) {
82  to++;
83  }
84 
85  // boundary check
86  if (to - from > MAX_WORD_LENGTH) {
87  printf("Error: MAX_WORD_LENGTH is %d.\n", MAX_WORD_LENGTH);
88  words.clear();
89  break;
90  }
91 
92  // insert found word in vector
93  strncpy(word, line, to - from);
94  word[to - from] = '\0';
95  if (strlen(word) > 0) {
96  words.push_back((string) word);
97  }
98 
99  if (line[to] == '\0') {
100  // this is the last word
101  loop = false;
102  } else {
103  // go to beginning of next word
104  line += to + 1;
105  }
106  } while (loop);
107 
108  return words;
109  }
110 
/// Splits input in two at the first occurrence of splitter.
///
/// @param input    text to be split
/// @param first    receives the text before the splitter, or the whole
///                 input when the splitter is not found. It is assigned
///                 whenever input does not start with the splitter, even
///                 if the call then fails because second would be empty.
/// @param second   receives the text after the splitter; set to "" when
///                 there is none and relaxed is true, left untouched on
///                 failure
/// @param splitter separator to look for; may be longer than one
///                 character
/// @param relaxed  when true, a missing splitter or an empty second part
///                 is accepted
/// @return 0 on success; -1 if first would be empty (input starts with
///         the splitter, or the splitter is empty), or if second would be
///         empty and relaxed is false
static int split(string input, string &first, string &second,
                 string splitter, bool relaxed = false)
{
  const string::size_type split_pos = input.find(splitter);

  // first can never be empty
  if (split_pos == 0) {
    return -1;
  }

  // when the splitter is absent split_pos is npos, which substr()
  // interprets as "up to the end of input"
  first = input.substr(0, split_pos);

  // the remainder starts right after the whole splitter, however long
  // the splitter is
  const bool has_second =
    split_pos != string::npos &&
    split_pos + splitter.length() < input.length();

  if (has_second) {
    second = input.substr(split_pos + splitter.length());
    return 0;
  }

  // second can be empty only if relaxed is true
  if (!relaxed) {
    return -1;
  }
  second = "";

  return 0;
}
145 
147 
/// Gathers command-line arguments bounded by double quotes into a
/// single string.
///
/// If gather does not start with a double quote it is left untouched.
/// Otherwise words are taken from the front of words and appended to
/// gather, separated by one space each, until gather ends with a double
/// quote; the opening and closing quotes are then removed.
///
/// @param words remaining arguments; every word consumed is erased from
///              the front of the vector
/// @param gather in: the first argument; out: the gathered text without
///               its bounding quotes
/// @return 0 on success; -1 if gather is empty or if the closing quote is
///         never found. When the failure is detected only after some
///         gathering took place, gather and words stay modified.
static int quote_gathering(vector<string> &words, string &gather)
{
  // Safety check for empty gather
  if (gather.empty()) {
    return -1;
  }

  // nothing to gather unless the argument opens with a quote
  if (gather[0] != '"') {
    return 0;
  }

  // Fails right away, leaving gather untouched, if there are no more
  // words and the argument is only a quote character or does not end
  // with a quote
  if (words.empty() &&
      (gather.length() == 1 || gather[gather.length() - 1] != '"')) {
    return -1;
  }

  // remove the opening quote
  gather.erase(0, 1);

  // Append words until the closing quote shows up. An empty buffer (the
  // argument was a lone quote character) has no last character to
  // inspect, so it simply counts as "not closed yet".
  while (gather.empty() || gather[gather.length() - 1] != '"') {
    if (words.empty()) {
      return -1;
    }

    gather += " " + words[0];
    words.erase(words.begin());
  }

  // remove the closing quote
  gather.erase(gather.length() - 1);

  return 0;
}
190 
192 
196  static string doc2id(const string &doc)
197  {
198  string numbers = doc;
199 
200  StrUtil::gsub(numbers, ".", "");
201  StrUtil::gsub(numbers, "/", "");
202  StrUtil::gsub(numbers, "-", "");
203 
204  return numbers;
205  }
206 };
207 
208 #endif //__SHEPP_STR_UTIL_H__
static int split(string input, string &first, string &second, string splitter, bool relaxed=false)
Definition: SheppStrUtil.H:120
static vector< string > parse_line(char *line)
Given a line, split its words by white space into a string vector (shepp)
Definition: SheppStrUtil.H:63
Useful string manipulation routines used by shepp.
Definition: SheppStrUtil.H:22
static char * trim(char *line)
Removes leading and trailing white space from line (shepp)
Definition: SheppStrUtil.H:30
static int gsub(string &buffer, const char *pat, const char *drp)
Used for substitution of pat for drp within buffer.
static int quote_gathering(vector< string > &words, string &gather)
Gathers command-line arguments bounded by quotes in a string.
Definition: SheppStrUtil.H:152
static string doc2id(const string &doc)
Removes chars [./-] from document strings.
Definition: SheppStrUtil.H:196