ROS 2 rclcpp + rcl - kilted  kilted
ROS 2 C++ Client Library with ROS Client Library
lexer.c
1 // Copyright 2018 Open Source Robotics Foundation, Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "rcl/error_handling.h"
16 #include "rcl/lexer.h"
17 
18 /* The lexer tries to find a lexeme in a string.
19  * It looks at one character at a time, and uses that character's value to decide how to transition
20  * a state machine.
21  * A transition is taken if a character's ASCII value falls within its range.
22  * There is never more than one matching transition.
23  *
24  * If no transition matches then it uses a state's '<else,M>' transition.
25  * Every state has exactly one '<else,M>' transition.
26  * In the diagram below all states have an `<else,0>` to T_NONE unless otherwise specified.
27  *
28  * When a transition is taken it causes the lexer to move to another character in the string.
29  * Normal transitions always move the lexer forwards one character.
30  * '<else,M>' transitions may cause the lexer to move forwards 1, or backwards N.
31  * The movement M is written as M = 1 + N so it can be stored in an unsigned integer.
32  * For example, an `<else>` transition with M = 0 moves the lexer forwards 1 character, M = 1 keeps
33  * the lexer at the current character, and M = 2 moves the lexer backwards one character.
34 
35 digraph remapping_lexer {
36  rankdir=LR;
37  node [shape = box, fontsize = 7];
38  T_TILDE_SLASH
39  T_URL_SERVICE
40  T_URL_TOPIC
41  T_COLON
42  T_NODE
43  T_NS
44  T_SEPARATOR
45  T_BR1
46  T_BR2
47  T_BR3
48  T_BR4
49  T_BR5
50  T_BR6
51  T_BR7
52  T_BR8
53  T_BR9
54  T_TOKEN
55  T_FORWARD_SLASH
56  T_WILD_ONE
57  T_WILD_MULTI
58  T_EOF
59  T_NONE
60  T_DOT
61  node [shape = circle];
62  S0 -> T_FORWARD_SLASH [ label = "/"];
63  S0 -> T_DOT [ label = "."];
64  S0 -> S1 [ label = "\\"];
65  S0 -> S2 [ label = "~"];
66  S0 -> S3 [ label = "_" ];
67  S0 -> S9 [ label = "a-qs-zA-Z"];
68  S0 -> S11 [ label = "r"];
69  S0 -> S30 [ label = "*"];
70  S0 -> S31 [ label = ":"];
71  S1 -> T_BR1 [ label = "1"];
72  S1 -> T_BR2 [ label = "2"];
73  S1 -> T_BR3 [ label = "3"];
74  S1 -> T_BR4 [ label = "4"];
75  S1 -> T_BR5 [ label = "5"];
76  S1 -> T_BR6 [ label = "6"];
77  S1 -> T_BR7 [ label = "7"];
78  S1 -> T_BR8 [ label = "8"];
79  S1 -> T_BR9 [ label = "9"];
80  S2 -> T_TILDE_SLASH [ label ="/" ];
81  S3 -> S4 [ label = "_" ];
82  S3 -> S10 [ label = "<else,1>", color = crimson, fontcolor = crimson];
83  S4 -> S5 [ label = "n" ];
84  S5 -> T_NS [ label = "s"];
85  S5 -> S6 [ label = "o" ];
86  S6 -> S8 [ label = "d" ];
87  S5 -> S7 [ label = "a" ];
88  S7 -> S8 [ label = "m" ];
89  S8 -> T_NODE [ label = "e"];
90  S9 -> T_TOKEN [ label = "<else,1>", color=crimson, fontcolor=crimson];
91  S9 -> S9 [ label = "a-zA-Z0-9"];
92  S9 -> S10 [ label = "_"];
93  S10 -> T_TOKEN [ label = "<else,1>", color=crimson, fontcolor=crimson];
94  S10 -> S9 [ label = "a-zA-Z0-9"];
95  S11 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
96  S11 -> S12 [ label = "o"];
97  S12 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
98  S12 -> S13 [ label = "s"];
99  S13 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
100  S13 -> S14 [ label = "t"];
101  S13 -> S21 [ label = "s"];
102  S14 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
103  S14 -> S15 [ label = "o"];
104  S15 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
105  S15 -> S16 [ label = "p"];
106  S16 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
107  S16 -> S17 [ label = "i"];
108  S17 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
109  S17 -> S18 [ label = "c"];
110  S18 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
111  S18 -> S19 [ label = ":"];
112  S19 -> S20 [ label = "/"];
113  S19 -> S9 [ label = "<else,2>", color=crimson, fontcolor=crimson];
114  S20 -> T_URL_TOPIC [ label = "/"];
115  S20 -> S9 [ label = "<else,3>", color=crimson, fontcolor=crimson];
116  S21 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
117  S21 -> S22 [ label = "e"];
118  S22 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
119  S22 -> S23 [ label = "r"];
120  S23 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
121  S23 -> S24 [ label = "v"];
122  S24 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
123  S24 -> S25 [ label = "i"];
124  S25 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
125  S25 -> S26 [ label = "c"];
126  S26 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
127  S26 -> S27 [ label = "e"];
128  S27 -> S28 [ label = ":"];
129  S27 -> S9 [ label = "<else,1>", color=crimson, fontcolor=crimson];
130  S28 -> S29 [ label = "/"];
131  S28 -> S9 [ label = "<else,2>", color=crimson, fontcolor=crimson];
132  S29 -> T_URL_SERVICE [ label = "/"];
133  S29 -> S9 [ label = "<else,3>", color=crimson, fontcolor=crimson];
134  S30 -> T_WILD_MULTI[ label = "*"];
135  S30 -> T_WILD_ONE [ label = "<else,1>", color=crimson, fontcolor=crimson];
136  S31 -> T_SEPARATOR [ label = "="];
137  S31 -> T_COLON [ label = "<else,1>", color=crimson, fontcolor=crimson];
138 }
139 */
140 
144 {
146  const unsigned char to_state;
148  const char range_start;
150  const char range_end;
152 
155 typedef struct rcl_lexer_state_s
156 {
158  const unsigned char else_state;
160  const unsigned char else_movement;
164 
165 #define S0 0u
166 #define S1 1u
167 #define S2 2u
168 #define S3 3u
169 #define S4 4u
170 #define S5 5u
171 #define S6 6u
172 #define S7 7u
173 #define S8 8u
174 #define S9 9u
175 #define S10 10u
176 #define S11 11u
177 #define S12 12u
178 #define S13 13u
179 #define S14 14u
180 #define S15 15u
181 #define S16 16u
182 #define S17 17u
183 #define S18 18u
184 #define S19 19u
185 #define S20 20u
186 #define S21 21u
187 #define S22 22u
188 #define S23 23u
189 #define S24 24u
190 #define S25 25u
191 #define S26 26u
192 #define S27 27u
193 #define S28 28u
194 #define S29 29u
195 #define S30 30u
196 #define S31 31u
197 #define LAST_STATE S31
198 
199 #define T_TILDE_SLASH 32u
200 #define T_URL_SERVICE 33u
201 #define T_URL_TOPIC 34u
202 #define T_COLON 35u
203 #define T_NODE 36u
204 #define T_NS 37u
205 #define T_SEPARATOR 38u
206 #define T_BR1 39u
207 #define T_BR2 40u
208 #define T_BR3 41u
209 #define T_BR4 42u
210 #define T_BR5 43u
211 #define T_BR6 44u
212 #define T_BR7 45u
213 #define T_BR8 46u
214 #define T_BR9 47u
215 #define T_TOKEN 48u
216 #define T_FORWARD_SLASH 49u
217 #define T_WILD_ONE 50u
218 #define T_WILD_MULTI 51u
219 #define T_EOF 52u
220 #define T_NONE 53u
221 #define T_DOT 54u
222 
223 // used to figure out if a state is terminal or not
224 #define FIRST_TERMINAL T_TILDE_SLASH
225 #define LAST_TERMINAL T_NONE
226 
227 // Used to mark where the last transition is in a state
228 #define END_TRANSITIONS {0, '\0', '\0'}
229 
230 static const rcl_lexer_state_t g_states[LAST_STATE + 1] =
231 {
232  // S0
233  {
234  T_NONE,
235  0u,
236  {
237  {T_FORWARD_SLASH, '/', '/'},
238  {T_DOT, '.', '.'},
239  {S1, '\\', '\\'},
240  {S2, '~', '~'},
241  {S3, '_', '_'},
242  {S9, 'a', 'q'},
243  {S9, 's', 'z'},
244  {S9, 'A', 'Z'},
245  {S11, 'r', 'r'},
246  {S30, '*', '*'},
247  {S31, ':', ':'},
248  END_TRANSITIONS
249  }
250  },
251  // S1
252  {
253  T_NONE,
254  0u,
255  {
256  {T_BR1, '1', '1'},
257  {T_BR2, '2', '2'},
258  {T_BR3, '3', '3'},
259  {T_BR4, '4', '4'},
260  {T_BR5, '5', '5'},
261  {T_BR6, '6', '6'},
262  {T_BR7, '7', '7'},
263  {T_BR8, '8', '8'},
264  {T_BR9, '9', '9'},
265  END_TRANSITIONS
266  }
267  },
268  // S2
269  {
270  T_NONE,
271  0u,
272  {
273  {T_TILDE_SLASH, '/', '/'},
274  END_TRANSITIONS
275  }
276  },
277  // S3
278  {
279  S10,
280  1u,
281  {
282  {S4, '_', '_'},
283  END_TRANSITIONS
284  }
285  },
286  // S4
287  {
288  T_NONE,
289  0u,
290  {
291  {S5, 'n', 'n'},
292  END_TRANSITIONS
293  }
294  },
295  // S5
296  {
297  T_NONE,
298  0u,
299  {
300  {T_NS, 's', 's'},
301  {S6, 'o', 'o'},
302  {S7, 'a', 'a'},
303  END_TRANSITIONS
304  }
305  },
306  // S6
307  {
308  T_NONE,
309  0u,
310  {
311  {S8, 'd', 'd'},
312  END_TRANSITIONS
313  }
314  },
315  // S7
316  {
317  T_NONE,
318  0u,
319  {
320  {S8, 'm', 'm'},
321  END_TRANSITIONS
322  }
323  },
324  // S8
325  {
326  T_NONE,
327  0u,
328  {
329  {T_NODE, 'e', 'e'},
330  END_TRANSITIONS
331  }
332  },
333  // S9
334  {
335  T_TOKEN,
336  1u,
337  {
338  {S9, 'a', 'z'},
339  {S9, 'A', 'Z'},
340  {S9, '0', '9'},
341  {S10, '_', '_'},
342  END_TRANSITIONS
343  }
344  },
345  // S10
346  {
347  T_TOKEN,
348  1u,
349  {
350  {S9, 'a', 'z'},
351  {S9, 'A', 'Z'},
352  {S9, '0', '9'},
353  END_TRANSITIONS
354  }
355  },
356  // S11
357  {
358  S9,
359  1u,
360  {
361  {S12, 'o', 'o'},
362  END_TRANSITIONS
363  }
364  },
365  // S12
366  {
367  S9,
368  1u,
369  {
370  {S13, 's', 's'},
371  END_TRANSITIONS
372  }
373  },
374  // S13
375  {
376  S9,
377  1u,
378  {
379  {S14, 't', 't'},
380  {S21, 's', 's'},
381  END_TRANSITIONS
382  }
383  },
384  // S14
385  {
386  S9,
387  1u,
388  {
389  {S15, 'o', 'o'},
390  END_TRANSITIONS
391  }
392  },
393  // S15
394  {
395  S9,
396  1u,
397  {
398  {S16, 'p', 'p'},
399  END_TRANSITIONS
400  }
401  },
402  // S16
403  {
404  S9,
405  1u,
406  {
407  {S17, 'i', 'i'},
408  END_TRANSITIONS
409  }
410  },
411  // S17
412  {
413  S9,
414  1u,
415  {
416  {S18, 'c', 'c'},
417  END_TRANSITIONS
418  }
419  },
420  // S18
421  {
422  S9,
423  1u,
424  {
425  {S19, ':', ':'},
426  END_TRANSITIONS
427  }
428  },
429  // S19
430  {
431  S9,
432  2u,
433  {
434  {S20, '/', '/'},
435  END_TRANSITIONS
436  }
437  },
438  // S20
439  {
440  S9,
441  3u,
442  {
443  {T_URL_TOPIC, '/', '/'},
444  END_TRANSITIONS
445  }
446  },
447  // S21
448  {
449  S9,
450  1u,
451  {
452  {S22, 'e', 'e'},
453  END_TRANSITIONS
454  }
455  },
456  // S21
457  {
458  S9,
459  1u,
460  {
461  {S23, 'r', 'r'},
462  END_TRANSITIONS
463  }
464  },
465  // S23
466  {
467  S9,
468  1u,
469  {
470  {S24, 'v', 'v'},
471  END_TRANSITIONS
472  }
473  },
474  // S24
475  {
476  S9,
477  1u,
478  {
479  {S25, 'i', 'i'},
480  END_TRANSITIONS
481  }
482  },
483  // S25
484  {
485  S9,
486  1u,
487  {
488  {S26, 'c', 'c'},
489  END_TRANSITIONS
490  }
491  },
492  // S26
493  {
494  S9,
495  1u,
496  {
497  {S27, 'e', 'e'},
498  END_TRANSITIONS
499  }
500  },
501  // S27
502  {
503  S9,
504  1u,
505  {
506  {S28, ':', ':'},
507  END_TRANSITIONS
508  }
509  },
510  // S28
511  {
512  S9,
513  2u,
514  {
515  {S29, '/', '/'},
516  END_TRANSITIONS
517  }
518  },
519  // S29
520  {
521  S9,
522  3u,
523  {
524  {T_URL_SERVICE, '/', '/'},
525  END_TRANSITIONS
526  }
527  },
528  // S30
529  {
530  T_WILD_ONE,
531  1u,
532  {
533  {T_WILD_MULTI, '*', '*'},
534  END_TRANSITIONS
535  }
536  },
537  // S31
538  {
539  T_COLON,
540  1u,
541  {
542  {T_SEPARATOR, '=', '='},
543  END_TRANSITIONS
544  }
545  }
546 };
547 
548 static const rcl_lexeme_t g_terminals[LAST_TERMINAL + 1] = {
549  // 0
551  // 1
553  // 2
555  // 3
557  // 4
559  // 5
561  // 6
563  // 7
565  // 8
567  // 9
569  // 10
571  // 11
573  // 12
575  // 13
577  // 14
579  // 15
581  // 16
583  // 17
585  // 18
587  // 19
589  // 20
591  // 21
593  // 22
595 };
596 
597 rcl_ret_t
599  const char * text,
600  rcl_lexeme_t * lexeme,
601  size_t * length)
602 {
603  RCUTILS_CAN_SET_MSG_AND_RETURN_WITH_ERROR_OF(RCL_RET_INVALID_ARGUMENT);
604  RCUTILS_CAN_SET_MSG_AND_RETURN_WITH_ERROR_OF(RCL_RET_ERROR);
605 
606  RCL_CHECK_ARGUMENT_FOR_NULL(text, RCL_RET_INVALID_ARGUMENT);
607  RCL_CHECK_ARGUMENT_FOR_NULL(lexeme, RCL_RET_INVALID_ARGUMENT);
608  RCL_CHECK_ARGUMENT_FOR_NULL(length, RCL_RET_INVALID_ARGUMENT);
609 
610  *length = 0u;
611 
612  if ('\0' == text[0u]) {
613  // Early exit if string is empty
614  *lexeme = RCL_LEXEME_EOF;
615  return RCL_RET_OK;
616  }
617 
618  const rcl_lexer_state_t * state;
619  char current_char;
620  size_t next_state = S0;
621  size_t movement;
622 
623  // Analyze one character at a time until lexeme is found
624  do {
625  if (next_state > LAST_STATE) {
626  // Should never happen
627  RCL_SET_ERROR_MSG("Internal lexer bug: next state does not exist");
628  return RCL_RET_ERROR;
629  }
630  state = &(g_states[next_state]);
631  current_char = text[*length];
632  next_state = 0u;
633  movement = 0u;
634 
635  // Look for a transition that contains this character in its range
636  size_t transition_idx = 0u;
637  const rcl_lexer_transition_t * transition;
638  do {
639  transition = &(state->transitions[transition_idx]);
640  if (transition->range_start <= current_char && transition->range_end >= current_char) {
641  next_state = transition->to_state;
642  break;
643  }
644  ++transition_idx;
645  } while (0u != transition->to_state);
646 
647  // if no transition was found, take the else transition
648  if (0u == next_state) {
649  next_state = state->else_state;
650  movement = state->else_movement;
651  }
652 
653  if (0u == movement) {
654  if ('\0' != current_char) {
655  // Go forwards 1 char as long as the end hasn't been reached
656  ++(*length);
657  }
658  } else {
659  // Go backwards N chars
660  if (movement - 1u > *length) {
661  // Should never happen
662  RCL_SET_ERROR_MSG("Internal lexer bug: movement would read before start of string");
663  return RCL_RET_ERROR;
664  }
665  *length -= movement - 1u;
666  }
667  } while (next_state < FIRST_TERMINAL);
668 
669  if (FIRST_TERMINAL > next_state || next_state - FIRST_TERMINAL > LAST_TERMINAL) {
670  // Should never happen
671  RCL_SET_ERROR_MSG("Internal lexer bug: terminal state does not exist");
672  return RCL_RET_ERROR;
673  }
674  *lexeme = g_terminals[next_state - FIRST_TERMINAL];
675  return RCL_RET_OK;
676 }
@ RCL_LEXEME_WILD_ONE
Definition: lexer.h:76
@ RCL_LEXEME_NS
__ns
Definition: lexer.h:50
@ RCL_LEXEME_TOKEN
a name between slashes, must match (([a-zA-Z](_)?)|_)([0-9a-zA-Z](_)?)*
Definition: lexer.h:72
@ RCL_LEXEME_NONE
Indicates no valid lexeme was found (end of input not reached)
Definition: lexer.h:36
@ RCL_LEXEME_EOF
Indicates end of input has been reached.
Definition: lexer.h:38
@ RCL_LEXEME_URL_TOPIC
rostopic://
Definition: lexer.h:44
@ RCL_LEXEME_BR2
\2
Definition: lexer.h:56
@ RCL_LEXEME_BR8
\8
Definition: lexer.h:68
@ RCL_LEXEME_BR4
\4
Definition: lexer.h:60
@ RCL_LEXEME_BR9
\9
Definition: lexer.h:70
@ RCL_LEXEME_URL_SERVICE
rosservice://
Definition: lexer.h:42
@ RCL_LEXEME_BR1
\1
Definition: lexer.h:54
@ RCL_LEXEME_BR6
\6
Definition: lexer.h:64
@ RCL_LEXEME_TILDE_SLASH
~/
Definition: lexer.h:40
@ RCL_LEXEME_FORWARD_SLASH
/
Definition: lexer.h:74
@ RCL_LEXEME_SEPARATOR
:=
Definition: lexer.h:52
@ RCL_LEXEME_BR3
\3
Definition: lexer.h:58
@ RCL_LEXEME_BR5
\5
Definition: lexer.h:62
@ RCL_LEXEME_BR7
\7
Definition: lexer.h:66
@ RCL_LEXEME_NODE
__node or __name
Definition: lexer.h:48
@ RCL_LEXEME_DOT
.
Definition: lexer.h:82
@ RCL_LEXEME_WILD_MULTI
**
Definition: lexer.h:78
@ RCL_LEXEME_COLON
:
Definition: lexer.h:46
enum rcl_lexeme_e rcl_lexeme_t
Type of lexeme found by lexical analysis.
RCL_PUBLIC RCL_WARN_UNUSED rcl_ret_t rcl_lexer_analyze(const char *text, rcl_lexeme_t *lexeme, size_t *length)
Do lexical analysis on a string.
Definition: lexer.c:598
const unsigned char else_movement
Movement associated with taking else state.
Definition: lexer.c:160
const unsigned char else_state
Transition to this state if no other transition matches.
Definition: lexer.c:158
const rcl_lexer_transition_t transitions[12]
Transitions in the state machine (NULL value at end of array)
Definition: lexer.c:162
const unsigned char to_state
Index of a state to transition to.
Definition: lexer.c:146
const char range_end
End of a range of chars (inclusive) which activates this transition.
Definition: lexer.c:150
const char range_start
Start of a range of chars (inclusive) which activates this transition.
Definition: lexer.c:148
#define RCL_RET_OK
Success return code.
Definition: types.h:27
#define RCL_RET_INVALID_ARGUMENT
Invalid argument return code.
Definition: types.h:35
#define RCL_RET_ERROR
Unspecified error return code.
Definition: types.h:29
rmw_ret_t rcl_ret_t
The type that holds an rcl return code.
Definition: types.h:24