Line data Source code
1 : /**
2 : * The MIT License (MIT)
3 : *
4 : * Copyright (c) 2021 RSK Labs Ltd
5 : *
6 : * Permission is hereby granted, free of charge, to any person obtaining a copy
7 : * of this software and associated documentation files (the "Software"), to
8 : * deal in the Software without restriction, including without limitation the
9 : * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 : * sell copies of the Software, and to permit persons to whom the Software is
11 : * furnished to do so, subject to the following conditions:
12 : *
13 : * The above copyright notice and this permission notice shall be included in
14 : * all copies or substantial portions of the Software.
15 : *
16 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 : * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 : * IN THE SOFTWARE.
23 : */
24 :
25 : #include <stddef.h>
26 : #include <stdint.h>
27 : #include <string.h>
28 :
29 : #include "hal/platform.h"
30 : #include "hal/log.h"
31 :
32 : #include "srlp.h"
33 : #include "runtime.h"
34 :
35 : // This code tinkers with function pointers referenced from
36 : // const data structures, so we need to use the PIC macro.
37 : // See:
38 : // https://ledger.readthedocs.io/en/latest/userspace/memory.html?highlight=PIC#pic-and-model-implications
39 :
40 : // Parser state is modeled as a stack of context items of the form:
41 : //
42 : // { state, size, cursor }, where:
43 : //
44 : // - state: signals what the parser is currently parsing
45 : // - size: size in bytes of the structure being parsed
46 : // - cursor: number of bytes consumed for the structure being parsed
47 : //
48 : // At the bottom of the stack there's always a mark used to prevent a
49 : // stack underflow. The stack size is limited to MAX_RLP_CTX_DEPTH
50 : // items (see srlp.h).
51 :
52 : // Context item stack and stack pointer
53 : static uint8_t rlp_ctx_ptr;
54 : static rlp_ctx_t rlp_ctx[MAX_RLP_CTX_DEPTH + 1];
55 :
56 : // Pointer to the beginning of the next chunk to report
57 : static uint8_t* rlp_frame_start;
58 :
59 : // User callbacks
60 : static const rlp_callbacks_t* rlp_callbacks;
61 :
62 : /*
63 : * Initialize the parser.
64 : *
65 : * @arg[in] cbs struct holding callbacks to be called by the parser
66 : */
67 457 : void rlp_start(const rlp_callbacks_t* cbs) {
68 457 : rlp_callbacks = cbs;
69 457 : rlp_ctx_ptr = 0;
70 457 : explicit_bzero(rlp_ctx, sizeof(rlp_ctx));
71 457 : rlp_ctx[rlp_ctx_ptr].state = RLP_BOTTOM;
72 457 : }
73 :
74 : // Notify the user about a trivial bytearray (length = 0 or 1).
75 : // Defined as a macro to save stack space.
76 : #define TRIVIAL_BYTEARRAY(bytearray, len) \
77 : { \
78 : ((rlp_start_cb_t)PIC(rlp_callbacks->bytearray_start))(len); \
79 : ((rlp_chunk_cb_t)PIC(rlp_callbacks->bytearray_chunk))(bytearray, len); \
80 : ((rlp_end_cb_t)PIC(rlp_callbacks->bytearray_end))(); \
81 : }
82 :
83 : // Push a context item to the stack. Defined as a macro to save stack.
84 : // NOTE: Returns if stack overflow.
85 : //
86 : // @arg st state of context item to push
87 : // @arg sz size in bytes of the item top parse
88 : //
89 : #define RLP_PUSH_CTX(st, sz) \
90 : { \
91 : if (rlp_ctx_ptr == MAX_RLP_CTX_DEPTH) { \
92 : return RLP_STACK_OVERFLOW; \
93 : } \
94 : ++rlp_ctx_ptr; \
95 : rlp_ctx[rlp_ctx_ptr].state = (st); \
96 : rlp_ctx[rlp_ctx_ptr].size = (sz); \
97 : rlp_ctx[rlp_ctx_ptr].cursor = 0; \
98 : }
99 :
100 : // Pop current context. Defined as a macro to save stack space.
101 : // NOTE: Returns if stack underflow.
102 : //
103 : #define RLP_POP_CTX() \
104 : { \
105 : if (rlp_ctx_ptr == 0) { \
106 : return RLP_STACK_UNDERFLOW; \
107 : } \
108 : rlp_state_t state = rlp_ctx[rlp_ctx_ptr].state; \
109 : if (state == RLP_LIST) { \
110 : ((rlp_end_cb_t)PIC(rlp_callbacks->list_end))(); \
111 : } else if (state == RLP_STR) { \
112 : ((rlp_end_cb_t)PIC(rlp_callbacks->bytearray_end))(); \
113 : } \
114 : --rlp_ctx_ptr; \
115 : }
116 :
117 : // Called for every consumed byte of the input buffer. Traverse stack from
118 : // top to bottom, incrementing the number of consumed bytes for each list
119 : // context.
120 : // If consumed bytes reaches list size on the top context, pop it.
121 : // If consumed bytes reaches list size on a non-top context, fail due to
122 : // inconsistency.
123 : //
124 : // Defined as a macro to save stack.
125 : // NOTE: Returns if stack underflow
126 : //
127 : #define RLP_UPDATE_LISTS() \
128 : { \
129 : int __ix; \
130 : for (__ix = rlp_ctx_ptr; __ix >= 0; --__ix) { \
131 : if (rlp_ctx[__ix].state != RLP_LIST) { \
132 : continue; \
133 : } \
134 : ++rlp_ctx[__ix].cursor; \
135 : if (rlp_ctx[__ix].cursor == rlp_ctx[__ix].size) { \
136 : if (__ix == rlp_ctx_ptr) { \
137 : RLP_POP_CTX(); \
138 : } else { \
139 : return RLP_MALFORMED; \
140 : } \
141 : } \
142 : } \
143 : }
144 :
145 : // Handle beginning of list. Defined as a macro to save stack space.
146 : #define HANDLE_RLP_LIST(b) \
147 : { \
148 : if (*b <= 0x7F) { \
149 : TRIVIAL_BYTEARRAY(rlp_frame_start, 1); \
150 : } else if (*b == 0x80) { \
151 : TRIVIAL_BYTEARRAY(rlp_frame_start, 0); \
152 : } else if (*b <= 0xB7) { \
153 : RLP_PUSH_CTX(RLP_STR, *b - 0x80); \
154 : ((rlp_start_cb_t)PIC(rlp_callbacks->bytearray_start))(*b - 0x80); \
155 : } else if (*b <= 0xBF) { \
156 : if (*b - 0xB7 > (int)sizeof(uint16_t)) { \
157 : return RLP_TOO_LONG; \
158 : } \
159 : RLP_PUSH_CTX(RLP_STR_LEN, *b - 0xB7); \
160 : } else if (*b <= 0xF7) { \
161 : RLP_PUSH_CTX(RLP_LIST, *b - 0xC0 + 1); \
162 : ((rlp_start_cb_t)PIC(rlp_callbacks->list_start))(*b - 0xC0); \
163 : } else { \
164 : if (*b - 0xF7 > (int)sizeof(uint16_t)) { \
165 : return RLP_TOO_LONG; \
166 : } \
167 : RLP_PUSH_CTX(RLP_LIST_LEN, *b - 0xF7); \
168 : } \
169 : rlp_frame_start = b + 1; \
170 : }
171 :
172 : // Handle beginning of byte array. Defined as a macero to save space.
173 : #define HANDLE_RLP_STR(b) \
174 : { \
175 : ++rlp_ctx[rlp_ctx_ptr].cursor; \
176 : if (rlp_ctx[rlp_ctx_ptr].size == rlp_ctx[rlp_ctx_ptr].cursor) { \
177 : ((rlp_chunk_cb_t)PIC(rlp_callbacks->bytearray_chunk))( \
178 : rlp_frame_start, b - rlp_frame_start + 1); \
179 : RLP_POP_CTX(); \
180 : rlp_frame_start = b + 1; \
181 : } \
182 : }
183 :
184 : /** Print the given SRLP context (see srlp.h) */
185 : #ifdef DEBUG_SRLP
186 : static void LOG_SRLP_CTX(uint8_t v, rlp_ctx_t ctx[], uint8_t ptr) {
187 : LOG("'0x%02x' ; <%u> ; ", v, ptr);
188 : for (int i = ptr; i >= 0; --i) {
189 : rlp_ctx_t cur = ctx[i];
190 : LOG("{%d, %u, %u} ; ", cur.state, cur.size, cur.cursor);
191 : }
192 : LOG("{EOC}\n");
193 : }
194 : #else
195 : #define LOG_SRLP_CTX(...)
196 : #endif
197 :
198 : /*
199 : * Consume a chunk of bytes.
200 : *
201 : * @arg[in] buf: buffer holdoing bytes to be consumed
202 : * @arg[in] len: number of bytes to consume in buffer
203 : *
204 : * @return
205 : * RLP_OK if bytes were consumed successfully
206 : * RLP_TOO_LONG if len greater than RLP_BUFFER_SIZE
207 : * RLP_STACK_OVERFLOW if list nesting level is greater than MAX_RLP_CTX_DEPTH
208 : * RLP_STACK_UNDERFLOW if RLP to parse is ill-formed (e.g., [[a])
209 : * RLP_MALFORMED if RLP to parse is ill-formed wrt reported sizes
210 : */
211 4659 : int rlp_consume(uint8_t* buf, const uint8_t len) {
212 4659 : if (len > RLP_BUFFER_SIZE) {
213 0 : return RLP_TOO_LONG;
214 : }
215 :
216 4659 : rlp_frame_start = buf;
217 306346 : for (uint8_t i = 0; i < len; ++i) {
218 301695 : uint8_t* b = buf + i;
219 :
220 : LOG_SRLP_CTX(*b, rlp_ctx, rlp_ctx_ptr);
221 :
222 301695 : switch (rlp_ctx[rlp_ctx_ptr].state) {
223 8462 : case RLP_BOTTOM:
224 : case RLP_LIST:
225 8462 : HANDLE_RLP_LIST(b);
226 8462 : break;
227 1755 : case RLP_LIST_LEN:
228 1755 : if (rlp_ctx[rlp_ctx_ptr].size == 0) {
229 651 : rlp_ctx[rlp_ctx_ptr].state = RLP_LIST;
230 651 : rlp_ctx[rlp_ctx_ptr].size = rlp_ctx[rlp_ctx_ptr].cursor;
231 651 : rlp_ctx[rlp_ctx_ptr].cursor = 0;
232 651 : rlp_frame_start = b;
233 651 : ((rlp_start_cb_t)PIC(rlp_callbacks->list_start))(
234 651 : rlp_ctx[rlp_ctx_ptr].size);
235 651 : HANDLE_RLP_LIST(b);
236 : } else {
237 1104 : rlp_ctx[rlp_ctx_ptr].size -= 1;
238 1104 : rlp_ctx[rlp_ctx_ptr].cursor =
239 1104 : (rlp_ctx[rlp_ctx_ptr].cursor << 8) | *b;
240 : }
241 1755 : break;
242 288697 : case RLP_STR:
243 288697 : HANDLE_RLP_STR(b);
244 288693 : break;
245 2781 : case RLP_STR_LEN:
246 2781 : if (rlp_ctx[rlp_ctx_ptr].size == 0) {
247 1165 : rlp_ctx[rlp_ctx_ptr].state = RLP_STR;
248 1165 : rlp_ctx[rlp_ctx_ptr].size = rlp_ctx[rlp_ctx_ptr].cursor;
249 1165 : rlp_ctx[rlp_ctx_ptr].cursor = 0;
250 1165 : rlp_frame_start = b;
251 1165 : ((rlp_start_cb_t)PIC(rlp_callbacks->bytearray_start))(
252 1165 : rlp_ctx[rlp_ctx_ptr].size);
253 1165 : HANDLE_RLP_STR(b);
254 : } else {
255 1616 : rlp_ctx[rlp_ctx_ptr].size -= 1;
256 1616 : rlp_ctx[rlp_ctx_ptr].cursor =
257 1616 : (rlp_ctx[rlp_ctx_ptr].cursor << 8) | *b;
258 : }
259 2781 : break;
260 : }
261 :
262 : // Increment consumed bytes for all lists in the context
263 1227641 : RLP_UPDATE_LISTS();
264 : }
265 :
266 : // If the item being parsed is a byte array, notify about the seen chunk.
267 4651 : if (rlp_ctx[rlp_ctx_ptr].state == RLP_STR) {
268 4151 : ((rlp_chunk_cb_t)PIC(rlp_callbacks->bytearray_chunk))(
269 4151 : rlp_frame_start, buf + len - rlp_frame_start);
270 : }
271 :
272 4651 : return RLP_OK;
273 : }
274 :
275 : /*
276 : * Guess the length in bytes of the RLP prefix for str of the given size.
277 : *
278 : * NOTE: This guessing because for single byte strings we need the str
279 : * value to determine accurately. For single byte strings, this method
280 : * always return one. It's up to the caller to take this into account.
281 : *
282 : * @arg[in] str_size string size
283 : */
284 7629 : uint8_t guess_rlp_str_prefix_size(uint16_t str_size) {
285 7629 : if (str_size == 0)
286 1780 : return 1;
287 5849 : else if (str_size == 1)
288 : // Guessing happens here: we should return zero if
289 : // the string's single byte is less or equals 0x7F.
290 817 : return 1;
291 5032 : else if (str_size <= 55)
292 3944 : return 1;
293 : else {
294 : uint8_t n;
295 2568 : for (n = 0; str_size != 0; str_size >>= 8, ++n)
296 : ;
297 1088 : return n + 1;
298 : }
299 : }
300 :
301 : /*
302 : * Get the length in bytes of the (minimal) RLP prefix for a list of the
303 : * given size (max size for any given list is 2^16-1 in this
304 : * implementation)
305 : *
306 : * @arg[in] list_size list size
307 : */
308 50 : uint8_t rlp_list_prefix_size(uint16_t list_size) {
309 50 : if (list_size <= 55)
310 0 : return 1;
311 :
312 : uint8_t n;
313 146 : for (n = 0; list_size != 0; list_size >>= 8, ++n)
314 : ;
315 50 : return n + 1;
316 : }
|