(best_matching_font): Abort for best == NULL before we start to use it.
[bpt/emacs.git] / lisp / emacs-lisp / bindat.el
CommitLineData
4141da38
KS
1;;; bindat.el --- binary data structure packing and unpacking.
2
ceb4c4d3 3;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4141da38
KS
4
5;; Author: Kim F. Storm <storm@cua.dk>
6;; Assignment name: struct.el
7;; Keywords: comm data processes
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
23;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24;; Boston, MA 02110-1301, USA.
4141da38
KS
25
26;;; Commentary:
27
28;; Packing and unpacking of (binary) data structures.
29;;
30;; The data formats used in binary files and network protocols are
31;; often structured data which can be described by a C-style structure
32;; such as the one shown below. Using the bindat package, decoding
33;; and encoding binary data formats like these is made simple using a
34;; structure specification which closely resembles the C style
35;; structure declarations.
a1506d29 36;;
4141da38 37;; Encoded (binary) data is stored in a unibyte string or vector,
a1506d29 38;; while the decoded data is stored in an alist with (FIELD . VALUE)
4141da38
KS
39;; pairs.
40
41;; Example:
a1506d29 42
4141da38 43;; Consider the following C structures:
a1506d29 44;;
4141da38
KS
45;; struct header {
46;; unsigned long dest_ip;
47;; unsigned long src_ip;
48;; unsigned short dest_port;
49;; unsigned short src_port;
50;; };
a1506d29 51;;
4141da38
KS
52;; struct data {
53;; unsigned char type;
54;; unsigned char opcode;
55;; unsigned short length; /* In little endian order */
56;; unsigned char id[8]; /* nul-terminated string */
57;; unsigned char data[/* (length + 3) & ~3 */];
58;; };
a1506d29 59;;
4141da38
KS
60;; struct packet {
61;; struct header header;
62;; unsigned char items;
63;; unsigned char filler[3];
64;; struct data item[/* items */];
65;; };
a1506d29 66;;
4141da38 67;; The corresponding Lisp bindat specification looks like this:
a1506d29 68;;
4141da38
KS
69;; (setq header-spec
70;; '((dest-ip ip)
71;; (src-ip ip)
72;; (dest-port u16)
73;; (src-port u16)))
a1506d29 74;;
4141da38
KS
75;; (setq data-spec
76;; '((type u8)
77;; (opcode u8)
78;; (length u16r) ;; little endian order
79;; (id strz 8)
80;; (data vec (length))
81;; (align 4)))
a1506d29 82;;
4141da38
KS
83;; (setq packet-spec
84;; '((header struct header-spec)
85;; (items u8)
86;; (fill 3)
87;; (item repeat (items)
8b09abe1 88;; (struct data-spec))))
a1506d29 89;;
4141da38
KS
90;;
91;; A binary data representation may look like
a1506d29 92;; [ 192 168 1 100 192 168 1 101 01 28 21 32 2 0 0 0
4141da38
KS
93;; 2 3 5 0 ?A ?B ?C ?D ?E ?F 0 0 1 2 3 4 5 0 0 0
94;; 1 4 7 0 ?B ?C ?D ?E ?F ?G 0 0 6 7 8 9 10 11 12 0 ]
a1506d29 95;;
4141da38
KS
96;; The corresponding decoded structure looks like
97;;
98;; ((header
99;; (dest-ip . [192 168 1 100])
100;; (src-ip . [192 168 1 101])
101;; (dest-port . 284)
102;; (src-port . 5408))
103;; (items . 2)
104;; (item ((data . [1 2 3 4 5])
105;; (id . "ABCDEF")
106;; (length . 5)
107;; (opcode . 3)
108;; (type . 2))
109;; ((data . [6 7 8 9 10 11 12])
110;; (id . "BCDEFG")
111;; (length . 7)
112;; (opcode . 4)
113;; (type . 1))))
114;;
115;; To access a specific value in this structure, use the function
116;; bindat-get-field with the structure as first arg followed by a list
117;; of field names and array indexes, e.g. using the data above,
118;; (bindat-get-field decoded-structure 'item 1 'id)
119;; returns "BCDEFG".
120
121;; Binary Data Structure Specification Format
122;; ------------------------------------------
123
124;; The data specification is formatted as follows:
125
126;; SPEC ::= ( ITEM... )
127
128;; ITEM ::= ( [FIELD] TYPE )
129;; | ( [FIELD] eval FORM ) -- eval FORM for side-effect only
130;; | ( [FIELD] fill LEN ) -- skip LEN bytes
131;; | ( [FIELD] align LEN ) -- skip to next multiple of LEN bytes
132;; | ( [FIELD] struct SPEC_NAME )
133;; | ( [FIELD] union TAG_VAL (TAG SPEC)... [(t SPEC)] )
8b09abe1 134;; | ( [FIELD] repeat COUNT ITEM... )
4141da38
KS
135
136;; -- In (eval EXPR), the value of the last field is available in
137;; the dynamically bound variable `last'.
138
139;; TYPE ::= ( eval EXPR ) -- interpret result as TYPE
140;; | u8 | byte -- length 1
141;; | u16 | word | short -- length 2, network byte order
142;; | u24 -- 3-byte value
143;; | u32 | dword | long -- length 4, network byte order
144;; | u16r | u24r | u32r -- little endian byte order.
145;; | str LEN -- LEN byte string
146;; | strz LEN -- LEN byte (zero-terminated) string
147;; | vec LEN -- LEN byte vector
148;; | ip -- 4 byte vector
149;; | bits LEN -- List with bits set in LEN bytes.
150;;
151;; -- Note: 32 bit values may be limited by emacs' INTEGER
152;; implementation limits.
153;;
8b09abe1
TTN
154;; -- Example: `bits 2' will unpack 0x28 0x1c to (2 3 4 11 13)
155;; and 0x1c 0x28 to (3 5 10 11 12).
4141da38
KS
156
157;; FIELD ::= ( eval EXPR ) -- use result as NAME
158;; | NAME
159
160;; LEN ::= ARG
161;; | <omitted> | nil -- LEN = 1
162
163
164;; TAG_VAL ::= ARG
165
166;; TAG ::= LISP_CONSTANT
167;; | ( eval EXPR ) -- return non-nil if tag match;
168;; current TAG_VAL in `tag'.
169
170;; ARG ::= ( eval EXPR ) -- interpret result as ARG
171;; | INTEGER_CONSTANT
172;; | DEREF
173
174;; DEREF ::= ( [NAME | INTEGER]... ) -- Field NAME or Array index relative to
175;; current structure spec.
176;; -- see bindat-get-field
177
178;; A `union' specification
179;; ([FIELD] union TAG_VAL (TAG SPEC) ... [(t SPEC)])
a1506d29 180;; is interpreted by evalling TAG_VAL and then comparing that to
4141da38
KS
181;; each TAG using equal; if a match is found, the corresponding SPEC
182;; is used.
183;; If TAG is a form (eval EXPR), EXPR is evalled with `tag' bound to the
184;; value of TAG_VAL; the corresponding SPEC is used if the result is non-nil.
185;; Finally, if TAG is t, the corresponding SPEC is used unconditionally.
186;;
187;; An `eval' specification
188;; ([FIELD] eval FORM)
189;; is interpreted by evalling FORM for its side effects only.
190;; If FIELD is specified, the value is bound to that field.
191;; The FORM may access and update `raw-data' and `pos' (see `bindat-unpack'),
192;; as well as the lisp data structure in `struct'.
193
194;;; Code:
195
196;; Helper functions for structure unpacking.
197;; Relies on dynamic binding of RAW-DATA and POS
198
;; Declare `raw-data' and `pos' as special (dynamically bound) variables
;; without giving them a global value.  The helpers below read and update
;; them; the entry points bind them (as arguments or with `let').
199(defvar raw-data)
200(defvar pos)
201
(defun bindat--unpack-u8 ()
  "Return the byte at offset `pos' in `raw-data' and advance `pos' by one.
`raw-data' may be a string or a vector."
  (let ((byte (if (stringp raw-data)
                  (string-to-char (substring raw-data pos (1+ pos)))
                (aref raw-data pos))))
    ;; Only move the cursor once the read has succeeded.
    (setq pos (1+ pos))
    byte))
a1506d29 208
4141da38
KS
(defun bindat--unpack-u16 ()
  "Read two bytes at `pos' as a 16-bit value, most significant byte first."
  ;; Arguments are evaluated left to right, so the high byte is read first.
  (logior (lsh (bindat--unpack-u8) 8)
          (bindat--unpack-u8)))
212
(defun bindat--unpack-u24 ()
  "Read three bytes at `pos' as a 24-bit value, most significant byte first."
  ;; The leading 16 bits are read first, then the trailing byte.
  (logior (lsh (bindat--unpack-u16) 8)
          (bindat--unpack-u8)))
216
(defun bindat--unpack-u32 ()
  "Read four bytes at `pos' as a 32-bit value, most significant byte first.
The result is subject to the integer range of the running Emacs."
  ;; High half-word first, then the low half-word.
  (logior (lsh (bindat--unpack-u16) 16)
          (bindat--unpack-u16)))
220
(defun bindat--unpack-u16r ()
  "Read two bytes at `pos' as a 16-bit value, least significant byte first."
  ;; First byte read is the low byte; the second is shifted into the high byte.
  (logior (bindat--unpack-u8)
          (lsh (bindat--unpack-u8) 8)))
224
(defun bindat--unpack-u24r ()
  "Read three bytes at `pos' as a 24-bit value, least significant byte first."
  ;; Low 16 bits come first, followed by the most significant byte.
  (logior (bindat--unpack-u16r)
          (lsh (bindat--unpack-u8) 16)))
228
(defun bindat--unpack-u32r ()
  "Read four bytes at `pos' as a 32-bit value, least significant byte first.
The result is subject to the integer range of the running Emacs."
  ;; Low half-word first, then the high half-word.
  (logior (bindat--unpack-u16r)
          (lsh (bindat--unpack-u16r) 16)))
232
(defun bindat--unpack-item (type len)
  "Unpack and return one value of TYPE from `raw-data' starting at `pos'.
LEN is the size in bytes for the `bits', `str', `strz' and `vec' types;
the fixed-width integer types ignore it.  `pos' is advanced past the
bytes consumed.  An unrecognized TYPE yields nil and leaves `pos' alone."
  ;; An `ip' value is unpacked as a 4-element byte vector.
  (when (eq type 'ip)
    (setq type 'vec
          len 4))
  (cond
   ((memq type '(u8 byte)) (bindat--unpack-u8))
   ((memq type '(u16 word short)) (bindat--unpack-u16))
   ((eq type 'u24) (bindat--unpack-u24))
   ((memq type '(u32 dword long)) (bindat--unpack-u32))
   ((eq type 'u16r) (bindat--unpack-u16r))
   ((eq type 'u24r) (bindat--unpack-u24r))
   ((eq type 'u32r) (bindat--unpack-u32r))
   ((eq type 'bits)
    ;; Bits are numbered from (8*LEN - 1) for the top bit of the first byte
    ;; down to 0 for the bottom bit of the last byte.  Collecting while
    ;; counting down leaves the result list in ascending order.
    (let ((bit-no (1- (* 8 len)))
          (set-bits nil)
          octet mask)
      (while (>= bit-no 0)
        (setq octet (bindat--unpack-u8))
        (if (= octet 0)
            ;; Nothing set in this byte: skip its eight bit numbers at once.
            (setq bit-no (- bit-no 8))
          (setq mask 128)
          (while (> mask 0)
            (unless (= 0 (logand octet mask))
              (push bit-no set-bits))
            (setq bit-no (1- bit-no)
                  mask (lsh mask -1)))))
      set-bits))
   ((eq type 'str)
    (let* ((end (+ pos len))
           (chunk (substring raw-data pos end)))
      (setq pos end)
      ;; `substring' of a vector gives a vector; turn that into a string.
      (if (stringp chunk)
          chunk
        (string-make-unibyte (concat chunk)))))
   ((eq type 'strz)
    ;; Take bytes up to the first zero byte, but always consume LEN bytes.
    (let ((used 0)
          text)
      (while (and (< used len)
                  (/= (aref raw-data (+ pos used)) 0))
        (setq used (1+ used)))
      (setq text (substring raw-data pos (+ pos used)))
      (setq pos (+ pos len))
      (if (stringp text)
          text
        (string-make-unibyte (concat text)))))
   ((eq type 'vec)
    (let ((result (make-vector len 0)))
      (dotimes (k len)
        (aset result k (bindat--unpack-u8)))
      result))
   (t nil)))
283
(defun bindat--unpack-group (spec)
  "Unpack the items of SPEC from `raw-data' at `pos' and return an alist.
Each named item contributes a (FIELD . VALUE) pair; an unnamed item that
produces a list has that list spliced into the result.  Pairs are consed
on as they are read, so the returned alist is in reverse SPEC order.

The local variable names used here (`struct', `last', `tag', ...) are
deliberately left as they are: user-supplied (eval ...) forms run under
dynamic binding and can refer to them."
  (let (struct last)
    (while spec
      (let* ((item (pop spec))
             (field (car item))
             (type (cadr item))
             (len (nth 2 item))
             (tail 3)
             data)
        ;; FIELD, TYPE and LEN may each be written as (eval EXPR).
        (when (eq (car-safe field) 'eval)
          (setq field (eval (cadr field))))
        (when (eq (car-safe type) 'eval)
          (setq type (eval (cadr type))))
        (when (eq (car-safe len) 'eval)
          (setq len (eval (cadr len))))
        ;; Item with no FIELD: the keyword sits in the first slot, so shift
        ;; every component one place to the right.
        (when (memq field '(eval fill align struct union))
          (setq tail 2
                len type
                type field
                field nil))
        ;; A list-valued LEN names a field unpacked earlier (except for
        ;; `eval', where LEN holds the form to evaluate).
        (when (and (consp len) (not (eq type 'eval)))
          (setq len (apply 'bindat-get-field struct len)))
        (or len (setq len 1))
        (cond
         ((eq type 'eval)
          (let ((value (eval len)))
            (if field (setq data value))))
         ((eq type 'fill)
          (setq pos (+ pos len)))
         ((eq type 'align)
          (while (/= 0 (% pos len))
            (setq pos (1+ pos))))
         ((eq type 'struct)
          ;; LEN holds the name of a variable whose value is the sub-spec.
          (setq data (bindat--unpack-group (eval len))))
         ((eq type 'repeat)
          (let ((index 0))
            (while (< index len)
              (push (bindat--unpack-group (nthcdr tail item)) data)
              (setq index (1+ index)))
            (setq data (nreverse data))))
         ((eq type 'union)
          ;; Use the first case whose tag equals TAG, is t, or is a form
          ;; that evaluates to non-nil.
          (let ((tag len) (cases (nthcdr tail item)) case cc)
            (while cases
              (setq case (pop cases)
                    cc (car case))
              (when (or (equal cc tag) (equal cc t)
                        (and (consp cc) (eval cc)))
                (setq data (bindat--unpack-group (cdr case))
                      cases nil)))))
         (t
          (setq data (bindat--unpack-item type len))
          (setq last data)))
        ;; Record the result, if any.
        (cond
         ((null data))
         (field (push (cons field data) struct))
         (t (setq struct (append data struct))))))
    struct))
a1506d29 345
4141da38
KS
(defun bindat-unpack (spec raw-data &optional pos)
  "Decode the binary data in RAW-DATA as described by SPEC.
RAW-DATA is a string or vector.  Decoding starts at offset POS in
RAW-DATA, or at the beginning if POS is omitted.  The value is the
structured data built from SPEC."
  ;; RAW-DATA and POS are read and advanced by the unpack helpers.
  (or pos (setq pos 0))
  (bindat--unpack-group spec))
352
(defun bindat-get-field (struct &rest field)
  "Look up a (possibly nested) value in the structured data STRUCT.
Each element of FIELD selects one level: a symbol picks the alist entry
with that name, an integer picks the element at that index of a list.
This mirrors the C notation STRUCT.FIELD1[INDEX2].FIELD3...
The value is nil as soon as a step finds nothing; with no FIELD
arguments STRUCT itself is returned."
  (while (and struct field)
    (let ((key (car field)))
      (setq struct (if (integerp key)
                       (nth key struct)
                     ;; `assq' yields nil or a cons, so `cdr' is safe.
                     (cdr (assq key struct)))
            field (cdr field))))
  struct)
367
368
a1506d29 369;; Calculate raw-data length of structured data
4141da38
KS
370
(defvar bindat--fixed-length-alist
  '(;; one byte
    (u8 . 1) (byte . 1)
    ;; two bytes
    (u16 . 2) (u16r . 2) (word . 2) (short . 2)
    ;; three bytes
    (u24 . 3) (u24r . 3)
    ;; four bytes
    (u32 . 4) (u32r . 4) (dword . 4) (long . 4)
    (ip . 4))
  "Alist mapping each fixed-width bindat type to its size in bytes.")
377
(defun bindat--length-group (struct spec)
  "Advance `pos' by the number of bytes STRUCT occupies when packed per SPEC.
SPEC is walked in the same way as for packing, but no data is written;
only `pos' is updated.  The return value is not meaningful.

The local variable names used here (`struct', `last', `tag', ...) are
deliberately left as they are: user-supplied (eval ...) forms run under
dynamic binding and can refer to them."
  (let (last)
    (while spec
      (let* ((item (pop spec))
             (field (car item))
             (type (cadr item))
             (len (nth 2 item))
             (tail 3))
        ;; FIELD, TYPE and LEN may each be written as (eval EXPR).
        (when (eq (car-safe field) 'eval)
          (setq field (eval (cadr field))))
        (when (eq (car-safe type) 'eval)
          (setq type (eval (cadr type))))
        (when (eq (car-safe len) 'eval)
          (setq len (eval (cadr len))))
        ;; Item with no FIELD: shift every component one place to the right.
        (when (memq field '(eval fill align struct union))
          (setq tail 2
                len type
                type field
                field nil))
        ;; A list-valued LEN refers to another field of STRUCT.
        (when (and (consp len) (not (eq type 'eval)))
          (setq len (apply 'bindat-get-field struct len)))
        (or len (setq len 1))
        (cond
         ((eq type 'eval)
          (if field
              (push (cons field (eval len)) struct)
            (eval len)))
         ((eq type 'fill)
          (setq pos (+ pos len)))
         ((eq type 'align)
          (while (/= 0 (% pos len))
            (setq pos (1+ pos))))
         ((eq type 'struct)
          ;; A named sub-structure lives under FIELD; an unnamed one shares
          ;; the fields of the enclosing STRUCT.
          (bindat--length-group
           (if field (bindat-get-field struct field) struct)
           (eval len)))
         ((eq type 'repeat)
          (let ((index 0))
            (while (< index len)
              (bindat--length-group
               (nth index (bindat-get-field struct field))
               (nthcdr tail item))
              (setq index (1+ index)))))
         ((eq type 'union)
          ;; Use the first case whose tag equals TAG, is t, or is a form
          ;; that evaluates to non-nil.
          (let ((tag len) (cases (nthcdr tail item)) case cc)
            (while cases
              (setq case (pop cases)
                    cc (car case))
              (when (or (equal cc tag) (equal cc t)
                        (and (consp cc) (eval cc)))
                (bindat--length-group struct (cdr case))
                (setq cases nil)))))
         (t
          ;; Fixed-width types override whatever LEN says.
          (setq type (assq type bindat--fixed-length-alist))
          (when type
            (setq len (cdr type)))
          (when field
            (setq last (bindat-get-field struct field)))
          (setq pos (+ pos len))))))))
437
(defun bindat-length (spec struct)
  "Return the number of bytes needed to pack STRUCT according to SPEC."
  ;; Walk SPEC with a fresh offset counter; where the counter ends up is
  ;; the packed size.
  (let ((pos 0))
    (bindat--length-group struct spec)
    pos))
443
444
445;; Pack structured data into raw-data
446
(defun bindat--pack-u8 (v)
  "Store the low 8 bits of V at offset `pos' in `raw-data'; advance `pos'.
Return the new value of `pos'."
  (let ((byte (logand v 255)))
    (aset raw-data pos byte)
    (setq pos (1+ pos))))
a1506d29 450
4141da38
KS
(defun bindat--pack-u16 (v)
  "Store the low 16 bits of V at `pos', most significant byte first.
Advance `pos' by two and return its new value."
  (let ((high (logand (lsh v -8) 255))
        (low (logand v 255)))
    (aset raw-data pos high)
    (aset raw-data (1+ pos) low)
    (setq pos (+ pos 2))))
455
(defun bindat--pack-u24 (v)
  "Store the low 24 bits of V at `pos', most significant byte first."
  (let ((top-byte (lsh v -16)))
    (bindat--pack-u8 top-byte)
    ;; The remaining 16 bits follow; the helper masks off anything above.
    (bindat--pack-u16 v)))
459
(defun bindat--pack-u32 (v)
  "Store the low 32 bits of V at `pos', most significant byte first."
  (let ((high-half (lsh v -16)))
    (bindat--pack-u16 high-half)
    (bindat--pack-u16 v)))
463
(defun bindat--pack-u16r (v)
  "Store the low 16 bits of V at `pos', least significant byte first.
Advance `pos' by two and return its new value."
  (let ((high (logand (lsh v -8) 255))
        (low (logand v 255)))
    (aset raw-data (1+ pos) high)
    (aset raw-data pos low)
    (setq pos (+ pos 2))))
468
(defun bindat--pack-u24r (v)
  "Store the low 24 bits of V at `pos', least significant byte first."
  ;; Low 16 bits go out first, then the third (most significant) byte.
  (bindat--pack-u16r v)
  (let ((top-byte (lsh v -16)))
    (bindat--pack-u8 top-byte)))
472
(defun bindat--pack-u32r (v)
  "Store the low 32 bits of V at `pos', least significant byte first."
  ;; Low half-word first, then the high half-word.
  (bindat--pack-u16r v)
  (let ((high-half (lsh v -16)))
    (bindat--pack-u16r high-half)))
476
(defun bindat--pack-item (v type len)
  "Pack value V of TYPE into `raw-data' at `pos', then advance `pos'.
LEN is the size in bytes for the `bits', `str', `strz' and `vec' types.
If V is nil nothing is written and the field is skipped: fixed-width
types skip their own width (see `bindat--fixed-length-alist'), all
other types skip LEN bytes.  An unrecognized TYPE also skips LEN bytes."
  ;; An `ip' value is packed as a 4-element byte vector.
  (if (eq type 'ip)
      (setq type 'vec len 4))
  (cond
   ((null v)
    ;; `bindat--length-group' reserves the real width for fixed-width
    ;; types regardless of LEN (which defaults to 1), so skip that same
    ;; width here; otherwise every later field lands at the wrong offset.
    (setq pos (+ pos (or (cdr (assq type bindat--fixed-length-alist))
                         len))))
   ((memq type '(u8 byte))
    (bindat--pack-u8 v))
   ((memq type '(u16 word short))
    (bindat--pack-u16 v))
   ((eq type 'u24)
    (bindat--pack-u24 v))
   ((memq type '(u32 dword long))
    (bindat--pack-u32 v))
   ((eq type 'u16r)
    (bindat--pack-u16r v))
   ((eq type 'u24r)
    (bindat--pack-u24r v))
   ((eq type 'u32r)
    (bindat--pack-u32r v))
   ((eq type 'bits)
    ;; V is a list of bit numbers.  Bit (8*LEN - 1) is the top bit of the
    ;; first byte; bit 0 is the bottom bit of the last byte.  V is known
    ;; to be non-nil here, so every byte is assembled bit by bit.
    (let ((bnum (1- (* 8 len))) j m)
      (while (>= bnum 0)
        (setq m 0
              j 128)
        (while (> j 0)
          (if (memq bnum v)
              (setq m (logior m j)))
          (setq bnum (1- bnum)
                j (lsh j -1)))
        (bindat--pack-u8 m))))
   ((memq type '(str strz vec))
    ;; Copy at most LEN elements of V; the field always occupies LEN bytes.
    (let ((l (length v)) (i 0))
      (if (> l len) (setq l len))
      (while (< i l)
        (aset raw-data (+ pos i) (aref v i))
        (setq i (1+ i)))
      (setq pos (+ pos len))))
   (t
    (setq pos (+ pos len)))))
519
(defun bindat--pack-group (struct spec)
  "Pack the fields of STRUCT into `raw-data' at `pos' as described by SPEC.
`pos' is advanced past everything written or skipped.  The return value
is not meaningful.

The local variable names used here (`struct', `last', `tag', ...) are
deliberately left as they are: user-supplied (eval ...) forms run under
dynamic binding and can refer to them."
  (let (last)
    (while spec
      (let* ((item (pop spec))
             (field (car item))
             (type (cadr item))
             (len (nth 2 item))
             (tail 3))
        ;; FIELD, TYPE and LEN may each be written as (eval EXPR).
        (when (eq (car-safe field) 'eval)
          (setq field (eval (cadr field))))
        (when (eq (car-safe type) 'eval)
          (setq type (eval (cadr type))))
        (when (eq (car-safe len) 'eval)
          (setq len (eval (cadr len))))
        ;; Item with no FIELD: shift every component one place to the right.
        (when (memq field '(eval fill align struct union))
          (setq tail 2
                len type
                type field
                field nil))
        ;; A list-valued LEN refers to another field of STRUCT.
        (when (and (consp len) (not (eq type 'eval)))
          (setq len (apply 'bindat-get-field struct len)))
        (or len (setq len 1))
        (cond
         ((eq type 'eval)
          (if field
              (push (cons field (eval len)) struct)
            (eval len)))
         ((eq type 'fill)
          (setq pos (+ pos len)))
         ((eq type 'align)
          (while (/= 0 (% pos len))
            (setq pos (1+ pos))))
         ((eq type 'struct)
          ;; A named sub-structure lives under FIELD; an unnamed one shares
          ;; the fields of the enclosing STRUCT.
          (bindat--pack-group
           (if field (bindat-get-field struct field) struct)
           (eval len)))
         ((eq type 'repeat)
          (let ((index 0))
            (while (< index len)
              (bindat--pack-group
               (nth index (bindat-get-field struct field))
               (nthcdr tail item))
              (setq index (1+ index)))))
         ((eq type 'union)
          ;; Use the first case whose tag equals TAG, is t, or is a form
          ;; that evaluates to non-nil.
          (let ((tag len) (cases (nthcdr tail item)) case cc)
            (while cases
              (setq case (pop cases)
                    cc (car case))
              (when (or (equal cc tag) (equal cc t)
                        (and (consp cc) (eval cc)))
                (bindat--pack-group struct (cdr case))
                (setq cases nil)))))
         (t
          (setq last (bindat-get-field struct field))
          (bindat--pack-item last type len)))))))
577
(defun bindat-pack (spec struct &optional raw-data pos)
  "Return binary data packed according to SPEC for structured data STRUCT.
Optional third arg RAW-DATA is a pre-allocated string or vector to pack into.
Optional fourth arg POS is the starting offset into RAW-DATA.
If RAW-DATA is supplied it is filled in place and the return value is nil.
Note: The result is a multibyte string; use `string-make-unibyte' on it
to make it unibyte if necessary."
  ;; Remember whether the caller supplied the buffer before RAW-DATA is
  ;; possibly replaced by a freshly allocated vector below.
  (let ((no-return raw-data))
    (unless pos (setq pos 0))
    ;; No buffer given: allocate one big enough for POS bytes of leading
    ;; room plus the packed size of STRUCT.
    (unless raw-data
      (setq raw-data (make-vector (+ pos (bindat-length spec struct)) 0)))
    (bindat--pack-group struct spec)
    (if no-return nil (concat raw-data))))
589
590
591;; Misc. format conversions
592
(defun bindat-format-vector (vect fmt sep &optional len)
  "Return a string made by formatting the elements of VECT.
Every element is formatted with the format string FMT, and consecutive
elements are separated by the string SEP.  If LEN is given, only the
first LEN elements of VECT are used."
  (let* ((idx (or len (length vect)))
         ;; Every element but the first is rendered with SEP in front of it.
         (sep-and-fmt (concat sep fmt))
         (pieces nil))
    ;; Walk from the last element to the first so that pushing each piece
    ;; leaves the list in forward order.
    (while (> idx 0)
      (setq idx (1- idx))
      (push (format (if (zerop idx) fmt sep-and-fmt) (aref vect idx))
            pieces))
    (apply 'concat pieces)))
a1506d29 605
4141da38
KS
(defun bindat-vector-to-dec (vect &optional sep)
  "Return the elements of vector VECT as decimal numbers joined by dots.
If SEP is a string, it is used as the separator instead of a dot."
  (let ((separator (if (stringp sep) sep ".")))
    (bindat-format-vector vect "%d" separator)))
610
(defun bindat-vector-to-hex (vect &optional sep)
  "Format vector VECT in hex format separated by colons.
Each element is shown as at least two lower-case hex digits.
If optional second arg SEP is a string, use that as separator."
  (bindat-format-vector vect "%02x" (if (stringp sep) sep ":")))
615
(defun bindat-ip-to-string (ip)
  "Return the first four elements of vector IP as a dotted-decimal string."
  (apply 'format "%d.%d.%d.%d"
         (mapcar (lambda (octet-index) (aref ip octet-index))
                 '(0 1 2 3))))
620
;; Announce the `bindat' feature so that (require 'bindat) is satisfied
;; once this file has been loaded.
621(provide 'bindat)
622
ab5796a9 623;;; arch-tag: 5e6708c3-03e2-4ad7-9885-5041b779c3fb
4141da38 624;;; bindat.el ends here