%{
/* dumbhex.l
   
   A simple scanner of hexadecimal strings.

   Copyright (C) 2007, 2008, 2009, 2010 Eloy Paris

   This is part of Network Expect.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
   The idea behind the service offered by this module is to convert ASCII
   input like:

   "0x0000:  4500 0034 66d0 4000 4006 c557 0a74 bc3d
    0x0010:  26e5 2106 ddae 1a29 4aa1 9cb9 f978 97e5
    0x0020:  8010 03ac aa6c 0000 0101 080a 044f a24a
    0x0030:  7244 3099"

   to an array of bytes that corresponds to the hexadecimal
   representation of the bytes in the string. The algorithm is very
   simplistic and dumb; hence the name "dumbhex".

   This is useful, for example, when importing packet data for which
   only a hexadecimal dump is available.
*/

#include <ctype.h>
#include <glib.h>

#include "xstrdup.h"

/* Per-scan state, handed to the reentrant scanner as its "extra" data. */
struct scanner_vars {
    int prefix_count;		/* hex fields seen so far on the current line */
    int prefixes_to_ignore;	/* leading hex fields to skip on every line */
    GByteArray *array;		/* decoded bytes are accumulated here */
};

%}

%option reentrant
%option extra-type="struct scanner_vars *"
%option noyywrap
/*
 * We don't use unput, so don't generate code for it.
 */
%option nounput

HEXDIGIT	[0-9a-fA-F]
ID		[_a-z][_a-z0-9]*

%x IN_COMMENT

%%

		/* C-style comments. See "How can I match C-style comments?"
		   in the flex FAQ */
<INITIAL>{
"/*"		BEGIN(IN_COMMENT);	/* comment opener: enter the exclusive IN_COMMENT state */
}
<IN_COMMENT>{
"*/"		BEGIN(INITIAL);		/* comment closer: resume normal scanning */
[^*\n]+		/* eat comment in chunks */
"*"		/* eat the lone star */
\n		/* eat end of line; unlike outside a comment, prefix_count is not reset */
}

0x|\\x		/* Ignore prefixes that explicitly specify a hexadecimal
		   number since everything we parse is assumed to be
		   hexadecimal numbers */

{HEXDIGIT}+	{
		    /*
		     * A run of hexadecimal digits. Lines may begin with
		     * fields that are not data, such as the offset column in:
		     *
		     * 00000000: 12 12 12 12   34 34 34 34
		     * 00000008: 12 12 12 12   34 34 34 34
		     * 00000010: 12 12 12 12   34 34 34 34
		     *
		     * so every run is counted, and the first
		     * "prefixes_to_ignore" runs of each line are dropped
		     * (1 would drop the offset column above). Only the
		     * runs that follow are decoded.
		     */
		    const char *digits = dumbhexget_text(yyscanner);
		    int ndigits = dumbhexget_leng(yyscanner);
		    GByteArray *out = yyextra->array;
		    int pos;

		    yyextra->prefix_count++;

		    if (yyextra->prefix_count > yyextra->prefixes_to_ignore) {
			/*
			 * Two digits make one byte. A run with an odd
			 * number of digits leaves its last digit in the
			 * upper nibble of a final byte.
			 */
			for (pos = 0; pos < ndigits; pos++) {
			    guint8 nibble = g_ascii_xdigit_value(digits[pos]);

			    if (pos % 2 == 0) {
				/* Even position: new byte, upper nibble */
				guint8 upper = nibble << 4;

				g_byte_array_append(out, &upper, 1);
			    } else {
				/* Odd position: complete the last byte */
				out->data[out->len - 1] |= nibble;
			    }
			}
		    }
		}

{ID}		/* Ignore words, even if they have digits inside them */

[ \t]+		/* eat up whitespace */

\n		yyextra->prefix_count = 0;	/* new line: restart the per-line prefix count */

.		/* Anything else we ignore */

%%

/*
 * dh_parsehex() - convert a textual hexadecimal dump into raw bytes.
 *
 * hexstr              NUL-terminated dump text. It is duplicated first;
 *                     the caller's string is never modified.
 * prefixes_to_ignore  number of leading hexadecimal fields to discard on
 *                     each line (e.g. 1 to drop an offset column).
 * ignore_after_col    when > 0, every character at 1-based column
 *                     "ignore_after_col" or beyond is discarded on each
 *                     line, except that the first character of a line is
 *                     always kept. When <= 0 nothing is discarded.
 *
 * Returns a newly created GByteArray with the decoded bytes. Nothing in
 * this function releases it, so disposing of it is left to the caller.
 */
GByteArray *
dh_parsehex(const char *hexstr, int prefixes_to_ignore, int ignore_after_col)
{
    void *scanner;
    void *yybuf;
    struct scanner_vars vars;
    char *text, *p;
    int kept;

    text = xstrdup(hexstr);

    /*
     * Single pass over the private copy: blank out the columns the caller
     * asked us to ignore and, as a by-product, find the end of the text.
     *
     * "kept" is the number of characters retained so far on the current
     * line, so the character under "p" sits at column kept + 1 until
     * blanking starts; after that "kept" stops growing and the rest of
     * the line keeps being blanked.
     *
     * The filler is a space, which the scanner skips as whitespace. An
     * asterisk must not be used: written right after a retained slash it
     * would form a comment opener and the scanner would silently swallow
     * all the data that follows.
     */
    kept = 0;
    for (p = text; *p != '\0'; p++) {
	if (*p == '\n') {
	    kept = 0;
	    continue;
	}

	if (ignore_after_col <= 0)
	    continue;

	if (kept > 0 && kept + 1 >= ignore_after_col)
	    *p = ' ';
	else
	    kept++;
    }

    vars.prefixes_to_ignore = prefixes_to_ignore;
    vars.prefix_count = 0;
    /*
     * Two digits decode to one byte and digit runs are separated by at
     * least one character, so (length + 1) / 2 bytes is always enough.
     */
    vars.array = g_byte_array_sized_new((guint) ((p - text + 1) / 2));

    dumbhexlex_init_extra(&vars, &scanner);
    yybuf = dumbhex_scan_string(text, scanner);
    dumbhexlex(scanner);
    dumbhex_delete_buffer(yybuf, scanner);
    dumbhexlex_destroy(scanner);

    free(text);

    return vars.array;
}
