#include "vg_kv.h"
-void vg_kv_parse( vg_kvs *out_kvs, const char *source, vg_stack_allocator *stack )
+void vg_kvs_init( vg_kvs *kvs, vg_stack_allocator *stack )
{
- out_kvs->source_buffer = source;
- out_kvs->kv_count = 1;
- out_kvs->kvs = vg_stack_allocate( stack, sizeof(vg_kv), 8, "KV Metadata buffer" );
- vg_zero_mem( out_kvs->kvs, sizeof(vg_kv) );
-
- u32 token_start=0,
- token_length=0,
- token_hash=0,
- depth=0;
- char delim = 0;
-
- struct
+ vg_zero_mem( kvs, sizeof(vg_kvs) );
+ kvs->stack = stack;
+}
+
+static vg_kv *vg_kvs_newkv( vg_kvs *kvs )
+{
+ void *kv_page;
+ if( (kvs->kv_page_count == VG_KV_PAGE_COUNT) || (kvs->kv_page_offset == 0) )
+ {
+ u32 page_size = sizeof(vg_kv)*VG_KV_PAGE_COUNT;
+ kv_page = vg_stack_allocate( kvs->stack, page_size, 64, "KV Page" );
+ vg_zero_mem( kv_page, page_size );
+ kvs->kv_page_offset = vg_stack_offset( kvs->stack, kv_page );
+ kvs->kv_page_count = 0;
+ kvs->stat_memory_pages += page_size;
+ }
+ else
+ kv_page = vg_stack_pointer( kvs->stack, kvs->kv_page_offset );
+
+ vg_kv *kv = kv_page + kvs->kv_page_count * sizeof(vg_kv);
+ kvs->kv_page_count ++;
+ return kv;
+}
+
+static u32 vg_kv_string_append( vg_kvs *kvs, const char *string )
+{
+ if( string == NULL )
+ return 0;
+ char *buf = vg_stack_allocate( kvs->stack, strlen(string)+1, 1, "KV string (appended)" );
+ strcpy( buf, string );
+ return vg_stack_offset( kvs->stack, buf );
+}
+
+u32 vg_kv_append( vg_kvs *kvs, u32 parent_offset, const char *key, const char *value )
+{
+ vg_kv *kv = vg_kvs_newkv( kvs );
+ u32 key_offset = vg_kv_string_append( kvs, key ),
+ value_offset = vg_kv_string_append( kvs, value );
+
+ kv->key_info = (vg_strdjb2(key) & 0xFFFFF) | (key?(strlen(key)<<20):0) | (value?(0x1<<30):0);
+ kv->key_offset = key_offset;
+
+ if( value )
+ {
+ VG_ASSERT( key );
+ kv->value.offset_from_key = value_offset-key_offset;
+ kv->value.length = strlen(value);
+ }
+
+ u32 kv_offset = vg_stack_offset( kvs->stack, kv );
+ vg_kv *parent = vg_stack_pointer( kvs->stack, parent_offset );
+ if( parent->first_child_offset )
{
- u32 frame_id, latest_child_id;
+ u32 brother_offset = parent->first_child_offset;
+ while( 1 )
+ {
+ vg_kv *brother = vg_stack_pointer( kvs->stack, brother_offset );
+ if( brother->brother_offset )
+ brother_offset = brother->brother_offset;
+ else
+ {
+ brother->brother_offset = kv_offset;
+ break;
+ }
+ }
}
- frame_stack[ 64 ];
- frame_stack[0].frame_id = 0;
- frame_stack[0].latest_child_id = 0;
+ else parent->first_child_offset = kv_offset;
+ return kv_offset;
+}
- u32 t0_start=0, t0_length=0, t0_hash=0;
+void vg_kv_parser_init( vg_kv_parser *parser, vg_kvs *out_kvs )
+{
+ vg_zero_mem( parser, sizeof(vg_kv_parser) );
+ parser->kvs = out_kvs;
- for( u32 i=0; i<0xffffffff; i ++ )
+ vg_kv *root_kv = vg_kvs_newkv( parser->kvs );
+ out_kvs->root_offset = vg_stack_offset( parser->kvs->stack, root_kv );
+ parser->frame_stack[0].frame_offset = out_kvs->root_offset;
+}
+
+void vg_kv_link( vg_kv_parser *parser, u32 offset, u32 depth )
+{
+ u32 parent_offset = parser->frame_stack[ depth ].frame_offset;
+ vg_kv *parent = vg_stack_pointer( parser->kvs->stack, parent_offset );
+ if( parent->first_child_offset == 0 )
+ parent->first_child_offset = offset;
+
+ u32 brother_offset = parser->frame_stack[ depth ].latest_child_offset;
+ if( brother_offset )
{
- out_kvs->source_length = i;
+ vg_kv *brother = vg_stack_pointer( parser->kvs->stack, brother_offset );
+ brother->brother_offset = offset;
+ }
+ parser->frame_stack[ depth ].latest_child_offset = offset;
+}
+
+void vg_kv_parse_buffer( vg_kv_parser *parser, const char *buffer, u32 buffer_length )
+{
+ if( buffer_length == 0 )
+ buffer_length = 0xffffffff;
- char c = source[i];
+ for( u32 i=0; i<buffer_length; i ++ )
+ {
+ parser->stat_source_characters ++;
+ char c = buffer[i];
if( c == '\0' )
break;
- u32 link_id = 0, link_depth = 0;
+ bool is_control_character = 0;
+ if( parser->token0_deliminator )
+ {
+ if( c == parser->token0_deliminator )
+ is_control_character = 1;
+ }
+ else
+ {
+ if( c==' '||c=='\t'||c=='\r'||c=='\n'||c=='{'||c=='}' )
+ is_control_character = 1;
+ }
- if( (delim && (c == delim)) || (!delim && (c==' '||c=='\t'||c=='\r'||c=='\n'||c=='{'||c=='}')) )
+ if( is_control_character )
{
- if( token_length )
+ if( parser->token0_length )
{
- token_length --;
- if( t0_length )
+ parser->token0_length --;
+ parser->token0_buffer = vg_stack_extend_last( parser->kvs->stack, 1 );
+ parser->token0_buffer[ parser->token0_length ] = '\0';
+
+ if( parser->token1_length )
{
- /* pair */
- link_id = out_kvs->kv_count ++;
- link_depth = depth;
-
- out_kvs->kvs = vg_stack_extend_last( stack, sizeof(vg_kv) );
- vg_kv *kv = &out_kvs->kvs[ link_id ];
- kv->key_info = (0xFFFFF & t0_hash) | (0x3FF & t0_length)<<20 | (0x1) << 30;
- kv->key_offset = t0_start;
- kv->brother_offset = 0; /* deffered */
- kv->value.offset_from_key = token_start - t0_start;
- kv->value.length = token_length;
-
- t0_length = 0;
+ /* KV pair */
+ vg_kv *kv = vg_kvs_newkv( parser->kvs );
+ kv->key_info = (0xFFFFF & parser->token1_hash) |
+ (0x3FF & parser->token1_length)<<20 |
+ (0x1) << 30;
+ kv->key_offset = parser->token1_start_offset;
+ kv->value.offset_from_key = parser->token0_start_offset - parser->token1_start_offset;
+ kv->value.length = parser->token0_length;
+ parser->token1_length = 0;
+
+ vg_kv_link( parser, vg_stack_offset( parser->kvs->stack, kv ), parser->depth );
}
else
{
- t0_start = token_start;
- t0_length = token_length;
- t0_hash = token_hash;
+ /* shift */
+ parser->token1_start_offset = parser->token0_start_offset;
+ parser->token1_length = parser->token0_length;
+ parser->token1_hash = parser->token0_hash;
}
- token_length = 0;
+ parser->token0_length = 0;
}
if( c=='{'||c=='}'||c=='\n' )
{
- /* SYNTAX TOKEN */
if( c == '{' )
{
- link_id = out_kvs->kv_count ++;
- link_depth = depth;
-
- out_kvs->kvs = vg_stack_extend_last( stack, sizeof(vg_kv) );
- vg_kv *kv = &out_kvs->kvs[ link_id ];
- kv->brother_offset = 0; /* deffered */
- kv->children = 0;
- if( t0_length )
+ vg_kv *kv = vg_kvs_newkv( parser->kvs );
+ if( parser->token1_length )
{
- /* frame with name */
- kv->key_info = (0xFFFFF & t0_hash) | (0x3FF & t0_length) << 20;
- kv->key_offset = t0_start;
+ kv->key_info = (0xFFFFF & parser->token1_hash) | (0x3FF & parser->token1_length) << 20;
+ kv->key_offset = parser->token1_start_offset;
}
else
- {
- /* frame with no name */
kv->key_info = 5381;
- kv->key_offset = 0;
- }
- t0_length = 0;
- depth ++;
- frame_stack[ depth ].latest_child_id = 0;
- frame_stack[ depth ].frame_id = link_id;
+ u32 id = vg_stack_offset( parser->kvs->stack, kv ),
+ depth = parser->depth;
+
+ parser->depth ++;
+ parser->frame_stack[ parser->depth ].latest_child_offset = 0;
+ parser->frame_stack[ parser->depth ].frame_offset = id;
+ vg_kv_link( parser, id, depth );
+ parser->token1_length = 0;
}
else if( c == '}' )
{
- if( depth )
- depth --;
- t0_length = 0;
+ if( parser->depth )
+ parser->depth --;
+ parser->token1_length = 0;
}
}
- delim = 0;
+ parser->token0_deliminator = 0;
}
else
{
- if( token_length )
+ if( parser->token0_length )
{
- token_length ++;
- token_hash = ((token_hash << 5) + token_hash) + (u32)c;
+ parser->token0_buffer = vg_stack_extend_last( parser->kvs->stack, 1 );
+ parser->token0_buffer[ parser->token0_length-1 ] = c;
+ parser->token0_length ++;
+ parser->token0_hash = ((parser->token0_hash << 5) + parser->token0_hash) + (u32)c;
+ parser->stat_memory_strings ++;
}
else
{
- if( c=='"'||c=='\'' )
+ if( c =='"' || c=='\'' )
{
- delim = c;
- token_start = i+1;
- token_length = 1;
- token_hash = 5381;
+ parser->token0_buffer = vg_stack_allocate( parser->kvs->stack, 0, 1, "KV string" );
+ parser->token0_start_offset = vg_stack_offset( parser->kvs->stack, parser->token0_buffer );
+ parser->token0_deliminator = c;
+ parser->token0_hash = 5381;
+ parser->token0_length = 1;
}
else
{
- token_start = i;
- token_length = 2;
- token_hash = ((5381<<5)+5381) + (u32)c;
+ parser->token0_buffer = vg_stack_allocate( parser->kvs->stack, 1, 1, "KV string" );
+ parser->token0_start_offset = vg_stack_offset( parser->kvs->stack, parser->token0_buffer );
+ parser->token0_buffer[0] = c;
+ parser->token0_length = 2;
+ parser->token0_hash = ((5381<<5)+5381) + (u32)c;
+ parser->stat_memory_strings ++;
}
}
}
-
- if( link_id )
- {
- u32 parent_id = frame_stack[link_depth].frame_id;
- vg_kv *parent = &out_kvs->kvs[ parent_id ];
- parent->children ++;
-
- u32 brother_id = frame_stack[link_depth].latest_child_id;
- if( brother_id )
- {
- vg_kv *brother = &out_kvs->kvs[ brother_id ];
- VG_ASSERT( brother->brother_offset == 0 );
- brother->brother_offset = link_id - brother_id;
- }
- frame_stack[ link_depth ].latest_child_id = link_id;
- }
}
-}
+}
-u32 vg_kv_type( vg_kvs *kvs, u32 kv_id ){ return (kvs->kvs[ kv_id ].key_info >> 30) & 0x3; }
+u32 vg_kv_type( vg_kvs *kvs, u32 kv_offset )
+{
+ vg_kv *kv = vg_stack_pointer( kvs->stack, kv_offset );
+ return (kv->key_info >> 30) & 0x3;
+}
-const char *vg_kv_key( vg_kvs *kvs, u32 kv_id, u32 *out_length )
+const char *vg_kv_key( vg_kvs *kvs, u32 kv_offset, u32 *out_length )
{
- vg_kv *kv = &kvs->kvs[ kv_id ];
+ vg_kv *kv = vg_stack_pointer( kvs->stack, kv_offset );
*out_length = (kv->key_info >> 20) & 0x3FF;
- return (*out_length)? kvs->source_buffer + kv->key_offset: NULL;
+ return (*out_length)? vg_stack_pointer( kvs->stack, kv->key_offset ): NULL;
}
-const char *vg_kv_value( vg_kvs *kvs, u32 kv_id, u32 *out_length )
+const char *vg_kv_value( vg_kvs *kvs, u32 kv_offset, u32 *out_length )
{
- u32 type = vg_kv_type( kvs, kv_id );
- if( type == 0x0 )
+ if( vg_kv_type( kvs, kv_offset ) == 0x0 )
return NULL;
else
{
- vg_kv *kv = &kvs->kvs[ kv_id ];
+ vg_kv *kv = vg_stack_pointer( kvs->stack, kv_offset );
*out_length = kv->value.length;
- return kvs->source_buffer + (kv->key_offset + kv->value.offset_from_key);
+ return vg_stack_pointer( kvs->stack, kv->key_offset + kv->value.offset_from_key );
}
}
-u32 vg_kv_child_count( vg_kvs *kvs, u32 kv_id )
+u32 vg_kv_next( vg_kvs *kvs, u32 kv_offset )
{
- u32 type = vg_kv_type( kvs, kv_id );
- if( type == 0x0 )
- return kvs->kvs[ kv_id ].children;
- else
- return 0;
+ vg_kv *kv = vg_stack_pointer( kvs->stack, kv_offset );
+ return kv->brother_offset;
}
-u32 vg_kv_child( vg_kvs *kvs, u32 root_id, u32 index )
+u32 vg_kv_child( vg_kvs *kvs, u32 root_offset, u32 index )
{
- VG_ASSERT( index == 0 );
-
- if( vg_kv_child_count( kvs, root_id ) )
- return root_id +1;
- else
- return 0;
-}
+ if( vg_kv_type( kvs, root_offset ) == 0x0 )
+ {
+ vg_kv *parent = vg_stack_pointer( kvs->stack, root_offset );
+ u32 offset = parent->first_child_offset;
-u32 vg_kv_next( vg_kvs *kvs, u32 kv_id )
-{
- u32 offset = kvs->kvs[ kv_id ].brother_offset;
- if( offset )
- return kv_id + offset;
- else
- return 0;
+ for( u32 i=0; (i<index) && offset; i ++ )
+ {
+ vg_kv *kv = vg_stack_pointer( kvs->stack, offset );
+ offset = kv->brother_offset;
+ }
+ return offset;
+ }
+ else return 0;
}
-u32 vg_kv_find( vg_kvs *kvs, u32 root_id, const char *key )
+u32 vg_kv_find( vg_kvs *kvs, u32 root_offset, const char *key )
{
u32 hash = vg_strdjb2( key );
- u32 child_id = vg_kv_child( kvs, root_id, 0 );
- while( child_id )
+ u32 child_offset = vg_kv_child( kvs, root_offset, 0 );
+ while( child_offset )
{
- vg_kv *kv = &kvs->kvs[ child_id ];
+ vg_kv *kv = vg_stack_pointer( kvs->stack, child_offset );
+ u32 key_length;
+ const char *child_key = vg_kv_key( kvs, child_offset, &key_length );
if( ((kv->key_info ^ hash) & 0xFFFFF) == 0 )
{
u32 key_length;
- const char *child_key = vg_kv_key( kvs, child_id, &key_length );
+ const char *child_key = vg_kv_key( kvs, child_offset, &key_length );
if( child_key )
{
for( u32 i=0; i<key_length; i ++ )
if( child_key[i] != key[i] )
goto next;
- return child_id;
+ return child_offset;
}
}
- next:child_id = vg_kv_next( kvs, child_id );
+ next:child_offset = vg_kv_next( kvs, child_offset );
}
-
return 0;
}
-typedef struct vg_kv_write vg_kv_write;
-struct vg_kv_write
-{
- FILE *fp;
- u32 depth;
-};
-
-void vg_kv_write_indent( vg_kv_write *w )
+static void vg_kv_write_indent( vg_kv_write *w )
{
for( u32 i=0; i<w->depth; i ++ )
fputc( ' ', w->fp );
}
-void vg_kv_write_string( vg_kv_write *w, const char *string, u32 length )
+static void vg_kv_write_string( vg_kv_write *w, const char *string, u32 length )
{
if( length == 0 )
length = 0xffffffff;
fputc( '\n', w->fp );
}
-#if 0
-void vg_kv_print_info( vg_kvs *kvs )
+void vg_kv_parser_print_info( vg_kv_parser *parser )
{
- vg_low( "Compression ratio: %.2f%%\n", ((f32)(kvs->kv_count * sizeof(vg_kv)) / (f32)kvs->source_length )*100.0f );
+ vg_low( "KV stats\n"
+ " bytes pages: %u\n"
+ " bytes strings: %u\n"
+ " source characters: %u. compression ratio: %.2f%%\n", parser->kvs->stat_memory_pages,
+ parser->stat_memory_strings,
+ parser->stat_source_characters,
+ (f32)(parser->stat_memory_strings+parser->kvs->stat_memory_pages) /
+ (f32)parser->stat_source_characters * 100.0f );
}
-void vg_kv_print_tree( vg_kv_write *w, vg_kvs *kvs, u32 root_id )
+void vg_kv_print_tree( vg_kv_write *w, vg_kvs *kvs, u32 root_offset )
{
- VG_ASSERT( vg_kv_type( kvs, root_id ) == 0x0 );
+ VG_ASSERT( vg_kv_type( kvs, root_offset ) == 0x0 );
u32 root_len;
- const char *root_str = vg_kv_key( kvs, root_id, &root_len );
+ const char *root_str = vg_kv_key( kvs, root_offset, &root_len );
vg_kv_write_block( w, root_str, root_len );
- u32 child_id = vg_kv_child( kvs, root_id, 0 );
- while( child_id )
+ u32 child_offset = vg_kv_child( kvs, root_offset, 0 );
+ while( child_offset )
{
- if( vg_kv_type( kvs, child_id ) == 0x0 )
- vg_kv_print_tree( w, kvs, child_id );
+ if( vg_kv_type( kvs, child_offset ) == 0x0 )
+ vg_kv_print_tree( w, kvs, child_offset );
else
{
u32 key_len;
- const char *key_str = vg_kv_key( kvs, child_id, &key_len );
+ const char *key_str = vg_kv_key( kvs, child_offset, &key_len );
u32 value_len;
- const char *value_str = vg_kv_value( kvs, child_id, &value_len );
+ const char *value_str = vg_kv_value( kvs, child_offset, &value_len );
if( key_str && value_str )
vg_kv_write_kv( w, key_str, key_len, value_str, value_len );
}
- child_id = vg_kv_next( kvs, child_id );
+ child_offset = vg_kv_next( kvs, child_offset );
}
vg_kv_end_block( w );
-#if 0
- vg_info( "%.*s}\n", depth*2, k_whitespace );
-#endif
}
-#endif
#pragma once
#include "vg_platform.h"
+#include "vg_mem.h"
+
+#define VG_KV_PAGE_COUNT 32
+
+/* define VG_KV_SUPPORT_LEGACY_MSG to automatically convert from old vg_msg stream to plain text KVs */
-typedef struct vg_kvs vg_kvs;
typedef struct vg_kv vg_kv;
-struct vg_kvs
+typedef struct vg_kvs vg_kvs;
+typedef struct vg_kv_parser vg_kv_parser;
+typedef struct vg_kv_write vg_kv_write;
+
+/* Initialize kvs set and set the allocator for it */
+void vg_kvs_init( vg_kvs *kvs, vg_stack_allocator *stack );
+
+/* Initialize KV parser ready to accept buffer fragments
+ * out_kvs must be initialized
+ *
+ * vg_kv_parse_buffer takes a text buffer, if its null terminated, buffer_length can be 0
+ */
+void vg_kv_parser_init( vg_kv_parser *parser, vg_kvs *out_kvs );
+void vg_kv_parse_buffer( vg_kv_parser *parser, const char *buffer, u32 buffer_length );
+void vg_kv_link( vg_kv_parser *parser, u32 offset, u32 depth );
+
+/* returns the type of this KV.
+ * 0: frame
+ * 1: kv pair
+ */
+u32 vg_kv_type( vg_kvs *kvs, u32 kv_offset );
+
+/* get key / values associated with KV pair or only key for a frame. */
+const char *vg_kv_key( vg_kvs *kvs, u32 kv_offset, u32 *out_length );
+const char *vg_kv_value( vg_kvs *kvs, u32 kv_offset, u32 *out_length );
+
+/* get the child KV at index, returns 0 if out of range */
+u32 vg_kv_child( vg_kvs *kvs, u32 root_offset, u32 index );
+u32 vg_kv_next( vg_kvs *kvs, u32 kv_offset );
+u32 vg_kv_find( vg_kvs *kvs, u32 root_offset, const char *key );
+
+/* editing kvs
+ * if value is NULL, it appends a named frame
+ * if key is NULL, value must also be NULL. it appends a nameless frame
+ * if both key and value are set, it appends a KV pair
+ * returns the new KV offset
+ */
+u32 vg_kv_append( vg_kvs *kvs, u32 parent_offset, const char *key, const char *value )
+
+/* Writing KV files. w should be initialized with depth 0, and fp to a valid C stream pointer */
+void vg_kv_write_block( vg_kv_write *w, const char *name, u32 name_length );
+void vg_kv_end_block( vg_kv_write *w );
+void vg_kv_write_kv( vg_kv_write *w, const char *key, u32 key_len, const char *value, u32 value_len );
+
+/* Just for analysis */
+void vg_kv_parser_print_info( vg_kv_parser *parser );
+void vg_kv_print_tree( vg_kv_write *w, vg_kvs *kvs, u32 root_offset );
+
+struct vg_kv_parser
{
- const char *source_buffer;
- u32 source_length;
+ vg_kvs *kvs;
+ u32 token0_start_offset, token0_length, token0_hash,
+ token1_start_offset, token1_length, token1_hash;
+
+ u32 stat_memory_strings,
+ stat_source_characters;
+
+ char token0_deliminator;
+ char *token0_buffer;
+ u32 depth;
+
+ struct
+ {
+ u32 frame_offset, latest_child_offset;
+ }
+ frame_stack[64];
+};
- vg_kv *kvs;
- u32 kv_count;
+struct vg_kvs
+{
+ vg_stack_allocator *stack;
+ u32 root_offset;
+ u32 kv_page_offset, kv_page_count;
+ u32 stat_memory_pages;
};
struct vg_kv
{
u32 key_info; /* 20 bit hash, 10 bit key length, 2 bit type */
- u32 key_offset; /* 32 bit, indexes into the raw text buffer */
- u32 brother_offset;/* 24 bits, relative jump past all descendents to the next key. If 0, no brothers */
+ u32 key_offset; /* 32 bit, indexes kvs->stack.data. same for any other _offset field */
+ u32 brother_offset;
union
{
}
value;
- u32 children; /* 24 bits */
+ u32 first_child_offset;
};
};
+
+struct vg_kv_write
+{
+ FILE *fp;
+ u32 depth;
+};