diff options
| -rw-r--r-- | Makefile | 19 | ||||
| -rw-r--r-- | src/blocks.c | 1352 | ||||
| -rw-r--r-- | src/bstrlib.c | 2979 | ||||
| -rw-r--r-- | src/bstrlib.h | 304 | ||||
| -rw-r--r-- | src/buffer.c | 313 | ||||
| -rw-r--r-- | src/buffer.h | 119 | ||||
| -rw-r--r-- | src/case_fold_switch.inc (renamed from src/case_fold_switch.c) | 0 | ||||
| -rw-r--r-- | src/casefold.c | 2699 | ||||
| -rw-r--r-- | src/detab.c | 48 | ||||
| -rw-r--r-- | src/getopt.c | 199 | ||||
| -rw-r--r-- | src/inlines.c | 1711 | ||||
| -rw-r--r-- | src/main.c | 2 | ||||
| -rw-r--r-- | src/scanners.h | 28 | ||||
| -rw-r--r-- | src/scanners.re | 54 | ||||
| -rw-r--r-- | src/stmd.h | 76 | ||||
| -rw-r--r-- | src/utf8.c | 221 | 
16 files changed, 2252 insertions, 7872 deletions
@@ -1,5 +1,5 @@ -CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Werror +CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-ggdb3 -O0 -Wall -Werror  SRCDIR=src  DATADIR=data @@ -41,13 +41,13 @@ testjs: spec.txt  benchjs:  	node js/bench.js ${BENCHINP} -$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/blocks.o $(SRCDIR)/detab.o $(SRCDIR)/bstrlib.o $(SRCDIR)/scanners.o $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o +$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o  	$(CC) $(LDFLAGS) -o $@ $^  $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re  	re2c --case-insensitive -bis $< > $@ || (rm $@ && false) -$(SRCDIR)/case_fold_switch.c: $(DATADIR)/CaseFolding-3.2.0.txt +$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt  	perl mkcasefold.pl < $< > $@  .PHONY: leakcheck clean fuzztest dingus upload @@ -58,6 +58,9 @@ dingus:  leakcheck: $(PROG)  	cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG) +operf: $(PROG) +	operf $(PROG) <bench.md >/dev/null +  fuzztest:  	for i in `seq 1 10`; do \  	  time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done @@ -69,7 +72,7 @@ update-site: spec.html narrative.html  	(cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)  
clean: -	-rm test $(SRCDIR)/*.o $(SRCDIR)/scanners.c -	-rm -r *.dSYM -	-rm README.html -	-rm spec.md fuzz.txt spec.html +	-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c +	-rm -rf *.dSYM +	-rm -f README.html +	-rm -f spec.md fuzz.txt spec.html diff --git a/src/blocks.c b/src/blocks.c index 2776231..eabac03 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1,4 +1,5 @@  #include <stdlib.h> +#include <assert.h>  #include <stdio.h>  #include <stdbool.h>  #include <ctype.h> @@ -10,738 +11,767 @@  static block* make_block(int tag, int start_line, int start_column)  { -  block* e; -  e = (block*) malloc(sizeof(block)); -  e->tag = tag; -  e->open = true; -  e->last_line_blank = false; -  e->start_line = start_line; -  e->start_column = start_column; -  e->end_line = start_line; -  e->children = NULL; -  e->last_child = NULL; -  e->parent = NULL; -  e->top = NULL; -  e->attributes.refmap = NULL; -  e->string_content = bfromcstr(""); -  e->inline_content = NULL; -  e->next = NULL; -  e->prev = NULL; -  return e; +	block* e; +	e = (block*) malloc(sizeof(block)); +	e->tag = tag; +	e->open = true; +	e->last_line_blank = false; +	e->start_line = start_line; +	e->start_column = start_column; +	e->end_line = start_line; +	e->children = NULL; +	e->last_child = NULL; +	e->parent = NULL; +	e->top = NULL; +	e->attributes.refmap = NULL; +	gh_buf_init(&e->string_content, 32); +	e->string_pos = 0; +	e->inline_content = NULL; +	e->next = NULL; +	e->prev = NULL; +	return e;  }  // Create a root document block.  
extern block* make_document()  { -  block * e = make_block(document, 1, 1); -  reference * map = NULL; -  reference ** refmap; -  refmap = (reference**) malloc(sizeof(reference*)); -  *refmap = map; -  e->attributes.refmap = refmap; -  e->top = e; -  return e; +	block * e = make_block(document, 1, 1); +	reference * map = NULL; +	reference ** refmap; +	refmap = (reference**) malloc(sizeof(reference*)); +	*refmap = map; +	e->attributes.refmap = refmap; +	e->top = e; +	return e;  }  // Returns true if line has only space characters, else false. -bool is_blank(bstring s, int offset) +bool is_blank(gh_buf *s, int offset)  { -  char c; -  while ((c = bchar(s, offset))) { -    if (c == '\n') { -      return true; -    } else if (c == ' ') { -      offset++; -    } else { -      return false; -    } -  } -  return true; +	while (offset < s->size) { +		switch (s->ptr[offset]) { +			case '\n': +				return true; +			case ' ': +				offset++; +			default: +				return false; +		} +	} + +	return true;  }  static inline bool can_contain(int parent_type, int child_type)  { -  return ( parent_type == document || -           parent_type == block_quote || -           parent_type == list_item || -           (parent_type == list && child_type == list_item) ); +	return ( parent_type == document || +			parent_type == block_quote || +			parent_type == list_item || +			(parent_type == list && child_type == list_item) );  }  static inline bool accepts_lines(int block_type)  { -  return (block_type == paragraph || -          block_type == atx_header || -          block_type == indented_code || -          block_type == fenced_code); +	return (block_type == paragraph || +			block_type == atx_header || +			block_type == indented_code || +			block_type == fenced_code);  } -static int add_line(block* block, bstring ln, int offset) +static void add_line(block* block, gh_buf *ln, int offset)  { -  bstring s = bmidstr(ln, offset, blength(ln) - offset); -  check(block->open, "attempted to add line 
(%s) to closed container (%d)", -        ln->data, block->tag); -  check(bformata(block->string_content, "%s", s->data) == 0, -        "could not append line to string_content"); -  bdestroy(s); -  return 0; - error: -  return -1; +	assert(block->open); +	gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset);  } -static int remove_trailing_blank_lines(bstring ln) +static void remove_trailing_blank_lines(gh_buf *ln)  { -  bstring tofind = bfromcstr(" \t\r\n"); -  int pos; -  // find last nonspace: -  pos = bninchrr(ln, blength(ln) - 1, tofind); -  if (pos == BSTR_ERR) { // all spaces -    bassigncstr(ln, ""); -  } else { -    // find next newline after it -    pos = bstrchrp(ln, '\n', pos); -    if (pos != BSTR_ERR) { -      check(bdelete(ln, pos, blength(ln) - pos) != BSTR_ERR, -        "failed to delete trailing blank lines"); -    } -  } -  bdestroy(tofind); -  return 0; - error: -  return -1; +	int i; + +	for (i = ln->size - 1; i >= 0; --i) { +		char c = ln->ptr[i]; + +		if (c != ' ' && c != '\t' && c != '\r' && c != '\n') +			break; +	} + +	if (i < 0) { +		gh_buf_clear(ln); +		return; +	} + +	i = gh_buf_strchr(ln, '\n', i); +	if (i >= 0) +		gh_buf_truncate(ln, i + 1);  }  // Check to see if a block ends with a blank line, descending  // if needed into lists and sublists.  
static bool ends_with_blank_line(block* block)  { -  if (block->last_line_blank) { -    return true; -  } -  if ((block->tag == list || block->tag == list_item) && block->last_child) { -    return ends_with_blank_line(block->last_child); -  } else { -    return false; -  } +	if (block->last_line_blank) { +		return true; +	} +	if ((block->tag == list || block->tag == list_item) && block->last_child) { +		return ends_with_blank_line(block->last_child); +	} else { +		return false; +	}  }  // Break out of all containing lists  static int break_out_of_lists(block ** bptr, int line_number)  { -  block * container = *bptr; -  block * b = container->top; -  // find first containing list: -  while (b && b->tag != list) { -    b = b->last_child; -  } -  if (b) { -    while (container && container != b) { -      finalize(container, line_number); -      container = container->parent; -    } -    finalize(b, line_number); -    *bptr = b->parent; -  } -  return 0; +	block * container = *bptr; +	block * b = container->top; +	// find first containing list: +	while (b && b->tag != list) { +		b = b->last_child; +	} +	if (b) { +		while (container && container != b) { +			finalize(container, line_number); +			container = container->parent; +		} +		finalize(b, line_number); +		*bptr = b->parent; +	} +	return 0;  } -extern int finalize(block* b, int line_number) +extern void finalize(block* b, int line_number)  { -  int firstlinelen; -  int pos; -  block* item; -  block* subitem; - -  check(b != NULL, "finalize called on null block"); -  if (!b->open) { -    return 0; // don't do anything if the block is already closed -  } -  b->open = false; -  if (line_number > b->start_line) { -    b->end_line = line_number - 1; -  } else { -    b->end_line = line_number; -  } - -  switch (b->tag) { - -  case paragraph: -    pos = 0; -    while (bchar(b->string_content, 0) == '[' && -           (pos = parse_reference(b->string_content, -                                  b->top->attributes.refmap))) 
{ -      bdelete(b->string_content, 0, pos); -    } -    if (is_blank(b->string_content, 0)) { -      b->tag = reference_def; -    } -    break; - -  case indented_code: -    remove_trailing_blank_lines(b->string_content); -    bformata(b->string_content, "\n"); -    break; - -  case fenced_code: -    // first line of contents becomes info -    firstlinelen = bstrchr(b->string_content, '\n'); -    b->attributes.fenced_code_data.info = -      bmidstr(b->string_content, 0, firstlinelen); -    bdelete(b->string_content, 0, firstlinelen + 1); // +1 for \n -    btrimws(b->attributes.fenced_code_data.info); -    unescape(b->attributes.fenced_code_data.info); -    break; - -  case list: // determine tight/loose status -    b->attributes.list_data.tight = true; // tight by default -    item = b->children; - -    while (item) { -      // check for non-final non-empty list item ending with blank line: -      if (item->last_line_blank && item->next) { -        b->attributes.list_data.tight = false; -        break; -      } -      // recurse into children of list item, to see if there are -      // spaces between them: -      subitem = item->children; -      while (subitem) { -        if (ends_with_blank_line(subitem) && -            (item->next || subitem->next)) { -          b->attributes.list_data.tight = false; -          break; -        } -        subitem = subitem->next; -      } -      if (!(b->attributes.list_data.tight)) { -        break; -      } -      item = item->next; -    } - -    break; - -  default: -    break; -  } - -  return 0; - error: -  return -1; +	int firstlinelen; +	int pos; +	block* item; +	block* subitem; + +	if (!b->open) +		return; // don't do anything if the block is already closed + +	b->open = false; +	if (line_number > b->start_line) { +		b->end_line = line_number - 1; +	} else { +		b->end_line = line_number; +	} + +	switch (b->tag) { +		case paragraph: +			pos = 0; +			while (gh_buf_at(&b->string_content, b->string_pos) == '[' && +					(pos = 
parse_reference(&b->string_content, b->string_pos, +										   b->top->attributes.refmap))) { +				b->string_pos = pos; +			} +			if (is_blank(&b->string_content, b->string_pos)) { +				b->tag = reference_def; +			} +			break; + +		case indented_code: +			remove_trailing_blank_lines(&b->string_content); +			gh_buf_putc(&b->string_content, '\n'); +			break; + +		case fenced_code: +			// first line of contents becomes info +			firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos); +			gh_buf_set( +				&b->attributes.fenced_code_data.info, +				b->string_content.ptr + b->string_pos, +				firstlinelen +			); + +			b->string_pos = firstlinelen + 1; + +			gh_buf_trim(&b->attributes.fenced_code_data.info); +			unescape_buffer(&b->attributes.fenced_code_data.info); +			break; + +		case list: // determine tight/loose status +			b->attributes.list_data.tight = true; // tight by default +			item = b->children; + +			while (item) { +				// check for non-final non-empty list item ending with blank line: +				if (item->last_line_blank && item->next) { +					b->attributes.list_data.tight = false; +					break; +				} +				// recurse into children of list item, to see if there are +				// spaces between them: +				subitem = item->children; +				while (subitem) { +					if (ends_with_blank_line(subitem) && +							(item->next || subitem->next)) { +						b->attributes.list_data.tight = false; +						break; +					} +					subitem = subitem->next; +				} +				if (!(b->attributes.list_data.tight)) { +					break; +				} +				item = item->next; +			} + +			break; + +		default: +			break; +	}  }  // Add a block as child of another.  Return pointer to child.  extern block* add_child(block* parent, -                        int block_type, int start_line, int start_column) +		int block_type, int start_line, int start_column)  { -  // if 'parent' isn't the kind of block that can accept this child, -  // then back up til we hit a block that can. 
-  while (!can_contain(parent->tag, block_type)) { -    finalize(parent, start_line); -    parent = parent->parent; -  } - -  check(parent != NULL, "parent container cannot accept children"); - -  block* child = make_block(block_type, start_line, start_column); -  child->parent = parent; -  child->top = parent->top; - -  if (parent->last_child) { -    parent->last_child->next = child; -    child->prev = parent->last_child; -  } else { -    parent->children = child; -    child->prev = NULL; -  } -  parent->last_child = child; -  return child; - error: -  return NULL; +	assert(parent); + +	// if 'parent' isn't the kind of block that can accept this child, +	// then back up til we hit a block that can. +	while (!can_contain(parent->tag, block_type)) { +		finalize(parent, start_line); +		parent = parent->parent; +	} + +	block* child = make_block(block_type, start_line, start_column); +	child->parent = parent; +	child->top = parent->top; + +	if (parent->last_child) { +		parent->last_child->next = child; +		child->prev = parent->last_child; +	} else { +		parent->children = child; +		child->prev = NULL; +	} +	parent->last_child = child; +	return child;  }  // Free a block list and any children.  
extern void free_blocks(block* e)  { -  block * next; -  while (e != NULL) { -    next = e->next; -    free_inlines(e->inline_content); -    bdestroy(e->string_content); -    if (e->tag == fenced_code) { -      bdestroy(e->attributes.fenced_code_data.info); -    } else if (e->tag == document) { -      free_reference_map(e->attributes.refmap); -    } -    free_blocks(e->children); -    free(e); -    e = next; -  } +	block * next; +	while (e != NULL) { +		next = e->next; +		free_inlines(e->inline_content); +		gh_buf_free(&e->string_content); +		if (e->tag == fenced_code) { +			gh_buf_free(&e->attributes.fenced_code_data.info); +		} else if (e->tag == document) { +			free_reference_map(e->attributes.refmap); +		} +		free_blocks(e->children); +		free(e); +		e = next; +	}  }  // Walk through block and all children, recursively, parsing  // string content into inline content where appropriate. -int process_inlines(block* cur, reference** refmap) +void process_inlines(block* cur, reference** refmap)  { -  switch (cur->tag) { - -  case paragraph: -  case atx_header: -  case setext_header: -    check(cur->string_content != NULL, "string_content is NULL"); -    cur->inline_content = parse_inlines(cur->string_content, refmap); -    bdestroy(cur->string_content); -    cur->string_content = NULL; -    break; - -  default: -    break; -  } - -  block * child = cur->children; -  while (child != NULL) { -    process_inlines(child, refmap); -    child = child->next; -  } - -  return 0; - error: -  return -1; +	switch (cur->tag) { +		case paragraph: +		case atx_header: +		case setext_header: +			cur->inline_content = parse_inlines(&cur->string_content, cur->string_pos, refmap); +			// MEM +			// gh_buf_free(&cur->string_content); +			break; + +		default: +			break; +	} + +	block *child = cur->children; +	while (child != NULL) { +		process_inlines(child, refmap); +		child = child->next; +	}  }  // Attempts to parse a list item marker (bullet or enumerated).  
// On success, returns length of the marker, and populates  // data with the details.  On failure, returns 0. -static int parse_list_marker(bstring ln, int pos, -                             struct ListData ** dataptr) +static int parse_list_marker(gh_buf *ln, int pos, +		struct ListData ** dataptr)  { -  char c; -  int startpos; -  int start = 1; -  struct ListData * data; - -  startpos = pos; -  c = bchar(ln, pos); - -  if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { -    pos++; -    if (!isspace(bchar(ln, pos))) { -      return 0; -    } -    data = malloc(sizeof(struct ListData)); -    data->marker_offset = 0; // will be adjusted later -    data->list_type = bullet; -    data->bullet_char = c; -    data->start = 1; -    data->delimiter = period; -    data->tight = false; - -  } else if (isdigit(c)) { - -    pos++; -    while (isdigit(bchar(ln, pos))) { -      pos++; -    } - -    if (!sscanf((char *) ln->data + startpos, "%d", &start)) { -      log_err("sscanf failed"); -      return 0; -    } - -    c = bchar(ln, pos); -    if (c == '.' || c == ')') { -      pos++; -      if (!isspace(bchar(ln, pos))) { -        return 0; -      } -      data = malloc(sizeof(struct ListData)); -      data->marker_offset = 0; // will be adjusted later -      data->list_type = ordered; -      data->bullet_char = 0; -      data->start = start; -      data->delimiter = (c == '.' ? 
period : parens); -      data->tight = false; -    } else { -      return 0; -    } - -  } else { -    return 0; -  } - -  *dataptr = data; -  return (pos - startpos); +	char c; +	int startpos; +	struct ListData * data; + +	startpos = pos; +	c = gh_buf_at(ln, pos); + +	if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { +		pos++; +		if (!isspace(gh_buf_at(ln, pos))) { +			return 0; +		} +		data = malloc(sizeof(struct ListData)); +		data->marker_offset = 0; // will be adjusted later +		data->list_type = bullet; +		data->bullet_char = c; +		data->start = 1; +		data->delimiter = period; +		data->tight = false; + +	} else if (isdigit(c)) { +		int start = 0; + +		do { +			start = (10 * start) + (gh_buf_at(ln, pos) - '0'); +			pos++; +		} while (isdigit(gh_buf_at(ln, pos))); + +		c = gh_buf_at(ln, pos); +		if (c == '.' || c == ')') { +			pos++; +			if (!isspace(gh_buf_at(ln, pos))) { +				return 0; +			} +			data = malloc(sizeof(struct ListData)); +			data->marker_offset = 0; // will be adjusted later +			data->list_type = ordered; +			data->bullet_char = 0; +			data->start = start; +			data->delimiter = (c == '.' ? period : parens); +			data->tight = false; +		} else { +			return 0; +		} + +	} else { +		return 0; +	} + +	*dataptr = data; +	return (pos - startpos);  }  // Return 1 if list item belongs in list, else 0.  
static int lists_match(struct ListData list_data, -                       struct ListData item_data) +		struct ListData item_data) +{ +	return (list_data.list_type == item_data.list_type && +			list_data.delimiter == item_data.delimiter && +			// list_data.marker_offset == item_data.marker_offset && +			list_data.bullet_char == item_data.bullet_char); +} + +static void expand_tabs(gh_buf *ob, const char *line, size_t size) +{ +	size_t  i = 0, tab = 0; + +	while (i < size) { +		size_t org = i; + +		while (i < size && line[i] != '\t') { +			i++; tab++; +		} + +		if (i > org) +			gh_buf_put(ob, line + org, i - org); + +		if (i >= size) +			break; + +		do { +			gh_buf_putc(ob, ' '); tab++; +		} while (tab % 4); + +		i++; +	} +} + +extern block *stmd_parse_document(const char *buffer, size_t len)  { -  return (list_data.list_type == item_data.list_type && -          list_data.delimiter == item_data.delimiter && -          // list_data.marker_offset == item_data.marker_offset && -          list_data.bullet_char == item_data.bullet_char); +	gh_buf line = GH_BUF_INIT; + +	block *document = make_document(); +	int linenum = 1; +	const char *end = buffer + len; + +	while (buffer < end) { +		const char *eol = memchr(buffer, '\n', end - buffer); + +		if (!eol) { +			expand_tabs(&line, buffer, end - buffer); +			buffer = end; +		} else { +			expand_tabs(&line, buffer, (eol - buffer) + 1); +			buffer += (eol - buffer) + 1; +		} + +		incorporate_line(&line, linenum, &document); +		gh_buf_clear(&line); +		linenum++; +	} + +	gh_buf_free(&line); + +	while (document != document->top) { +		finalize(document, linenum); +		document = document->parent; +	} + +	finalize(document, linenum); +	process_inlines(document, document->attributes.refmap); + +	return document;  }  // Process one line at a time, modifying a block.  // Returns 0 if successful.  curptr is changed to point to  // the currently open block. 
-extern int incorporate_line(bstring ln, int line_number, block** curptr) +extern void incorporate_line(gh_buf *ln, int line_number, block** curptr)  { -  block* last_matched_container; -  int offset = 0; -  int matched = 0; -  int lev = 0; -  int i; -  struct ListData * data = NULL; -  bool all_matched = true; -  block* container; -  block* cur = *curptr; -  bool blank = false; -  int first_nonspace; -  int indent; - -  // detab input line -  check(bdetab(ln, 1) != BSTR_ERR, -        "invalid UTF-8 sequence in line %d\n", line_number); - -  // container starts at the document root. -  container = cur->top; - -  // for each containing block, try to parse the associated line start. -  // bail out on failure:  container will point to the last matching block. - -  while (container->last_child && container->last_child->open) { -    container = container->last_child; - -    first_nonspace = offset; -    while (bchar(ln, first_nonspace) == ' ') { -      first_nonspace++; -    } - -    indent = first_nonspace - offset; -    blank = bchar(ln, first_nonspace) == '\n'; - -    if (container->tag == block_quote) { - -      matched = indent <= 3 && bchar(ln, first_nonspace) == '>'; -      if (matched) { -        offset = first_nonspace + 1; -        if (bchar(ln, offset) == ' ') { -          offset++; -        } -      } else { -        all_matched = false; -      } - -    } else if (container->tag == list_item) { - -      if (indent >= container->attributes.list_data.marker_offset + -          container->attributes.list_data.padding) { -        offset += container->attributes.list_data.marker_offset + -          container->attributes.list_data.padding; -      } else if (blank) { -        offset = first_nonspace; -      } else { -        all_matched = false; -      } - -    } else if (container->tag == indented_code) { - -      if (indent >= CODE_INDENT) { -        offset += CODE_INDENT; -      } else if (blank) { -        offset = first_nonspace; -      } else { -        
all_matched = false; -      } - -    } else if (container->tag == atx_header || -               container->tag == setext_header) { - -      // a header can never contain more than one line -      all_matched = false; - -    } else if (container->tag == fenced_code) { - -      // skip optional spaces of fence offset -      i = container->attributes.fenced_code_data.fence_offset; -      while (i > 0 && bchar(ln, offset) == ' ') { -        offset++; -        i--; -      } - -    } else if (container->tag == html_block) { - -      if (blank) { -        all_matched = false; -      } - -    } else if (container->tag == paragraph) { - -      if (blank) { -        container->last_line_blank =true; -        all_matched = false; -      } - -    } - -    if (!all_matched) { -      container = container->parent;  // back up to last matching block -      break; -    } -  } - -  last_matched_container = container; - -  // check to see if we've hit 2nd blank line, break out of list: -  if (blank && container->last_line_blank) { -    break_out_of_lists(&container, line_number); -  } - -  // unless last matched container is code block, try new container starts: -  while (container->tag != fenced_code && container->tag != indented_code && -         container->tag != html_block) { - -    first_nonspace = offset; -    while (bchar(ln, first_nonspace) == ' ') { -      first_nonspace++; -    } - -    indent = first_nonspace - offset; -    blank = bchar(ln, first_nonspace) == '\n'; - -    if (indent >= CODE_INDENT) { - -      if (cur->tag != paragraph && !blank) { -        offset += CODE_INDENT; -        container = add_child(container, indented_code, line_number, offset + 1); -      } else { // indent > 4 in lazy line -        break; -      } - -    } else if (bchar(ln, first_nonspace) == '>') { - -      offset = first_nonspace + 1; -      // optional following character -      if (bchar(ln, offset) == ' ') { -        offset++; -      } -      container = add_child(container, 
block_quote, line_number, offset + 1); - -    } else if ((matched = scan_atx_header_start(ln, first_nonspace))) { - -      offset = first_nonspace + matched; -      container = add_child(container, atx_header, line_number, offset + 1); -      int hashpos = bstrchrp(ln, '#', first_nonspace); -      check(hashpos != BSTR_ERR, "no # found in atx header start"); -      int level = 0; -      while (bchar(ln, hashpos) == '#') { -        level++; -        hashpos++; -      } -      container->attributes.header_level = level; - -    } else if ((matched = scan_open_code_fence(ln, first_nonspace))) { - -      container = add_child(container, fenced_code, line_number, -          first_nonspace + 1); -      container->attributes.fenced_code_data.fence_char = bchar(ln, -          first_nonspace); -      container->attributes.fenced_code_data.fence_length = matched; -      container->attributes.fenced_code_data.fence_offset = -        first_nonspace - offset; -      offset = first_nonspace + matched; - -    } else if ((matched = scan_html_block_tag(ln, first_nonspace))) { - -      container = add_child(container, html_block, line_number, -                            first_nonspace + 1); -      // note, we don't adjust offset because the tag is part of the text - -    } else if (container->tag == paragraph && -              (lev = scan_setext_header_line(ln, first_nonspace)) && -               // check that there is only one line in the paragraph: -               bstrrchrp(container->string_content, '\n', -                         blength(container->string_content) - 2) == BSTR_ERR) { - -        container->tag = setext_header; -        container->attributes.header_level = lev; -        offset = blength(ln) - 1; - -    } else if (!(container->tag == paragraph && !all_matched) && -               (matched = scan_hrule(ln, first_nonspace))) { - -      // it's only now that we know the line is not part of a setext header: -      container = add_child(container, hrule, line_number, 
first_nonspace + 1); -      finalize(container, line_number); -      container = container->parent; -      offset = blength(ln) - 1; - -    } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) { - -        // compute padding: -        offset = first_nonspace + matched; -        i = 0; -        while (i <= 5 && bchar(ln, offset + i) == ' ') { -          i++; -        } -        // i = number of spaces after marker, up to 5 -        if (i >= 5 || i < 1 || bchar(ln, offset) == '\n') { -          data->padding = matched + 1; -          if (i > 0) { -            offset += 1; -          } -        } else { -          data->padding = matched + i; -          offset += i; -        } - -        // check container; if it's a list, see if this list item -        // can continue the list; otherwise, create a list container. - -        data->marker_offset = indent; - -        if (container->tag != list || -            !lists_match(container->attributes.list_data, *data)) { -          container = add_child(container, list, line_number, -              first_nonspace + 1); -          container->attributes.list_data = *data; -        } - -        // add the list item -        container = add_child(container, list_item, line_number, -            first_nonspace + 1); -        container->attributes.list_data = *data; -        free(data); - -    } else { -      break; -    } - -    if (accepts_lines(container->tag)) { -      // if it's a line container, it can't contain other containers -      break; -    } -  } - -  // what remains at offset is a text line.  add the text to the -  // appropriate container. - -  first_nonspace = offset; -  while (bchar(ln, first_nonspace) == ' ') { -    first_nonspace++; -  } - -  indent = first_nonspace - offset; -  blank = bchar(ln, first_nonspace) == '\n'; - -  // block quote lines are never blank as they start with > -  // and we don't count blanks in fenced code for purposes of tight/loose -  // lists or breaking out of lists.  
we also don't set last_line_blank -  // on an empty list item. -  container->last_line_blank = (blank && -                                container->tag != block_quote && -                                container->tag != fenced_code && -                                !(container->tag == list_item && -                                  container->children == NULL && -                                  container->start_line == line_number)); - -    block *cont = container; -    while (cont->parent) { -      cont->parent->last_line_blank = false; -      cont = cont->parent; -    } - -  if (cur != last_matched_container && -      container == last_matched_container && -      !blank && -      cur->tag == paragraph && -      blength(cur->string_content) > 0) { - -    check(add_line(cur, ln, offset) == 0, "could not add line"); - -  } else { // not a lazy continuation - -    // finalize any blocks that were not matched and set cur to container: -    while (cur != last_matched_container) { - -      finalize(cur, line_number); -      cur = cur->parent; -      check(cur != NULL, "cur is NULL, last_matched_container->tag = %d", -            last_matched_container->tag); - -    } - -    if (container->tag == indented_code) { - -      check(add_line(container, ln, offset) == 0, "could not add line"); - -    } else if (container->tag == fenced_code) { - -      matched = (indent <= 3 -        && bchar(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char) -        && scan_close_code_fence(ln, first_nonspace, -                                 container->attributes.fenced_code_data.fence_length); -      if (matched) { -        // if closing fence, don't add line to container; instead, close it: -        finalize(container, line_number); -        container = container->parent; // back up to parent -      } else { -        check(add_line(container, ln, offset) == 0, "could not add line"); -      } - -    } else if (container->tag == html_block) { - -      
check(add_line(container, ln, offset) == 0, "could not add line"); - -    } else if (blank) { - -      // ??? do nothing - -    } else if (container->tag == atx_header) { - -      // chop off trailing ###s...use a scanner? -      brtrimws(ln); -      int p = blength(ln) - 1; -      int numhashes = 0; -      // if string ends in #s, remove these: -      while (bchar(ln, p) == '#') { -        p--; -        numhashes++; -      } -      if (bchar(ln, p) == '\\') { -        // the last # was escaped, so we include it. -        p++; -        numhashes--; -      } -      check(bdelete(ln, p + 1, numhashes) != BSTR_ERR, -            "could not delete final hashes"); -      check(add_line(container, ln, first_nonspace) == 0, "could not add line"); -      finalize(container, line_number); -      container = container->parent; - -    } else if (accepts_lines(container->tag)) { - -      check(add_line(container, ln, first_nonspace) == 0, "could not add line"); +	block* last_matched_container; +	int offset = 0; +	int matched = 0; +	int lev = 0; +	int i; +	struct ListData * data = NULL; +	bool all_matched = true; +	block* container; +	block* cur = *curptr; +	bool blank = false; +	int first_nonspace; +	int indent; + +	// container starts at the document root. +	container = cur->top; + +	// for each containing block, try to parse the associated line start. +	// bail out on failure:  container will point to the last matching block. 
+ +	while (container->last_child && container->last_child->open) { +		container = container->last_child; + +		first_nonspace = offset; +		while (gh_buf_at(ln, first_nonspace) == ' ') { +			first_nonspace++; +		} + +		indent = first_nonspace - offset; +		blank = gh_buf_at(ln, first_nonspace) == '\n'; + +		if (container->tag == block_quote) { + +			matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>'; +			if (matched) { +				offset = first_nonspace + 1; +				if (gh_buf_at(ln, offset) == ' ') { +					offset++; +				} +			} else { +				all_matched = false; +			} + +		} else if (container->tag == list_item) { + +			if (indent >= container->attributes.list_data.marker_offset + +					container->attributes.list_data.padding) { +				offset += container->attributes.list_data.marker_offset + +					container->attributes.list_data.padding; +			} else if (blank) { +				offset = first_nonspace; +			} else { +				all_matched = false; +			} + +		} else if (container->tag == indented_code) { + +			if (indent >= CODE_INDENT) { +				offset += CODE_INDENT; +			} else if (blank) { +				offset = first_nonspace; +			} else { +				all_matched = false; +			} + +		} else if (container->tag == atx_header || +				container->tag == setext_header) { + +			// a header can never contain more than one line +			all_matched = false; + +		} else if (container->tag == fenced_code) { + +			// skip optional spaces of fence offset +			i = container->attributes.fenced_code_data.fence_offset; +			while (i > 0 && gh_buf_at(ln, offset) == ' ') { +				offset++; +				i--; +			} + +		} else if (container->tag == html_block) { + +			if (blank) { +				all_matched = false; +			} + +		} else if (container->tag == paragraph) { + +			if (blank) { +				container->last_line_blank = true; +				all_matched = false; +			} + +		} + +		if (!all_matched) { +			container = container->parent;  // back up to last matching block +			break; +		} +	} + +	last_matched_container = container; + +	// check to see if we've hit 
2nd blank line, break out of list: +	if (blank && container->last_line_blank) { +		break_out_of_lists(&container, line_number); +	} + +	// unless last matched container is code block, try new container starts: +	while (container->tag != fenced_code && container->tag != indented_code && +			container->tag != html_block) { + +		first_nonspace = offset; +		while (gh_buf_at(ln, first_nonspace) == ' ') { +			first_nonspace++; +		} + +		indent = first_nonspace - offset; +		blank = gh_buf_at(ln, first_nonspace) == '\n'; + +		if (indent >= CODE_INDENT) { + +			if (cur->tag != paragraph && !blank) { +				offset += CODE_INDENT; +				container = add_child(container, indented_code, line_number, offset + 1); +			} else { // indent > 4 in lazy line +				break; +			} + +		} else if (gh_buf_at(ln, first_nonspace) == '>') { + +			offset = first_nonspace + 1; +			// optional following character +			if (gh_buf_at(ln, offset) == ' ') { +				offset++; +			} +			container = add_child(container, block_quote, line_number, offset + 1); + +		} else if ((matched = scan_atx_header_start(ln, first_nonspace))) { + +			offset = first_nonspace + matched; +			container = add_child(container, atx_header, line_number, offset + 1); + +			int hashpos = gh_buf_strchr(ln, '#', first_nonspace); +			assert(hashpos >= 0); + +			int level = 0; +			while (gh_buf_at(ln, hashpos) == '#') { +				level++; +				hashpos++; +			} +			container->attributes.header_level = level; + +		} else if ((matched = scan_open_code_fence(ln, first_nonspace))) { + +			container = add_child(container, fenced_code, line_number, +					first_nonspace + 1); +			container->attributes.fenced_code_data.fence_char = gh_buf_at(ln, +					first_nonspace); +			container->attributes.fenced_code_data.fence_length = matched; +			container->attributes.fenced_code_data.fence_offset = +				first_nonspace - offset; +			offset = first_nonspace + matched; + +		} else if ((matched = scan_html_block_tag(ln, first_nonspace))) { + +			container = 
add_child(container, html_block, line_number, +					first_nonspace + 1); +			// note, we don't adjust offset because the tag is part of the text + +		} else if (container->tag == paragraph && +				(lev = scan_setext_header_line(ln, first_nonspace)) && +				// check that there is only one line in the paragraph: +				gh_buf_strrchr(&container->string_content, '\n', +					gh_buf_len(&container->string_content) - 2) < 0) { + +			container->tag = setext_header; +			container->attributes.header_level = lev; +			offset = gh_buf_len(ln) - 1; + +		} else if (!(container->tag == paragraph && !all_matched) && +				(matched = scan_hrule(ln, first_nonspace))) { + +			// it's only now that we know the line is not part of a setext header: +			container = add_child(container, hrule, line_number, first_nonspace + 1); +			finalize(container, line_number); +			container = container->parent; +			offset = gh_buf_len(ln) - 1; + +		} else if ((matched = parse_list_marker(ln, first_nonspace, &data))) { + +			// compute padding: +			offset = first_nonspace + matched; +			i = 0; +			while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') { +				i++; +			} +			// i = number of spaces after marker, up to 5 +			if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') { +				data->padding = matched + 1; +				if (i > 0) { +					offset += 1; +				} +			} else { +				data->padding = matched + i; +				offset += i; +			} + +			// check container; if it's a list, see if this list item +			// can continue the list; otherwise, create a list container. 
+ +			data->marker_offset = indent; + +			if (container->tag != list || +					!lists_match(container->attributes.list_data, *data)) { +				container = add_child(container, list, line_number, +						first_nonspace + 1); +				container->attributes.list_data = *data; +			} + +			// add the list item +			container = add_child(container, list_item, line_number, +					first_nonspace + 1); +			container->attributes.list_data = *data; +			free(data); + +		} else { +			break; +		} + +		if (accepts_lines(container->tag)) { +			// if it's a line container, it can't contain other containers +			break; +		} +	} + +	// what remains at offset is a text line.  add the text to the +	// appropriate container. + +	first_nonspace = offset; +	while (gh_buf_at(ln, first_nonspace) == ' ') { +		first_nonspace++; +	} + +	indent = first_nonspace - offset; +	blank = gh_buf_at(ln, first_nonspace) == '\n'; + +	// block quote lines are never blank as they start with > +	// and we don't count blanks in fenced code for purposes of tight/loose +	// lists or breaking out of lists.  we also don't set last_line_blank +	// on an empty list item. 
+	container->last_line_blank = (blank && +			container->tag != block_quote && +			container->tag != fenced_code && +			!(container->tag == list_item && +				container->children == NULL && +				container->start_line == line_number)); + +	block *cont = container; +	while (cont->parent) { +		cont->parent->last_line_blank = false; +		cont = cont->parent; +	} + +	if (cur != last_matched_container && +			container == last_matched_container && +			!blank && +			cur->tag == paragraph && +			gh_buf_len(&cur->string_content) > 0) { + +		add_line(cur, ln, offset); + +	} else { // not a lazy continuation + +		// finalize any blocks that were not matched and set cur to container: +		while (cur != last_matched_container) { + +			finalize(cur, line_number); +			cur = cur->parent; +			assert(cur != NULL); +		} + +		if (container->tag == indented_code) { + +			add_line(container, ln, offset); + +		} else if (container->tag == fenced_code) { + +			matched = (indent <= 3 +					&& gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char) +				&& scan_close_code_fence(ln, first_nonspace, +						container->attributes.fenced_code_data.fence_length); +			if (matched) { +				// if closing fence, don't add line to container; instead, close it: +				finalize(container, line_number); +				container = container->parent; // back up to parent +			} else { +				add_line(container, ln, offset); +			} + +		} else if (container->tag == html_block) { + +			add_line(container, ln, offset); + +		} else if (blank) { + +			// ??? do nothing + +		} else if (container->tag == atx_header) { +			// chop off trailing ###s...use a scanner? +			gh_buf_trim(ln); +			int p = gh_buf_len(ln) - 1; + +			// if string ends in #s, remove these: +			while (gh_buf_at(ln, p) == '#') { +				p--; +			} +			if (gh_buf_at(ln, p) == '\\') { +				// the last # was escaped, so we include it. 
+				p++; +			} + +			gh_buf_truncate(ln, p + 1); +			add_line(container, ln, first_nonspace); +			finalize(container, line_number); +			container = container->parent; + +		} else if (accepts_lines(container->tag)) { + +			add_line(container, ln, first_nonspace); + +		} else if (container->tag != hrule && container->tag != setext_header) { + +			// create paragraph container for line +			container = add_child(container, paragraph, line_number, first_nonspace + 1); +			add_line(container, ln, first_nonspace); -    } else if (container->tag != hrule && container->tag != setext_header) { - -      // create paragraph container for line -      container = add_child(container, paragraph, line_number, first_nonspace + 1); -      check(add_line(container, ln, first_nonspace) == 0, "could not add line"); - -    } else { +		} else { +			assert(false); +		} -      log_warn("Line %d with container type %d did not match any condition:\n\"%s\"", -               line_number, container->tag, ln->data); - -    } -    *curptr = container; -  } - -  return 0; - error: -  return -1; +		*curptr = container; +	}  } diff --git a/src/bstrlib.c b/src/bstrlib.c deleted file mode 100644 index 1b19dbe..0000000 --- a/src/bstrlib.c +++ /dev/null @@ -1,2979 +0,0 @@ -/*
 - * This source file is part of the bstring string library.  This code was
 - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause 
 - * BSD open source license or GPL v2.0. Refer to the accompanying documentation 
 - * for details on usage and license.
 - */
 -
 -/*
 - * bstrlib.c
 - *
 - * This file is the core module for implementing the bstring functions.
 - */
 -
 -#if defined (_MSC_VER)
 -/* These warnings from MSVC++ are totally pointless. */
 -# define _CRT_SECURE_NO_WARNINGS
 -#endif
 -
 -#include <stdio.h>
 -#include <stddef.h>
 -#include <stdarg.h>
 -#include <stdlib.h>
 -#include <string.h>
 -#include <ctype.h>
 -#include "bstrlib.h"
 -
 -/* Optionally include a mechanism for debugging memory */
 -
 -#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG)
 -#include "memdbg.h"
 -#endif
 -
 -#ifndef bstr__alloc
 -#define bstr__alloc(x) malloc (x)
 -#endif
 -
 -#ifndef bstr__free
 -#define bstr__free(p) free (p)
 -#endif
 -
 -#ifndef bstr__realloc
 -#define bstr__realloc(p,x) realloc ((p), (x))
 -#endif
 -
 -#ifndef bstr__memcpy
 -#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l))
 -#endif
 -
 -#ifndef bstr__memmove
 -#define bstr__memmove(d,s,l) memmove ((d), (s), (l))
 -#endif
 -
 -#ifndef bstr__memset
 -#define bstr__memset(d,c,l) memset ((d), (c), (l))
 -#endif
 -
 -#ifndef bstr__memcmp
 -#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l))
 -#endif
 -
 -#ifndef bstr__memchr
 -#define bstr__memchr(s,c,l) memchr ((s), (c), (l))
 -#endif
 -
 -/* Just a length safe wrapper for memmove. */
 -
 -#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); }
 -
 -/* Compute the snapped size for a given requested size.  By snapping to powers
 -   of 2 like this, repeated reallocations are avoided. */
 -static int snapUpSize (int i) {
 -	if (i < 8) {
 -		i = 8;
 -	} else {
 -		unsigned int j;
 -		j = (unsigned int) i;
 -
 -		j |= (j >>  1);
 -		j |= (j >>  2);
 -		j |= (j >>  4);
 -		j |= (j >>  8);		/* Ok, since int >= 16 bits */
 -#if (UINT_MAX != 0xffff)
 -		j |= (j >> 16);		/* For 32 bit int systems */
 -#if (UINT_MAX > 0xffffffffUL)
 -		j |= (j >> 32);		/* For 64 bit int systems */
 -#endif
 -#endif
 -		/* Least power of two greater than i */
 -		j++;
 -		if ((int) j >= i) i = (int) j;
 -	}
 -	return i;
 -}
 -
 -/*  int balloc (bstring b, int len)
 - *
 - *  Increase the size of the memory backing the bstring b to at least len.
 - */
 -int balloc (bstring b, int olen) {
 -	int len;
 -	if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || 
 -	    b->mlen < b->slen || olen <= 0) {
 -		return BSTR_ERR;
 -	}
 -
 -	if (olen >= b->mlen) {
 -		unsigned char * x;
 -
 -		if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK;
 -
 -		/* Assume probability of a non-moving realloc is 0.125 */
 -		if (7 * b->mlen < 8 * b->slen) {
 -
 -			/* If slen is close to mlen in size then use realloc to reduce
 -			   the memory defragmentation */
 -
 -			reallocStrategy:;
 -
 -			x = (unsigned char *) bstr__realloc (b->data, (size_t) len);
 -			if (x == NULL) {
 -
 -				/* Since we failed, try allocating the tighest possible 
 -				   allocation */
 -
 -				if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) {
 -					return BSTR_ERR;
 -				}
 -			}
 -		} else {
 -
 -			/* If slen is not close to mlen then avoid the penalty of copying
 -			   the extra bytes that are allocated, but not considered part of
 -			   the string */
 -
 -			if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) {
 -
 -				/* Perhaps there is no available memory for the two 
 -				   allocations to be in memory at once */
 -
 -				goto reallocStrategy;
 -
 -			} else {
 -				if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen);
 -				bstr__free (b->data);
 -			}
 -		}
 -		b->data = x;
 -		b->mlen = len;
 -		b->data[b->slen] = (unsigned char) '\0';
 -	}
 -
 -	return BSTR_OK;
 -}
 -
 -/*  int ballocmin (bstring b, int len)
 - *
 - *  Set the size of the memory backing the bstring b to len or b->slen+1,
 - *  whichever is larger.  Note that repeated use of this function can degrade
 - *  performance.
 - */
 -int ballocmin (bstring b, int len) {
 -	unsigned char * s;
 -
 -	if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 || 
 -	    b->mlen < b->slen || len <= 0) {
 -		return BSTR_ERR;
 -	}
 -
 -	if (len < b->slen + 1) len = b->slen + 1;
 -
 -	if (len != b->mlen) {
 -		s = (unsigned char *) bstr__realloc (b->data, (size_t) len);
 -		if (NULL == s) return BSTR_ERR;
 -		s[b->slen] = (unsigned char) '\0';
 -		b->data = s;
 -		b->mlen = len;
 -	}
 -
 -	return BSTR_OK;
 -}
 -
 -/*  bstring bfromcstr (const char * str)
 - *
 - *  Create a bstring which contains the contents of the '\0' terminated char *
 - *  buffer str.
 - */
 -bstring bfromcstr (const char * str) {
 -bstring b;
 -int i;
 -size_t j;
 -
 -	if (str == NULL) return NULL;
 -	j = (strlen) (str);
 -	i = snapUpSize ((int) (j + (2 - (j != 0))));
 -	if (i <= (int) j) return NULL;
 -
 -	b = (bstring) bstr__alloc (sizeof (struct tagbstring));
 -	if (NULL == b) return NULL;
 -	b->slen = (int) j;
 -	if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) {
 -		bstr__free (b);
 -		return NULL;
 -	}
 -
 -	bstr__memcpy (b->data, str, j+1);
 -	return b;
 -}
 -
 -/*  bstring bfromcstralloc (int mlen, const char * str)
 - *
 - *  Create a bstring which contains the contents of the '\0' terminated char *
 - *  buffer str.  The memory buffer backing the string is at least len 
 - *  characters in length.
 - */
 -bstring bfromcstralloc (int mlen, const char * str) {
 -bstring b;
 -int i;
 -size_t j;
 -
 -	if (str == NULL) return NULL;
 -	j = (strlen) (str);
 -	i = snapUpSize ((int) (j + (2 - (j != 0))));
 -	if (i <= (int) j) return NULL;
 -
 -	b = (bstring) bstr__alloc (sizeof (struct tagbstring));
 -	if (b == NULL) return NULL;
 -	b->slen = (int) j;
 -	if (i < mlen) i = mlen;
 -
 -	if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) {
 -		bstr__free (b);
 -		return NULL;
 -	}
 -
 -	bstr__memcpy (b->data, str, j+1);
 -	return b;
 -}
 -
 -/*  bstring blk2bstr (const void * blk, int len)
 - *
 - *  Create a bstring which contains the content of the block blk of length 
 - *  len.
 - */
 -bstring blk2bstr (const void * blk, int len) {
 -bstring b;
 -int i;
 -
 -	if (blk == NULL || len < 0) return NULL;
 -	b = (bstring) bstr__alloc (sizeof (struct tagbstring));
 -	if (b == NULL) return NULL;
 -	b->slen = len;
 -
 -	i = len + (2 - (len != 0));
 -	i = snapUpSize (i);
 -
 -	b->mlen = i;
 -
 -	b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen);
 -	if (b->data == NULL) {
 -		bstr__free (b);
 -		return NULL;
 -	}
 -
 -	if (len > 0) bstr__memcpy (b->data, blk, (size_t) len);
 -	b->data[len] = (unsigned char) '\0';
 -
 -	return b;
 -}
 -
 -/*  char * bstr2cstr (const_bstring s, char z)
 - *
 - *  Create a '\0' terminated char * buffer which is equal to the contents of 
 - *  the bstring s, except that any contained '\0' characters are converted 
 - *  to the character in z. This returned value should be freed with a 
 - *  bcstrfree () call, by the calling application.
 - */
 -char * bstr2cstr (const_bstring b, char z) {
 -int i, l;
 -char * r;
 -
 -	if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
 -	l = b->slen;
 -	r = (char *) bstr__alloc ((size_t) (l + 1));
 -	if (r == NULL) return r;
 -
 -	for (i=0; i < l; i ++) {
 -		r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i]));
 -	}
 -
 -	r[l] = (unsigned char) '\0';
 -
 -	return r;
 -}
 -
 -/*  int bcstrfree (char * s)
 - *
 - *  Frees a C-string generated by bstr2cstr ().  This is normally unnecessary
 - *  since it just wraps a call to bstr__free (), however, if bstr__alloc () 
 - *  and bstr__free () have been redefined as a macros within the bstrlib 
 - *  module (via defining them in memdbg.h after defining 
 - *  BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std 
 - *  library functions, then this allows a correct way of freeing the memory 
 - *  that allows higher level code to be independent from these macro 
 - *  redefinitions.
 - */
 -int bcstrfree (char * s) {
 -	if (s) {
 -		bstr__free (s);
 -		return BSTR_OK;
 -	}
 -	return BSTR_ERR;
 -}
 -
 -/*  int bconcat (bstring b0, const_bstring b1)
 - *
 - *  Concatenate the bstring b1 to the bstring b0.
 - */
 -int bconcat (bstring b0, const_bstring b1) {
 -int len, d;
 -bstring aux = (bstring) b1;
 -
 -	if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR;
 -
 -	d = b0->slen;
 -	len = b1->slen;
 -	if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR;
 -
 -	if (b0->mlen <= d + len + 1) {
 -		ptrdiff_t pd = b1->data - b0->data;
 -		if (0 <= pd && pd < b0->mlen) {
 -			if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR;
 -		}
 -		if (balloc (b0, d + len + 1) != BSTR_OK) {
 -			if (aux != b1) bdestroy (aux);
 -			return BSTR_ERR;
 -		}
 -	}
 -
 -	bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len);
 -	b0->data[d + len] = (unsigned char) '\0';
 -	b0->slen = d + len;
 -	if (aux != b1) bdestroy (aux);
 -	return BSTR_OK;
 -}
 -
 -/*  int bconchar (bstring b, char c)
 -/ *
 - *  Concatenate the single character c to the bstring b.
 - */
 -int bconchar (bstring b, char c) {
 -int d;
 -
 -	if (b == NULL) return BSTR_ERR;
 -	d = b->slen;
 -	if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) return BSTR_ERR;
 -	b->data[d] = (unsigned char) c;
 -	b->data[d + 1] = (unsigned char) '\0';
 -	b->slen++;
 -	return BSTR_OK;
 -}
 -
 -/*  int bcatcstr (bstring b, const char * s)
 - *
 - *  Concatenate a char * string to a bstring.
 - */
 -int bcatcstr (bstring b, const char * s) {
 -char * d;
 -int i, l;
 -
 -	if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
 -	 || b->mlen <= 0 || s == NULL) return BSTR_ERR;
 -
 -	/* Optimistically concatenate directly */
 -	l = b->mlen - b->slen;
 -	d = (char *) &b->data[b->slen];
 -	for (i=0; i < l; i++) {
 -		if ((*d++ = *s++) == '\0') {
 -			b->slen += i;
 -			return BSTR_OK;
 -		}
 -	}
 -	b->slen += i;
 -
 -	/* Need to explicitely resize and concatenate tail */
 -	return bcatblk (b, (const void *) s, (int) strlen (s));
 -}
 -
 -/*  int bcatblk (bstring b, const void * s, int len)
 - *
 - *  Concatenate a fixed length buffer to a bstring.
 - */
 -int bcatblk (bstring b, const void * s, int len) {
 -int nl;
 -
 -	if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
 -	 || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR;
 -
 -	if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */
 -	if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR;
 -
 -	bBlockCopy (&b->data[b->slen], s, (size_t) len);
 -	b->slen = nl;
 -	b->data[nl] = (unsigned char) '\0';
 -	return BSTR_OK;
 -}
 -
 -/*  bstring bstrcpy (const_bstring b)
 - *
 - *  Create a copy of the bstring b.
 - */
 -bstring bstrcpy (const_bstring b) {
 -bstring b0;
 -int i,j;
 -
 -	/* Attempted to copy an invalid string? */
 -	if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
 -
 -	b0 = (bstring) bstr__alloc (sizeof (struct tagbstring));
 -	if (b0 == NULL) {
 -		/* Unable to allocate memory for string header */
 -		return NULL;
 -	}
 -
 -	i = b->slen;
 -	j = snapUpSize (i + 1);
 -
 -	b0->data = (unsigned char *) bstr__alloc (j);
 -	if (b0->data == NULL) {
 -		j = i + 1;
 -		b0->data = (unsigned char *) bstr__alloc (j);
 -		if (b0->data == NULL) {
 -			/* Unable to allocate memory for string data */
 -			bstr__free (b0);
 -			return NULL;
 -		}
 -	}
 -
 -	b0->mlen = j;
 -	b0->slen = i;
 -
 -	if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i);
 -	b0->data[b0->slen] = (unsigned char) '\0';
 -
 -	return b0;
 -}
 -
 -/*  int bassign (bstring a, const_bstring b)
 - *
 - *  Overwrite the string a with the contents of string b.
 - */
 -int bassign (bstring a, const_bstring b) {
 -	if (b == NULL || b->data == NULL || b->slen < 0)
 -		return BSTR_ERR;
 -	if (b->slen != 0) {
 -		if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR;
 -		bstr__memmove (a->data, b->data, b->slen);
 -	} else {
 -		if (a == NULL || a->data == NULL || a->mlen < a->slen || 
 -		    a->slen < 0 || a->mlen == 0) 
 -			return BSTR_ERR;
 -	}
 -	a->data[b->slen] = (unsigned char) '\0';
 -	a->slen = b->slen;
 -	return BSTR_OK;
 -}
 -
 -/*  int bassignmidstr (bstring a, const_bstring b, int left, int len)
 - *
 - *  Overwrite the string a with the middle of contents of string b 
 - *  starting from position left and running for a length len.  left and 
 - *  len are clamped to the ends of b as with the function bmidstr.
 - */
 -int bassignmidstr (bstring a, const_bstring b, int left, int len) {
 -	if (b == NULL || b->data == NULL || b->slen < 0)
 -		return BSTR_ERR;
 -
 -	if (left < 0) {
 -		len += left;
 -		left = 0;
 -	}
 -
 -	if (len > b->slen - left) len = b->slen - left;
 -
 -	if (a == NULL || a->data == NULL || a->mlen < a->slen ||
 -	    a->slen < 0 || a->mlen == 0)
 -		return BSTR_ERR;
 -
 -	if (len > 0) {
 -		if (balloc (a, len) != BSTR_OK) return BSTR_ERR;
 -		bstr__memmove (a->data, b->data + left, len);
 -		a->slen = len;
 -	} else {
 -		a->slen = 0;
 -	}
 -	a->data[a->slen] = (unsigned char) '\0';
 -	return BSTR_OK;
 -}
 -
 -/*  int bassigncstr (bstring a, const char * str)
 - *
 - *  Overwrite the string a with the contents of char * string str.  Note that 
 - *  the bstring a must be a well defined and writable bstring.  If an error 
 - *  occurs BSTR_ERR is returned however a may be partially overwritten.
 - */
 -int bassigncstr (bstring a, const char * str) {
 -int i;
 -size_t len;
 -	if (a == NULL || a->data == NULL || a->mlen < a->slen ||
 -	    a->slen < 0 || a->mlen == 0 || NULL == str) 
 -		return BSTR_ERR;
 -
 -	for (i=0; i < a->mlen; i++) {
 -		if ('\0' == (a->data[i] = str[i])) {
 -			a->slen = i;
 -			return BSTR_OK;
 -		}
 -	}
 -
 -	a->slen = i;
 -	len = strlen (str + i);
 -	if (len > INT_MAX || i + len + 1 > INT_MAX ||
 -	    0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR;
 -	bBlockCopy (a->data + i, str + i, (size_t) len + 1);
 -	a->slen += (int) len;
 -	return BSTR_OK;
 -}
 -
 -/*  int bassignblk (bstring a, const void * s, int len)
 - *
 - *  Overwrite the string a with the contents of the block (s, len).  Note that 
 - *  the bstring a must be a well defined and writable bstring.  If an error 
 - *  occurs BSTR_ERR is returned and a is not overwritten.
 - */
 -int bassignblk (bstring a, const void * s, int len) {
 -	if (a == NULL || a->data == NULL || a->mlen < a->slen ||
 -	    a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) 
 -		return BSTR_ERR;
 -	if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR;
 -	bBlockCopy (a->data, s, (size_t) len);
 -	a->data[len] = (unsigned char) '\0';
 -	a->slen = len;
 -	return BSTR_OK;
 -}
 -
 -/*  int btrunc (bstring b, int n)
 - *
 - *  Truncate the bstring to at most n characters.
 - */
 -int btrunc (bstring b, int n) {
 -	if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen ||
 -	    b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
 -	if (b->slen > n) {
 -		b->slen = n;
 -		b->data[n] = (unsigned char) '\0';
 -	}
 -	return BSTR_OK;
 -}
 -
 -#define   upcase(c) (toupper ((unsigned char) c))
 -#define downcase(c) (tolower ((unsigned char) c))
 -#define   wspace(c) (isspace ((unsigned char) c))
 -
 -/*  int btoupper (bstring b)
 - *
 - *  Convert contents of bstring to upper case.
 - */
 -int btoupper (bstring b) {
 -int i, len;
 -	if (b == NULL || b->data == NULL || b->mlen < b->slen ||
 -	    b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
 -	for (i=0, len = b->slen; i < len; i++) {
 -		b->data[i] = (unsigned char) upcase (b->data[i]);
 -	}
 -	return BSTR_OK;
 -}
 -
 -/*  int btolower (bstring b)
 - *
 - *  Convert contents of bstring to lower case.
 - */
 -int btolower (bstring b) {
 -int i, len;
 -	if (b == NULL || b->data == NULL || b->mlen < b->slen ||
 -	    b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
 -	for (i=0, len = b->slen; i < len; i++) {
 -		b->data[i] = (unsigned char) downcase (b->data[i]);
 -	}
 -	return BSTR_OK;
 -}
 -
 -/*  int bstricmp (const_bstring b0, const_bstring b1)
 - *
 - *  Compare two strings without differentiating between case.  The return 
 - *  value is the difference of the values of the characters where the two 
 - *  strings first differ after lower case transformation, otherwise 0 is 
 - *  returned indicating that the strings are equal.  If the lengths are 
 - *  different, then a difference from 0 is given, but if the first extra 
 - *  character is '\0', then it is taken to be the value UCHAR_MAX+1.
 - */
 -int bstricmp (const_bstring b0, const_bstring b1) {
 -int i, v, n;
 -
 -	if (bdata (b0) == NULL || b0->slen < 0 || 
 -	    bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN;
 -	if ((n = b0->slen) > b1->slen) n = b1->slen;
 -	else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK;
 -
 -	for (i = 0; i < n; i ++) {
 -		v  = (char) downcase (b0->data[i])
 -		   - (char) downcase (b1->data[i]);
 -		if (0 != v) return v;
 -	}
 -
 -	if (b0->slen > n) {
 -		v = (char) downcase (b0->data[n]);
 -		if (v) return v;
 -		return UCHAR_MAX + 1;
 -	}
 -	if (b1->slen > n) {
 -		v = - (char) downcase (b1->data[n]);
 -		if (v) return v;
 -		return - (int) (UCHAR_MAX + 1);
 -	}
 -	return BSTR_OK;
 -}
 -
 -/*  int bstrnicmp (const_bstring b0, const_bstring b1, int n)
 - *
 - *  Compare two strings without differentiating between case for at most n
 - *  characters.  If the position where the two strings first differ is
 - *  before the nth position, the return value is the difference of the values
 - *  of the characters, otherwise 0 is returned.  If the lengths are different
 - *  and less than n characters, then a difference from 0 is given, but if the 
 - *  first extra character is '\0', then it is taken to be the value 
 - *  UCHAR_MAX+1.
 - */
 -int bstrnicmp (const_bstring b0, const_bstring b1, int n) {
 -int i, v, m;
 -
 -	if (bdata (b0) == NULL || b0->slen < 0 || 
 -	    bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN;
 -	m = n;
 -	if (m > b0->slen) m = b0->slen;
 -	if (m > b1->slen) m = b1->slen;
 -
 -	if (b0->data != b1->data) {
 -		for (i = 0; i < m; i ++) {
 -			v  = (char) downcase (b0->data[i]);
 -			v -= (char) downcase (b1->data[i]);
 -			if (v != 0) return b0->data[i] - b1->data[i];
 -		}
 -	}
 -
 -	if (n == m || b0->slen == b1->slen) return BSTR_OK;
 -
 -	if (b0->slen > m) {
 -		v = (char) downcase (b0->data[m]);
 -		if (v) return v;
 -		return UCHAR_MAX + 1;
 -	}
 -
 -	v = - (char) downcase (b1->data[m]);
 -	if (v) return v;
 -	return - (int) (UCHAR_MAX + 1);
 -}
 -
 -/*  int biseqcaseless (const_bstring b0, const_bstring b1)
 - *
 - *  Compare two strings for equality without differentiating between case.  
 - *  If the strings differ other than in case, 0 is returned, if the strings 
 - *  are the same, 1 is returned, if there is an error, -1 is returned.  If 
 - *  the length of the strings are different, this function is O(1).  '\0' 
 - *  termination characters are not treated in any special way.
 - */
 -int biseqcaseless (const_bstring b0, const_bstring b1) {
 -int i, n;
 -
 -	if (bdata (b0) == NULL || b0->slen < 0 || 
 -	    bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR;
 -	if (b0->slen != b1->slen) return BSTR_OK;
 -	if (b0->data == b1->data || b0->slen == 0) return 1;
 -	for (i=0, n=b0->slen; i < n; i++) {
 -		if (b0->data[i] != b1->data[i]) {
 -			unsigned char c = (unsigned char) downcase (b0->data[i]);
 -			if (c != (unsigned char) downcase (b1->data[i])) return 0;
 -		}
 -	}
 -	return 1;
 -}
 -
 -/*  int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len)
 - *
 - *  Compare beginning of string b0 with a block of memory of length len 
 - *  without differentiating between case for equality.  If the beginning of b0
 - *  differs from the memory block other than in case (or if b0 is too short), 
 - *  0 is returned, if the strings are the same, 1 is returned, if there is an 
 - *  error, -1 is returned.  '\0' characters are not treated in any special 
 - *  way.
 - */
 -int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) {
 -int i;
 -
 -	if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
 -		return BSTR_ERR;
 -	if (b0->slen < len) return BSTR_OK;
 -	if (b0->data == (const unsigned char *) blk || len == 0) return 1;
 -
 -	for (i = 0; i < len; i ++) {
 -		if (b0->data[i] != ((const unsigned char *) blk)[i]) {
 -			if (downcase (b0->data[i]) != 
 -			    downcase (((const unsigned char *) blk)[i])) return 0;
 -		}
 -	}
 -	return 1;
 -}
 -
 -/*
 - * int bltrimws (bstring b)
 - *
 - * Delete whitespace contiguous from the left end of the string.
 - */
 -int bltrimws (bstring b) {
 -int i, len;
 -
 -	if (b == NULL || b->data == NULL || b->mlen < b->slen ||
 -	    b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
 -
 -	for (len = b->slen, i = 0; i < len; i++) {
 -		if (!wspace (b->data[i])) {
 -			return bdelete (b, 0, i);
 -		}
 -	}
 -
 -	b->data[0] = (unsigned char) '\0';
 -	b->slen = 0;
 -	return BSTR_OK;
 -}
 -
 -/*
 - * int brtrimws (bstring b)
 - *
 - * Delete whitespace contiguous from the right end of the string.
 - */
 -int brtrimws (bstring b) {
 -int i;
 -
 -	if (b == NULL || b->data == NULL || b->mlen < b->slen ||
 -	    b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
 -
 -	for (i = b->slen - 1; i >= 0; i--) {
 -		if (!wspace (b->data[i])) {
 -			if (b->mlen > i) b->data[i+1] = (unsigned char) '\0';
 -			b->slen = i + 1;
 -			return BSTR_OK;
 -		}
 -	}
 -
 -	b->data[0] = (unsigned char) '\0';
 -	b->slen = 0;
 -	return BSTR_OK;
 -}
 -
 -/*
 - * int btrimws (bstring b)
 - *
 - * Delete whitespace contiguous from both ends of the string.
 - */
 -int btrimws (bstring b) {
 -int i, j;
 -
 -	if (b == NULL || b->data == NULL || b->mlen < b->slen ||
 -	    b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
 -
 -	for (i = b->slen - 1; i >= 0; i--) {
 -		if (!wspace (b->data[i])) {
 -			if (b->mlen > i) b->data[i+1] = (unsigned char) '\0';
 -			b->slen = i + 1;
 -			for (j = 0; wspace (b->data[j]); j++) {}
 -			return bdelete (b, 0, j);
 -		}
 -	}
 -
 -	b->data[0] = (unsigned char) '\0';
 -	b->slen = 0;
 -	return BSTR_OK;
 -}
 -
 -/*  int biseq (const_bstring b0, const_bstring b1)
 - *
 - *  Compare the string b0 and b1.  If the strings differ, 0 is returned, if 
 - *  the strings are the same, 1 is returned, if there is an error, -1 is 
 - *  returned.  If the length of the strings are different, this function is
 - *  O(1).  '\0' termination characters are not treated in any special way.
 - */
 -int biseq (const_bstring b0, const_bstring b1) {
 -	if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL ||
 -		b0->slen < 0 || b1->slen < 0) return BSTR_ERR;
 -	if (b0->slen != b1->slen) return BSTR_OK;
 -	if (b0->data == b1->data || b0->slen == 0) return 1;
 -	return !bstr__memcmp (b0->data, b1->data, b0->slen);
 -}
 -
 -/*  int bisstemeqblk (const_bstring b0, const void * blk, int len)
 - *
 - *  Compare beginning of string b0 with a block of memory of length len for 
 - *  equality.  If the beginning of b0 differs from the memory block (or if b0 
 - *  is too short), 0 is returned, if the strings are the same, 1 is returned, 
 - *  if there is an error, -1 is returned.  '\0' characters are not treated in 
 - *  any special way.
 - */
 -int bisstemeqblk (const_bstring b0, const void * blk, int len) {
 -int i;
 -
 -	if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
 -		return BSTR_ERR;
 -	if (b0->slen < len) return BSTR_OK;
 -	if (b0->data == (const unsigned char *) blk || len == 0) return 1;
 -
 -	for (i = 0; i < len; i ++) {
 -		if (b0->data[i] != ((const unsigned char *) blk)[i]) return BSTR_OK;
 -	}
 -	return 1;
 -}
 -
 -/*  int biseqcstr (const_bstring b, const char *s)
 - *
 - *  Compare the bstring b and char * string s.  The C string s must be '\0' 
 - *  terminated at exactly the length of the bstring b, and the contents 
 - *  between the two must be identical with the bstring b with no '\0' 
 - *  characters for the two contents to be considered equal.  This is 
 - *  equivalent to the condition that their current contents will be always be 
 - *  equal when comparing them in the same format after converting one or the 
 - *  other.  If the strings are equal 1 is returned, if they are unequal 0 is 
 - *  returned and if there is a detectable error BSTR_ERR is returned.
 - */
 -int biseqcstr (const_bstring b, const char * s) {
 -int i;
 -	if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR;
 -	for (i=0; i < b->slen; i++) {
 -		if (s[i] == '\0' || b->data[i] != (unsigned char) s[i]) return BSTR_OK;
 -	}
 -	return s[i] == '\0';
 -}
 -
 -/*  int biseqcstrcaseless (const_bstring b, const char *s)
 - *
 - *  Compare the bstring b and char * string s.  The C string s must be '\0' 
 - *  terminated at exactly the length of the bstring b, and the contents 
 - *  between the two must be identical except for case with the bstring b with 
 - *  no '\0' characters for the two contents to be considered equal.  This is 
 - *  equivalent to the condition that their current contents will be always be 
 - *  equal ignoring case when comparing them in the same format after 
 - *  converting one or the other.  If the strings are equal, except for case, 
 - *  1 is returned, if they are unequal regardless of case 0 is returned and 
 - *  if there is a detectable error BSTR_ERR is returned.
 - */
 -int biseqcstrcaseless (const_bstring b, const char * s) {
 -int i;
 -	if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR;
 -	for (i=0; i < b->slen; i++) {
 -		if (s[i] == '\0' || 
 -		    (b->data[i] != (unsigned char) s[i] && 
 -		     downcase (b->data[i]) != (unsigned char) downcase (s[i])))
 -			return BSTR_OK;
 -	}
 -	return s[i] == '\0';
 -}
 -
 -/*  int bstrcmp (const_bstring b0, const_bstring b1)
 - *
 - *  Compare the string b0 and b1.  If there is an error, SHRT_MIN is returned, 
 - *  otherwise a value less than or greater than zero, indicating that the 
 - *  string pointed to by b0 is lexicographically less than or greater than 
 - *  the string pointed to by b1 is returned.  If the the string lengths are 
 - *  unequal but the characters up until the length of the shorter are equal 
 - *  then a value less than, or greater than zero, indicating that the string 
 - *  pointed to by b0 is shorter or longer than the string pointed to by b1 is 
 - *  returned.  0 is returned if and only if the two strings are the same.  If 
 - *  the length of the strings are different, this function is O(n).  Like its
 - *  standard C library counter part strcmp, the comparison does not proceed 
 - *  past any '\0' termination characters encountered.
 - */
 -int bstrcmp (const_bstring b0, const_bstring b1) {
 -int i, v, n;
 -
 -	if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL ||
 -		b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
 -	n = b0->slen; if (n > b1->slen) n = b1->slen;
 -	if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0))
 -		return BSTR_OK;
 -
 -	for (i = 0; i < n; i ++) {
 -		v = ((char) b0->data[i]) - ((char) b1->data[i]);
 -		if (v != 0) return v;
 -		if (b0->data[i] == (unsigned char) '\0') return BSTR_OK;
 -	}
 -
 -	if (b0->slen > n) return 1;
 -	if (b1->slen > n) return -1;
 -	return BSTR_OK;
 -}
 -
 -/*  int bstrncmp (const_bstring b0, const_bstring b1, int n)
 - *
 - *  Compare the string b0 and b1 for at most n characters.  If there is an 
 - *  error, SHRT_MIN is returned, otherwise a value is returned as if b0 and 
 - *  b1 were first truncated to at most n characters then bstrcmp was called
 - *  with these new strings are paremeters.  If the length of the strings are 
 - *  different, this function is O(n).  Like its standard C library counter 
 - *  part strcmp, the comparison does not proceed past any '\0' termination 
 - *  characters encountered.
 - */
 -int bstrncmp (const_bstring b0, const_bstring b1, int n) {
 -int i, v, m;
 -
 -	if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL ||
 -		b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
 -	m = n;
 -	if (m > b0->slen) m = b0->slen;
 -	if (m > b1->slen) m = b1->slen;
 -
 -	if (b0->data != b1->data) {
 -		for (i = 0; i < m; i ++) {
 -			v = ((char) b0->data[i]) - ((char) b1->data[i]);
 -			if (v != 0) return v;
 -			if (b0->data[i] == (unsigned char) '\0') return BSTR_OK;
 -		}
 -	}
 -
 -	if (n == m || b0->slen == b1->slen) return BSTR_OK;
 -
 -	if (b0->slen > m) return 1;
 -	return -1;
 -}
 -
 -/*  bstring bmidstr (const_bstring b, int left, int len)
 - *
 - *  Create a bstring which is the substring of b starting from position left
 - *  and running for a length len (clamped by the end of the bstring b.)  If
 - *  b is detectably invalid, then NULL is returned.  The section described 
 - *  by (left, len) is clamped to the boundaries of b.
 - */
 -bstring bmidstr (const_bstring b, int left, int len) {
 -
 -	if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
 -
 -	if (left < 0) {
 -		len += left;
 -		left = 0;
 -	}
 -
 -	if (len > b->slen - left) len = b->slen - left;
 -
 -	if (len <= 0) return bfromcstr ("");
 -	return blk2bstr (b->data + left, len);
 -}
 -
 -/*  int bdelete (bstring b, int pos, int len)
 - *
 - *  Removes characters from pos to pos+len-1 inclusive and shifts the tail of 
 - *  the bstring starting from pos+len to pos.  len must be positive for this 
 - *  call to have any effect.  The section of the string described by (pos, 
 - *  len) is clamped to boundaries of the bstring b.
 - */
 -int bdelete (bstring b, int pos, int len) {
 -	/* Clamp to left side of bstring */
 -	if (pos < 0) {
 -		len += pos;
 -		pos = 0;
 -	}
 -
 -	if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || 
 -	    b->mlen < b->slen || b->mlen <= 0) 
 -		return BSTR_ERR;
 -	if (len > 0 && pos < b->slen) {
 -		if (pos + len >= b->slen) {
 -			b->slen = pos;
 -		} else {
 -			bBlockCopy ((char *) (b->data + pos),
 -			            (char *) (b->data + pos + len), 
 -			            b->slen - (pos+len));
 -			b->slen -= len;
 -		}
 -		b->data[b->slen] = (unsigned char) '\0';
 -	}
 -	return BSTR_OK;
 -}
 -
 -/*  int bdestroy (bstring b)
 - *
 - *  Free up the bstring.  Note that if b is detectably invalid or not writable
 - *  then no action is performed and BSTR_ERR is returned.  Like a freed memory
 - *  allocation, dereferences, writes or any other action on b after it has 
 - *  been bdestroyed is undefined.
 - */
 -int bdestroy (bstring b) {
 -	if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen ||
 -	    b->data == NULL)
 -		return BSTR_ERR;
 -
 -	bstr__free (b->data);
 -
 -	/* In case there is any stale usage, there is one more chance to 
 -	   notice this error. */
 -
 -	b->slen = -1;
 -	b->mlen = -__LINE__;
 -	b->data = NULL;
 -
 -	bstr__free (b);
 -	return BSTR_OK;
 -}
 -
 -/*  int binstr (const_bstring b1, int pos, const_bstring b2)
 - *
 - *  Search for the bstring b2 in b1 starting from position pos, and searching 
 - *  forward.  If it is found then return with the first position where it is 
 - *  found, otherwise return BSTR_ERR.  Note that this is just a brute force 
 - *  string searcher that does not attempt clever things like the Boyer-Moore 
 - *  search algorithm.  Because of this there are many degenerate cases where 
 - *  this can take much longer than it needs to.
 - */
 -int binstr (const_bstring b1, int pos, const_bstring b2) {
 -int j, ii, ll, lf;
 -unsigned char * d0;
 -unsigned char c0;
 -register unsigned char * d1;
 -register unsigned char c1;
 -register int i;
 -
 -	if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
 -	    b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
 -	if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR;
 -	if (b1->slen < pos || pos < 0) return BSTR_ERR;
 -	if (b2->slen == 0) return pos;
 -
 -	/* No space to find such a string? */
 -	if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR;
 -
 -	/* An obvious alias case */
 -	if (b1->data == b2->data && pos == 0) return 0;
 -
 -	i = pos;
 -
 -	d0 = b2->data;
 -	d1 = b1->data;
 -	ll = b2->slen;
 -
 -	/* Peel off the b2->slen == 1 case */
 -	c0 = d0[0];
 -	if (1 == ll) {
 -		for (;i < lf; i++) if (c0 == d1[i]) return i;
 -		return BSTR_ERR;
 -	}
 -
 -	c1 = c0;
 -	j = 0;
 -	lf = b1->slen - 1;
 -
 -	ii = -1;
 -	if (i < lf) do {
 -		/* Unrolled current character test */
 -		if (c1 != d1[i]) {
 -			if (c1 != d1[1+i]) {
 -				i += 2;
 -				continue;
 -			}
 -			i++;
 -		}
 -
 -		/* Take note if this is the start of a potential match */
 -		if (0 == j) ii = i;
 -
 -		/* Shift the test character down by one */
 -		j++;
 -		i++;
 -
 -		/* If this isn't past the last character continue */
 -		if (j < ll) {
 -			c1 = d0[j];
 -			continue;
 -		}
 -
 -		N0:;
 -
 -		/* If no characters mismatched, then we matched */
 -		if (i == ii+j) return ii;
 -
 -		/* Shift back to the beginning */
 -		i -= j;
 -		j  = 0;
 -		c1 = c0;
 -	} while (i < lf);
 -
 -	/* Deal with last case if unrolling caused a misalignment */
 -	if (i == lf && ll == j+1 && c1 == d1[i]) goto N0;
 -
 -	return BSTR_ERR;
 -}
 -
 -/*  int binstrr (const_bstring b1, int pos, const_bstring b2)
 - *
 - *  Search for the bstring b2 in b1 starting from position pos, and searching 
 - *  backward.  If it is found then return with the first position where it is 
 - *  found, otherwise return BSTR_ERR.  Note that this is just a brute force 
 - *  string searcher that does not attempt clever things like the Boyer-Moore 
 - *  search algorithm.  Because of this there are many degenerate cases where 
 - *  this can take much longer than it needs to.
 - */
 -int binstrr (const_bstring b1, int pos, const_bstring b2) {
 -int j, i, l;
 -unsigned char * d0, * d1;
 -
 -	if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
 -	    b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
 -	if (b1->slen == pos && b2->slen == 0) return pos;
 -	if (b1->slen < pos || pos < 0) return BSTR_ERR;
 -	if (b2->slen == 0) return pos;
 -
 -	/* Obvious alias case */
 -	if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0;
 -
 -	i = pos;
 -	if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR;
 -
 -	/* If no space to find such a string then snap back */
 -	if (l + 1 <= i) i = l;
 -	j = 0;
 -
 -	d0 = b2->data;
 -	d1 = b1->data;
 -	l  = b2->slen;
 -
 -	for (;;) {
 -		if (d0[j] == d1[i + j]) {
 -			j ++;
 -			if (j >= l) return i;
 -		} else {
 -			i --;
 -			if (i < 0) break;
 -			j=0;
 -		}
 -	}
 -
 -	return BSTR_ERR;
 -}
 -
 -/*  int binstrcaseless (const_bstring b1, int pos, const_bstring b2)
 - *
 - *  Search for the bstring b2 in b1 starting from position pos, and searching 
 - *  forward but without regard to case.  If it is found then return with the 
 - *  first position where it is found, otherwise return BSTR_ERR.  Note that 
 - *  this is just a brute force string searcher that does not attempt clever 
 - *  things like the Boyer-Moore search algorithm.  Because of this there are 
 - *  many degenerate cases where this can take much longer than it needs to.
 - */
 -int binstrcaseless (const_bstring b1, int pos, const_bstring b2) {
 -int j, i, l, ll;
 -unsigned char * d0, * d1;
 -
 -	if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
 -	    b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
 -	if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR;
 -	if (b1->slen < pos || pos < 0) return BSTR_ERR;
 -	if (b2->slen == 0) return pos;
 -
 -	l = b1->slen - b2->slen + 1;
 -
 -	/* No space to find such a string? */
 -	if (l <= pos) return BSTR_ERR;
 -
 -	/* An obvious alias case */
 -	if (b1->data == b2->data && pos == 0) return BSTR_OK;
 -
 -	i = pos;
 -	j = 0;
 -
 -	d0 = b2->data;
 -	d1 = b1->data;
 -	ll = b2->slen;
 -
 -	for (;;) {
 -		if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) {
 -			j ++;
 -			if (j >= ll) return i;
 -		} else {
 -			i ++;
 -			if (i >= l) break;
 -			j=0;
 -		}
 -	}
 -
 -	return BSTR_ERR;
 -}
 -
 -/*  int binstrrcaseless (const_bstring b1, int pos, const_bstring b2)
 - *
 - *  Search for the bstring b2 in b1 starting from position pos, and searching 
 - *  backward but without regard to case.  If it is found then return with the 
 - *  first position where it is found, otherwise return BSTR_ERR.  Note that 
 - *  this is just a brute force string searcher that does not attempt clever 
 - *  things like the Boyer-Moore search algorithm.  Because of this there are 
 - *  many degenerate cases where this can take much longer than it needs to.
 - */
 -int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) {
 -int j, i, l;
 -unsigned char * d0, * d1;
 -
 -	if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
 -	    b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
 -	if (b1->slen == pos && b2->slen == 0) return pos;
 -	if (b1->slen < pos || pos < 0) return BSTR_ERR;
 -	if (b2->slen == 0) return pos;
 -
 -	/* Obvious alias case */
 -	if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK;
 -
 -	i = pos;
 -	if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR;
 -
 -	/* If no space to find such a string then snap back */
 -	if (l + 1 <= i) i = l;
 -	j = 0;
 -
 -	d0 = b2->data;
 -	d1 = b1->data;
 -	l  = b2->slen;
 -
 -	for (;;) {
 -		if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) {
 -			j ++;
 -			if (j >= l) return i;
 -		} else {
 -			i --;
 -			if (i < 0) break;
 -			j=0;
 -		}
 -	}
 -
 -	return BSTR_ERR;
 -}
 -
 -
 -/*  int bstrchrp (const_bstring b, int c, int pos)
 - *
 - *  Search for the character c in b forwards from the position pos 
 - *  (inclusive).
 - */
 -int bstrchrp (const_bstring b, int c, int pos) {
 -unsigned char * p;
 -
 -	if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR;
 -	p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos));
 -	if (p) return (int) (p - b->data);
 -	return BSTR_ERR;
 -}
 -
 -/*  int bstrrchrp (const_bstring b, int c, int pos)
 - *
 - *  Search for the character c in b backwards from the position pos in string 
 - *  (inclusive).
 - */
 -int bstrrchrp (const_bstring b, int c, int pos) {
 -int i;
 - 
 -	if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR;
 -	for (i=pos; i >= 0; i--) {
 -		if (b->data[i] == (unsigned char) c) return i;
 -	}
 -	return BSTR_ERR;
 -}
 -
 -#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF)
 -#define LONG_LOG_BITS_QTY (3)
 -#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY)
 -#define LONG_TYPE unsigned char
 -
 -#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY)
 -struct charField { LONG_TYPE content[CFCLEN]; };
 -#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1))))
 -#define setInCharField(cf,idx) { \
 -	unsigned int c = (unsigned int) (idx); \
 -	(cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \
 -}
 -
 -#else
 -
 -#define CFCLEN (1 << CHAR_BIT)
 -struct charField { unsigned char content[CFCLEN]; };
 -#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)])
 -#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0
 -
 -#endif
 -
 -/* Convert a bstring to charField */
 -static int buildCharField (struct charField * cf, const_bstring b) {
 -int i;
 -	if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR;
 -	memset ((void *) cf->content, 0, sizeof (struct charField));
 -	for (i=0; i < b->slen; i++) {
 -		setInCharField (cf, b->data[i]);
 -	}
 -	return BSTR_OK;
 -}
 -
 -static void invertCharField (struct charField * cf) {
 -int i;
 -	for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i];
 -}
 -
 -/* Inner engine for binchr */
 -static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) {
 -int i;
 -	for (i=pos; i < len; i++) {
 -		unsigned char c = (unsigned char) data[i];
 -		if (testInCharField (cf, c)) return i;
 -	}
 -	return BSTR_ERR;
 -}
 -
 -/*  int binchr (const_bstring b0, int pos, const_bstring b1);
 - *
 - *  Search for the first position in b0 starting from pos or after, in which 
 - *  one of the characters in b1 is found and return it.  If such a position 
 - *  does not exist in b0, then BSTR_ERR is returned.
 - */
 -int binchr (const_bstring b0, int pos, const_bstring b1) {
 -struct charField chrs;
 -	if (pos < 0 || b0 == NULL || b0->data == NULL ||
 -	    b0->slen <= pos) return BSTR_ERR;
 -	if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos);
 -	if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
 -	return binchrCF (b0->data, b0->slen, pos, &chrs);
 -}
 -
 -/* Inner engine for binchrr: walk data backwards from index pos down to 0
 - * and return the first index whose byte tests as present in cf, or BSTR_ERR
 - * if none does. */
 -static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) {
 -	int at = pos;
 -
 -	while (at >= 0) {
 -		unsigned int ch = (unsigned int) data[at];
 -		if (testInCharField (cf, ch)) {
 -			return at;
 -		}
 -		at--;
 -	}
 -	return BSTR_ERR;
 -}
 -
 -/*  int binchrr (const_bstring b0, int pos, const_bstring b1);
 - *
 - *  Backward counterpart of binchr: find the last position in b0 that is no
 - *  greater than pos and holds one of the characters of b1.  A pos equal to
 - *  the length of b0 is treated as the last character.  BSTR_ERR is returned
 - *  when no such position exists or the arguments are rejected.
 - */
 -int binchrr (const_bstring b0, int pos, const_bstring b1) {
 -	struct charField set;
 -
 -	if (b0 == NULL || b1 == NULL || pos < 0) {
 -		return BSTR_ERR;
 -	}
 -	if (b0->data == NULL || pos > b0->slen) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* pos == slen means "start from the final character" */
 -	if (b0->slen == pos) {
 -		pos -= 1;
 -	}
 -
 -	/* A one-character set degenerates to a plain reverse character search */
 -	if (b1->slen == 1) {
 -		return bstrrchrp (b0, b1->data[0], pos);
 -	}
 -	if (buildCharField (&set, b1) < 0) {
 -		return BSTR_ERR;
 -	}
 -	return binchrrCF (b0->data, pos, &set);
 -}
 -
 -/*  int bninchr (const_bstring b0, int pos, const_bstring b1);
 - *
 - *  Find the first position in b0, at pos or later, whose character is NOT
 - *  one of the characters of b1.  BSTR_ERR is returned when there is no such
 - *  position or when the arguments are rejected.
 - */
 -int bninchr (const_bstring b0, int pos, const_bstring b1) {
 -	struct charField excluded;
 -
 -	if (b0 == NULL || pos < 0) {
 -		return BSTR_ERR;
 -	}
 -	if (b0->data == NULL || pos >= b0->slen) {
 -		return BSTR_ERR;
 -	}
 -	if (buildCharField (&excluded, b1) < 0) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* Flip the set so the forward scan stops on the first byte not in b1 */
 -	invertCharField (&excluded);
 -	return binchrCF (b0->data, b0->slen, pos, &excluded);
 -}
 -
 -/*  int bninchrr (const_bstring b0, int pos, const_bstring b1);
 - *
 - *  Find the last position in b0, no greater than pos, whose character is
 - *  NOT one of the characters of b1.  A pos equal to the length of b0 is
 - *  treated as the last character.  BSTR_ERR is returned when there is no
 - *  such position or when the arguments are rejected.
 - */
 -int bninchrr (const_bstring b0, int pos, const_bstring b1) {
 -	struct charField excluded;
 -
 -	if (b0 == NULL || pos < 0) {
 -		return BSTR_ERR;
 -	}
 -	if (b0->data == NULL || pos > b0->slen) {
 -		return BSTR_ERR;
 -	}
 -	if (b0->slen == pos) {
 -		pos -= 1;
 -	}
 -	if (buildCharField (&excluded, b1) < 0) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* Flip the set so the backward scan stops on the first byte not in b1 */
 -	invertCharField (&excluded);
 -	return binchrrCF (b0->data, pos, &excluded);
 -}
 -
 -/*  int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill)
 - *
 - *  Overwrite the string b0 starting at position pos with the string b1. If 
 - *  the position pos is past the end of b0, then the character "fill" is 
 - *  appended as necessary to make up the gap between the end of b0 and pos.
 - *  If b1 is NULL, it behaves as if it were a 0-length string.
 - *
 - *  Returns BSTR_OK on success.  Returns BSTR_ERR if pos is negative, if b0
 - *  or a non-NULL b1 fails the validity checks at the top of the function,
 - *  if a private copy of an aliased b1 cannot be made, or if b0 cannot be
 - *  grown to hold pos + length of b1 characters plus the terminator.
 - */
 -int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) {
 -int d, newlen;
 -ptrdiff_t pd;
 -bstring aux = (bstring) b1; /* string actually copied from; swapped for a private copy when b1 aliases b0 */
 -
 -	if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || 
 -	    b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR;
 -	if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR;
 -
 -	d = pos; /* d becomes the end offset of the written region: pos + length of b1 */
 -
 -	/* Aliasing case */
 -	if (NULL != aux) {
 -		if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) { /* b1's data starts inside b0's allocation */
 -			if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR;
 -		}
 -		d += aux->slen;
 -	}
 -
 -	/* Increase memory size if necessary */
 -	if (balloc (b0, d + 1) != BSTR_OK) {
 -		if (aux != b1) bdestroy (aux); /* release the private copy only if one was made */
 -		return BSTR_ERR;
 -	}
 -
 -	newlen = b0->slen;
 -
 -	/* Fill in "fill" character as necessary */
 -	if (pos > newlen) {
 -		bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen));
 -		newlen = pos;
 -	}
 -
 -	/* Copy b1 to position pos in b0. */
 -	if (aux != NULL) {
 -		bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen);
 -		if (aux != b1) bdestroy (aux);
 -	}
 -
 -	/* Indicate the potentially increased size of b0 */
 -	if (d > newlen) newlen = d;
 -
 -	b0->slen = newlen;
 -	b0->data[newlen] = (unsigned char) '\0';
 -
 -	return BSTR_OK;
 -}
 -
 -/*  int binsert (bstring b1, int pos, bstring b2, unsigned char fill)
 - *
 - *  Inserts the string b2 into b1 at position pos.  If the position pos is 
 - *  past the end of b1, then the character "fill" is appended as necessary to 
 - *  make up the gap between the end of b1 and pos.  Unlike bsetstr, binsert
 - *  does not allow b2 to be NULL.
 - *
 - *  Returns BSTR_OK on success and BSTR_ERR when the arguments are rejected,
 - *  when the resulting length would not fit in an int, or when memory cannot
 - *  be obtained.  On every return path a temporary copy made for the
 - *  aliasing case is released.
 - */
 -int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) {
 -int d, l;
 -ptrdiff_t pd;
 -bstring aux = (bstring) b2;
 -
 -	if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || 
 -	    b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR;
 -
 -	/* Aliasing case: b2's data starts inside b1's allocation, so work from
 -	   a private copy that survives b1 being moved or overwritten */
 -	if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) {
 -		if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR;
 -	}
 -
 -	/* Compute the two possible end pointers */
 -	d = b1->slen + aux->slen;
 -	l = pos + aux->slen;
 -	if ((d|l) < 0) {
 -		/* Length does not fit: do not leak the aliasing copy */
 -		if (aux != b2) bdestroy (aux);
 -		return BSTR_ERR;
 -	}
 -
 -	if (l > d) {
 -		/* Inserting past the end of the string */
 -		if (balloc (b1, l + 1) != BSTR_OK) {
 -			if (aux != b2) bdestroy (aux);
 -			return BSTR_ERR;
 -		}
 -		bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen));
 -		b1->slen = l;
 -	} else {
 -		/* Inserting in the middle of the string */
 -		if (balloc (b1, d + 1) != BSTR_OK) {
 -			if (aux != b2) bdestroy (aux);
 -			return BSTR_ERR;
 -		}
 -		/* Shift the tail right to open a gap of aux->slen bytes at pos */
 -		bBlockCopy (b1->data + l, b1->data + pos, d - l);
 -		b1->slen = d;
 -	}
 -	bBlockCopy (b1->data + pos, aux->data, aux->slen);
 -	b1->data[b1->slen] = (unsigned char) '\0';
 -	if (aux != b2) bdestroy (aux);
 -	return BSTR_OK;
 -}
 -
 -/*  int breplace (bstring b1, int pos, int len, bstring b2, 
 - *                unsigned char fill)
 - *
 - *  Replace a section of a string from pos for a length len with the string b2.
 - *  fill is used if pos > b1->slen.
 - *
 - *  Returns BSTR_OK on success.  Returns BSTR_ERR when pos or len is
 - *  negative, when pos + len is negative, when b1 or b2 fails the validity
 - *  checks below, or when memory cannot be obtained.  When the replaced
 - *  section reaches or passes the end of b1 the work is handed to bsetstr
 - *  and its result is returned.
 - */
 -int breplace (bstring b1, int pos, int len, const_bstring b2, 
 -			  unsigned char fill) {
 -int pl, ret;
 -ptrdiff_t pd;
 -bstring aux = (bstring) b2; /* string actually copied from; swapped for a private copy when b2 aliases b1 */
 -
 -	if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || 
 -	    b2 == NULL || b1->data == NULL || b2->data == NULL || 
 -	    b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen ||
 -	    b1->mlen <= 0) return BSTR_ERR;
 -
 -	/* Straddles the end? */
 -	if (pl >= b1->slen) {
 -		if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret;
 -		if (pos + b2->slen < b1->slen) { /* b2 ended before the old end of b1: cut off the leftover tail */
 -			b1->slen = pos + b2->slen;
 -			b1->data[b1->slen] = (unsigned char) '\0';
 -		}
 -		return ret;
 -	}
 -
 -	/* Aliasing case */
 -	if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) {
 -		if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR;
 -	}
 -
 -	if (aux->slen > len) { /* the string grows, so more room may be needed */
 -		if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) {
 -			if (aux != b2) bdestroy (aux);
 -			return BSTR_ERR;
 -		}
 -	}
 -
 -	if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); /* slide the tail to its new offset */
 -	bstr__memcpy (b1->data + pos, aux->data, aux->slen);
 -	b1->slen += aux->slen - len;
 -	b1->data[b1->slen] = (unsigned char) '\0';
 -	if (aux != b2) bdestroy (aux);
 -	return BSTR_OK;
 -}
 -
 -/*  
 - *  findreplaceengine is used to implement bfindreplace and 
 - *  bfindreplacecaseless. It works by breaking the three cases of
 - *  expansion, reduction and replacement, and solving each of these
 - *  in the most efficient way possible.
 - *
 - *  b is modified in place: every match of find located by instr at or
 - *  after pos is replaced with repl.  instr is the search routine and is
 - *  called as instr (b, pos, find); a negative result ends the search.
 - *  Returns BSTR_ERR when the arguments are rejected by the checks at the
 - *  top of the function or when memory cannot be obtained, and BSTR_OK
 - *  otherwise, including when pos leaves no room for a match.
 - */
 -
 -typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2);
 -
 -#define INITIAL_STATIC_FIND_INDEX_COUNT 32
 -
 -static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) {
 -int i, ret, slen, mlen, delta, acc;
 -int * d;
 -int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. */
 -ptrdiff_t pd;
 -bstring auxf = (bstring) find; /* find string actually used; a private copy when find aliases b */
 -bstring auxr = (bstring) repl; /* replacement actually used; a private copy when repl aliases b */
 -
 -	if (b == NULL || b->data == NULL || find == NULL ||
 -	    find->data == NULL || repl == NULL || repl->data == NULL || 
 -	    pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || 
 -	    b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR;
 -	if (pos > b->slen - find->slen) return BSTR_OK; /* not enough characters left for even one match */
 -
 -	/* Alias with find string */
 -	pd = (ptrdiff_t) (find->data - b->data);
 -	if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) {
 -		if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR;
 -	}
 -
 -	/* Alias with repl string */
 -	pd = (ptrdiff_t) (repl->data - b->data);
 -	if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) {
 -		if (NULL == (auxr = bstrcpy (repl))) {
 -			if (auxf != find) bdestroy (auxf);
 -			return BSTR_ERR;
 -		}
 -	}
 -
 -	delta = auxf->slen - auxr->slen; /* > 0: each replacement shrinks b, < 0: grows it, 0: same size */
 -
 -	/* in-place replacement since find and replace strings are of equal 
 -	   length */
 -	if (delta == 0) {
 -		while ((pos = instr (b, pos, auxf)) >= 0) {
 -			bstr__memcpy (b->data + pos, auxr->data, auxr->slen);
 -			pos += auxf->slen;
 -		}
 -		if (auxf != find) bdestroy (auxf);
 -		if (auxr != repl) bdestroy (auxr);
 -		return BSTR_OK;
 -	}
 -
 -	/* shrinking replacement since auxf->slen > auxr->slen */
 -	if (delta > 0) {
 -		acc = 0; /* total number of bytes removed so far */
 -
 -		while ((i = instr (b, pos, auxf)) >= 0) {
 -			if (acc && i > pos)
 -				bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); /* close the gap before this match */
 -			if (auxr->slen)
 -				bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen);
 -			acc += delta;
 -			pos = i + auxf->slen;
 -		}
 -
 -		if (acc) {
 -			i = b->slen;
 -			if (i > pos)
 -				bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); /* move the text after the last match */
 -			b->slen -= acc;
 -			b->data[b->slen] = (unsigned char) '\0';
 -		}
 -
 -		if (auxf != find) bdestroy (auxf);
 -		if (auxr != repl) bdestroy (auxr);
 -		return BSTR_OK;
 -	}
 -
 -	/* expanding replacement since find->slen < repl->slen.  Its a lot 
 -	   more complicated.  This works by first finding all the matches and 
 -	   storing them to a growable array, then doing at most one resize of
 -	   the destination bstring and then performing the direct memory transfers
 -	   of the string segment pieces to form the final result. The growable 
 -	   array of matches uses a deferred doubling reallocing strategy.  What 
 -	   this means is that it starts as a reasonably fixed sized auto array in 
 -	   the hopes that many if not most cases will never need to grow this 
 -	   array.  But it switches as soon as the bounds of the array will be 
 -	   exceeded.  An extra find result is always appended to this array that
 -	   corresponds to the end of the destination string, so slen is checked
 -	   against mlen - 1 rather than mlen before resizing.
 -	*/
 -
 -	mlen = INITIAL_STATIC_FIND_INDEX_COUNT; /* capacity of d, in entries */
 -	d = (int *) static_d; /* Avoid malloc for trivial/initial cases */
 -	acc = slen = 0; /* acc: total growth in bytes; slen: number of matches recorded */
 -
 -	while ((pos = instr (b, pos, auxf)) >= 0) {
 -		if (slen >= mlen - 1) {
 -			int sl, *t;
 -
 -			mlen += mlen;
 -			sl = sizeof (int *) * mlen; /* NOTE(review): d holds int entries, so sizeof (int) looks intended; as written this can only over-allocate where pointers are wider than int - confirm */
 -			if (static_d == d) d = NULL; /* static_d cannot be realloced */
 -			if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) {
 -				ret = BSTR_ERR;
 -				goto done;
 -			}
 -			if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d)); /* first switch to the heap: carry over the entries from the auto array */
 -			d = t;
 -		}
 -		d[slen] = pos;
 -		slen++;
 -		acc -= delta;
 -		pos += auxf->slen;
 -		if (pos < 0 || acc < 0) { /* either counter went negative: treat as overflow */
 -			ret = BSTR_ERR;
 -			goto done;
 -		}
 -	}
 -	
 -	/* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */
 -	d[slen] = b->slen;
 -
 -	if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) {
 -		b->slen += acc;
 -		for (i = slen-1; i >= 0; i--) { /* work from the last match backwards so unmoved text is never overwritten */
 -			int s, l;
 -			s = d[i] + auxf->slen;
 -			l = d[i+1] - s; /* d[slen] may be accessed here. */
 -			if (l) {
 -				bstr__memmove (b->data + s + acc, b->data + s, l);
 -			}
 -			if (auxr->slen) {
 -				bstr__memmove (b->data + s + acc - auxr->slen, 
 -				               auxr->data, auxr->slen);
 -			}
 -			acc += delta;		
 -		}
 -		b->data[b->slen] = (unsigned char) '\0';
 -	}
 -
 -	done:;
 -	if (static_d == d) d = NULL; /* the auto array must not be passed to bstr__free */
 -	bstr__free (d);
 -	if (auxf != find) bdestroy (auxf);
 -	if (auxr != repl) bdestroy (auxr);
 -	return ret;
 -}
 -
 -/*  int bfindreplace (bstring b, const_bstring find, const_bstring repl, 
 - *                    int pos)
 - *
 - *  Replace every occurrence of find located at or after pos in b with repl.
 - *  Matching is done with binstr; the result of findreplaceengine is
 - *  returned unchanged.
 - */
 -int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) {
 -	instr_fnptr locate = binstr;
 -
 -	return findreplaceengine (b, find, repl, pos, locate);
 -}
 -
 -/*  int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, 
 - *                    int pos)
 - *
 - *  Replace every occurrence of find located at or after pos in b with repl,
 - *  ignoring case.  Matching is done with binstrcaseless; the result of
 - *  findreplaceengine is returned unchanged.
 - */
 -int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) {
 -	instr_fnptr locate = binstrcaseless;
 -
 -	return findreplaceengine (b, find, repl, pos, locate);
 -}
 -
 -/*  int binsertch (bstring b, int pos, int len, unsigned char fill)
 - *
 - *  Insert len copies of the character fill into b at position pos.  When
 - *  pos lies past the end of b, fill is also used for the gap between the
 - *  old end of b and pos, so the string ends at pos + len.  Returns BSTR_OK
 - *  on success and BSTR_ERR when the arguments are rejected, the new length
 - *  is negative, or b cannot be grown.
 - */
 -int binsertch (bstring b, int pos, int len, unsigned char fill) {
 -	int grown, stop, k;
 -
 -	if (b == NULL || pos < 0 || len < 0) {
 -		return BSTR_ERR;
 -	}
 -	if (b->slen < 0 || b->mlen < b->slen || b->mlen <= 0) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* Two candidate end offsets: old length plus len, and pos plus len */
 -	grown = b->slen + len;
 -	stop = pos + len;
 -	if ((grown | stop) < 0) {
 -		return BSTR_ERR;
 -	}
 -
 -	if (stop > grown) {
 -		/* pos is beyond the end: pad from the old end all the way to stop */
 -		if (balloc (b, stop + 1) != BSTR_OK) {
 -			return BSTR_ERR;
 -		}
 -		pos = b->slen;
 -		b->slen = stop;
 -	} else {
 -		/* pos is inside the string: shift the tail right by len first */
 -		if (balloc (b, grown + 1) != BSTR_OK) {
 -			return BSTR_ERR;
 -		}
 -		k = grown;
 -		while (--k >= stop) {
 -			b->data[k] = b->data[k - len];
 -		}
 -		b->slen = grown;
 -	}
 -
 -	k = pos;
 -	while (k < stop) {
 -		b->data[k] = fill;
 -		k++;
 -	}
 -	b->data[b->slen] = (unsigned char) '\0';
 -	return BSTR_OK;
 -}
 -
 -/*  int bpattern (bstring b, int len)
 - *
 - *  Repeat the current contents of b end to end, in place, and cut the
 - *  result to exactly len characters.  Returns BSTR_ERR when blength (b) is
 - *  not positive, when len is negative, or when b cannot be grown;
 - *  otherwise BSTR_OK (or the result of bsetstr for a one-character b).
 - */
 -int bpattern (bstring b, int len) {
 -	int k;
 -	int unit = blength (b);
 -
 -	if (unit <= 0 || len < 0) {
 -		return BSTR_ERR;
 -	}
 -	if (balloc (b, len + 1) != BSTR_OK) {
 -		return BSTR_ERR;
 -	}
 -
 -	if (len > 0) {
 -		/* A single character pattern is just a fill up to len */
 -		if (unit == 1) {
 -			return bsetstr (b, len, NULL, b->data[0]);
 -		}
 -		/* Each new byte repeats the byte one pattern length earlier */
 -		k = unit;
 -		while (k < len) {
 -			b->data[k] = b->data[k - unit];
 -			k++;
 -		}
 -	}
 -
 -	b->data[len] = (unsigned char) '\0';
 -	b->slen = len;
 -	return BSTR_OK;
 -}
 -
 -#define BS_BUFF_SZ (1024)
 -
 -/*  int breada (bstring b, bNread readPtr, void * parm)
 - *
 - *  Append to b everything that the fread-like function readPtr delivers
 - *  for the handle parm.  b is grown in steps and reading stops after the
 - *  first call that returns fewer bytes than were requested.  Returns
 - *  BSTR_OK on success and BSTR_ERR when the arguments are rejected or b
 - *  cannot be grown.
 - */
 -int breada (bstring b, bNread readPtr, void * parm) {
 -	int filled, got, target;
 -
 -	if (b == NULL || readPtr == NULL) {
 -		return BSTR_ERR;
 -	}
 -	if (b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) {
 -		return BSTR_ERR;
 -	}
 -
 -	filled = b->slen;
 -	target = filled + 16;
 -	for (;;) {
 -		if (balloc (b, target + 1) != BSTR_OK) {
 -			return BSTR_ERR;
 -		}
 -		got = (int) readPtr ((void *) (b->data + filled), 1, target - filled, parm);
 -		filled += got;
 -		b->slen = filled;
 -		if (filled < target) {
 -			break;
 -		}
 -		/* Request was satisfied in full: double the target while it is
 -		   below BS_BUFF_SZ, afterwards grow it by BS_BUFF_SZ each round */
 -		target += (target < BS_BUFF_SZ) ? target : BS_BUFF_SZ;
 -	}
 -
 -	b->data[filled] = (unsigned char) '\0';
 -	return BSTR_OK;
 -}
 -
 -/*  bstring bread (bNread readPtr, void * parm)
 - *
 - *  Create a new bstring holding everything that the fread-like function
 - *  readPtr delivers for the handle parm.  NULL is returned when breada
 - *  reports an error.
 - */
 -bstring bread (bNread readPtr, void * parm) {
 -	bstring result = bfromcstr ("");
 -
 -	if (breada (result, readPtr, parm) >= 0) {
 -		return result;
 -	}
 -	bdestroy (result);
 -	return NULL;
 -}
 -
 -/*  int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator)
 - *
 - *  Fill b from an fgetc-like single character reader (getcPtr).  Storing
 - *  starts at offset 0 of b, so the previous contents are replaced.  Reading
 - *  stops when getcPtr returns a negative number or right after a character
 - *  equal to terminator has been stored.
 - *
 - *  Returns BSTR_ERR when the arguments are rejected or b cannot be grown.
 - *  Otherwise returns 1 when nothing was stored and the reader reported a
 - *  negative number, and 0 in all other cases.
 - */
 -int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) {
 -	int ch, used, limit;
 -
 -	if (b == NULL || getcPtr == NULL) {
 -		return BSTR_ERR;
 -	}
 -	if (b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) {
 -		return BSTR_ERR;
 -	}
 -
 -	used = 0;
 -	limit = b->mlen - 2;	/* last offset that still leaves room for '\0' */
 -
 -	for (;;) {
 -		ch = getcPtr (parm);
 -		if (ch < 0) {
 -			break;
 -		}
 -		if (used > limit) {
 -			b->slen = used;
 -			if (balloc (b, used + 2) != BSTR_OK) {
 -				return BSTR_ERR;
 -			}
 -			limit = b->mlen - 2;
 -		}
 -		b->data[used] = (unsigned char) ch;
 -		used++;
 -		if (ch == terminator) {
 -			break;
 -		}
 -	}
 -
 -	b->data[used] = (unsigned char) '\0';
 -	b->slen = used;
 -
 -	return (used == 0) && (ch < 0);
 -}
 -
 -/*  int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator)
 - *
 - *  Append to b the characters delivered by an fgetc-like single character
 - *  reader (getcPtr).  Reading stops when getcPtr returns a negative number
 - *  or right after a character equal to terminator has been appended.
 - *
 - *  Returns BSTR_ERR when the arguments are rejected or b cannot be grown.
 - *  Otherwise returns 1 when b is empty afterwards and the reader reported
 - *  a negative number, and 0 in all other cases.
 - */
 -int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) {
 -	int ch, used, limit;
 -
 -	if (b == NULL || getcPtr == NULL) {
 -		return BSTR_ERR;
 -	}
 -	if (b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) {
 -		return BSTR_ERR;
 -	}
 -
 -	used = b->slen;		/* continue after the existing contents */
 -	limit = b->mlen - 2;	/* last offset that still leaves room for '\0' */
 -
 -	for (;;) {
 -		ch = getcPtr (parm);
 -		if (ch < 0) {
 -			break;
 -		}
 -		if (used > limit) {
 -			b->slen = used;
 -			if (balloc (b, used + 2) != BSTR_OK) {
 -				return BSTR_ERR;
 -			}
 -			limit = b->mlen - 2;
 -		}
 -		b->data[used] = (unsigned char) ch;
 -		used++;
 -		if (ch == terminator) {
 -			break;
 -		}
 -	}
 -
 -	b->data[used] = (unsigned char) '\0';
 -	b->slen = used;
 -
 -	return (used == 0) && (ch < 0);
 -}
 -
 -/*  bstring bgets (bNgetc getcPtr, void * parm, char terminator)
 - *
 - *  Create a new bstring from the characters delivered by an fgetc-like
 - *  single character reader (getcPtr), stopping as bgetsa does.  NULL is
 - *  returned when bgetsa reports an error or when the result is empty.
 - */
 -bstring bgets (bNgetc getcPtr, void * parm, char terminator) {
 -	bstring line = bfromcstr ("");
 -
 -	/* bgetsa is evaluated first, so line is only dereferenced after it
 -	   has returned a non-negative result */
 -	if (bgetsa (line, getcPtr, parm, terminator) >= 0 && line->slen > 0) {
 -		return line;
 -	}
 -	bdestroy (line);
 -	return NULL;
 -}
 -
 -struct bStream {
 -	bstring buff;		/* Buffer for over-reads */
 -	void * parm;		/* The stream handle for core stream */
 -	bNread readFnPtr;	/* fread compatible fnptr for core stream */
 -	int isEOF;		/* track file's EOF state */
 -	int maxBuffSz;		/* size buff is grown to (plus one) and largest read requested per readFnPtr call */
 -};
 -
 -/*  struct bStream * bsopen (bNread readPtr, void * parm)
 - *
 - *  Wrap a given open stream (described by a fread compatible function 
 - *  pointer and stream handle) into an open bStream suitable for the bstring 
 - *  library streaming functions.
 - *
 - *  Returns NULL when readPtr is NULL or when either the bStream itself or
 - *  its over-read buffer cannot be allocated.
 - */
 -struct bStream * bsopen (bNread readPtr, void * parm) {
 -struct bStream * s;
 -
 -	if (readPtr == NULL) return NULL;
 -	s = (struct bStream *) bstr__alloc (sizeof (struct bStream));
 -	if (s == NULL) return NULL;
 -	s->buff = bfromcstr ("");
 -	if (s->buff == NULL) {
 -		/* Never hand out a stream without its buffer: release the
 -		   half-built object and report the failure instead */
 -		bstr__free (s);
 -		return NULL;
 -	}
 -	s->parm = parm;
 -	s->readFnPtr = readPtr;
 -	s->maxBuffSz = BS_BUFF_SZ;
 -	s->isEOF = 0;
 -	return s;
 -}
 -
 -/*  int bsbufflength (struct bStream * s, int sz)
 - *
 - *  Change the buffer length used by the bStream to sz and return the
 - *  length that was in effect before the call.  A sz of zero leaves the
 - *  length untouched.  BSTR_ERR is returned when s is NULL or sz is negative.
 - */
 -int bsbufflength (struct bStream * s, int sz) {
 -	int previous;
 -
 -	if (s == NULL || sz < 0) {
 -		return BSTR_ERR;
 -	}
 -	previous = s->maxBuffSz;
 -	if (sz != 0) {
 -		s->maxBuffSz = sz;
 -	}
 -	return previous;
 -}
 -
 -/*  int bseof (const struct bStream * s)
 - *
 - *  Return 1 when the end-of-stream flag is set and the over-read buffer is
 - *  empty, 0 otherwise.  BSTR_ERR is returned when s is NULL or when it has
 - *  no read function or no buffer.
 - */
 -int bseof (const struct bStream * s) {
 -	/* buff is dereferenced below, so it must be checked like readFnPtr */
 -	if (s == NULL || s->readFnPtr == NULL || s->buff == NULL) return BSTR_ERR;
 -	return s->isEOF && (s->buff->slen == 0);
 -}
 -
 -/*  void * bsclose (struct bStream * s)
 - *
 - *  Destroy the bStream together with its over-read buffer and hand back
 - *  the stream handle that was given to bsopen.  NULL is returned when s is
 - *  NULL.
 - */
 -void * bsclose (struct bStream * s) {
 -	void * handle;
 -
 -	if (NULL == s) {
 -		return NULL;
 -	}
 -
 -	handle = s->parm;
 -	if (s->buff != NULL) {
 -		bdestroy (s->buff);
 -	}
 -
 -	/* Clear every field before the object is released */
 -	s->buff = NULL;
 -	s->parm = NULL;
 -	s->readFnPtr = NULL;
 -	s->isEOF = 1;
 -	bstr__free (s);
 -	return handle;
 -}
 -
 -/*  int bsreadlna (bstring r, struct bStream * s, char terminator)
 - *
 - *  Read a bstring terminated by the terminator character or the end of the
 - *  stream from the bStream (s) and append it to the parameter r.  This 
 - *  function may read additional characters from the core stream that are not 
 - *  returned, but will be retained for subsequent read operations.
 - *
 - *  Returns BSTR_OK when something was appended.  Returns BSTR_ERR when the
 - *  arguments are rejected, when memory cannot be obtained, when appending
 - *  to r fails, or when the stream ends without anything being appended.
 - */
 -int bsreadlna (bstring r, struct bStream * s, char terminator) {
 -int i, l, ret, rlo;
 -char * b;
 -struct tagbstring x;
 -
 -	if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 ||
 -	    r->slen < 0 || r->mlen < r->slen) return BSTR_ERR;
 -	l = s->buff->slen;
 -	if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
 -	b = (char *) s->buff->data;
 -	x.data = (unsigned char *) b;
 -
 -	/* First check if the current buffer holds the terminator */
 -	b[l] = terminator; /* Set sentinel */
 -	for (i=0; b[i] != terminator; i++) ;
 -	if (i < l) {
 -		x.slen = i + 1;
 -		ret = bconcat (r, &x);
 -		s->buff->slen = l;
 -		/* Consume the line from the buffer only if it reached r */
 -		if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
 -		/* Report a failed append instead of claiming success */
 -		return ret;
 -	}
 -
 -	rlo = r->slen; /* length of r before anything from the stream is added */
 -
 -	/* If not then just concatenate the entire buffer to the output */
 -	x.slen = l;
 -	if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
 -
 -	/* Perform direct in-place reads into the destination to allow for
 -	   the minimum of data-copies */
 -	for (;;) {
 -		if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
 -		b = (char *) (r->data + r->slen);
 -		l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
 -		if (l <= 0) {
 -			r->data[r->slen] = (unsigned char) '\0';
 -			s->buff->slen = 0;
 -			s->isEOF = 1;
 -			/* If nothing was read return with an error message */
 -			return BSTR_ERR & -(r->slen == rlo);
 -		}
 -		b[l] = terminator; /* Set sentinel */
 -		for (i=0; b[i] != terminator; i++) ;
 -		if (i < l) break;
 -		r->slen += l;
 -	}
 -
 -	/* Terminator found, push over-read back to buffer */
 -	i++;
 -	r->slen += i;
 -	s->buff->slen = l - i;
 -	bstr__memcpy (s->buff->data, b + i, l - i);
 -	r->data[r->slen] = (unsigned char) '\0';
 -	return BSTR_OK;
 -}
 -
 -/*  int bsreadlnsa (bstring r, struct bStream * s, bstring term)
 - *
 - *  Read a bstring terminated by any character in the term string or the end 
 - *  of the stream from the bStream (s) and append it to the parameter r.  
 - *  This function may read additional characters from the core stream that 
 - *  are not returned, but will be retained for subsequent read operations.
 - *
 - *  Returns BSTR_OK when something was appended.  Returns BSTR_ERR when the
 - *  arguments are rejected (including an empty term), when memory cannot be
 - *  obtained, when appending to r fails, or when the stream ends without
 - *  anything being appended.
 - */
 -int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) {
 -int i, l, ret, rlo;
 -unsigned char * b;
 -struct tagbstring x;
 -struct charField cf;
 -
 -	if (s == NULL || s->buff == NULL || r == NULL || term == NULL ||
 -	    term->data == NULL || r->mlen <= 0 || r->slen < 0 ||
 -	    r->mlen < r->slen) return BSTR_ERR;
 -	/* A single terminator is handled by the plain character variant */
 -	if (term->slen == 1) return bsreadlna (r, s, term->data[0]);
 -	if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR;
 -
 -	l = s->buff->slen;
 -	if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
 -	b = (unsigned char *) s->buff->data;
 -	x.data = b;
 -
 -	/* First check if the current buffer holds the terminator */
 -	b[l] = term->data[0]; /* Set sentinel */
 -	for (i=0; !testInCharField (&cf, b[i]); i++) ;
 -	if (i < l) {
 -		x.slen = i + 1;
 -		ret = bconcat (r, &x);
 -		s->buff->slen = l;
 -		/* Consume the line from the buffer only if it reached r */
 -		if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
 -		/* Report a failed append instead of claiming success */
 -		return ret;
 -	}
 -
 -	rlo = r->slen; /* length of r before anything from the stream is added */
 -
 -	/* If not then just concatenate the entire buffer to the output */
 -	x.slen = l;
 -	if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
 -
 -	/* Perform direct in-place reads into the destination to allow for
 -	   the minimum of data-copies */
 -	for (;;) {
 -		if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
 -		b = (unsigned char *) (r->data + r->slen);
 -		l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
 -		if (l <= 0) {
 -			r->data[r->slen] = (unsigned char) '\0';
 -			s->buff->slen = 0;
 -			s->isEOF = 1;
 -			/* If nothing was read return with an error message */
 -			return BSTR_ERR & -(r->slen == rlo);
 -		}
 -
 -		b[l] = term->data[0]; /* Set sentinel */
 -		for (i=0; !testInCharField (&cf, b[i]); i++) ;
 -		if (i < l) break;
 -		r->slen += l;
 -	}
 -
 -	/* Terminator found, push over-read back to buffer */
 -	i++;
 -	r->slen += i;
 -	s->buff->slen = l - i;
 -	bstr__memcpy (s->buff->data, b + i, l - i);
 -	r->data[r->slen] = (unsigned char) '\0';
 -	return BSTR_OK;
 -}
 -
 -/*  int bsreada (bstring r, struct bStream * s, int n)
 - *
 - *  Read a bstring of length n (or, if it is fewer, as many bytes as is 
 - *  remaining) from the bStream and append it to r.  This function may read 
 - *  additional characters from the core stream that are not returned, but 
 - *  will be retained for subsequent read operations.  This function will not 
 - *  read additional characters from the core stream beyond virtual stream 
 - *  pointer.
 - *
 - *  Returns 0 when at least one character was appended to r.  Returns
 - *  BSTR_ERR when the arguments are rejected, when n plus the length of r
 - *  is not positive, when memory cannot be obtained, or when nothing could
 - *  be appended.
 - */
 -int bsreada (bstring r, struct bStream * s, int n) {
 -int l, ret, orslen;
 -char * b;
 -struct tagbstring x;
 -
 -	if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0
 -	 || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR;
 -
 -	n += r->slen; /* from here on n is the target total length of r */
 -	if (n <= 0) return BSTR_ERR;
 -
 -	l = s->buff->slen; /* number of buffered (over-read) bytes available */
 -
 -	orslen = r->slen; /* remembered to detect whether anything was appended */
 -
 -	if (0 == l) {
 -		if (s->isEOF) return BSTR_ERR;
 -		if (r->mlen > n) { /* r already has room: read straight into it, bypassing the buffer */
 -			l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm);
 -			if (0 >= l || l > n - r->slen) {
 -				s->isEOF = 1;
 -				return BSTR_ERR;
 -			}
 -			r->slen += l;
 -			r->data[r->slen] = (unsigned char) '\0';
 -			return 0;
 -		}
 -	}
 -
 -	if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
 -	b = (char *) s->buff->data;
 -	x.data = (unsigned char *) b; /* x is a view onto the stream buffer; only its slen changes below */
 -
 -	do {
 -		if (l + r->slen >= n) { /* the buffer holds enough to finish the request */
 -			x.slen = n - r->slen;
 -			ret = bconcat (r, &x);
 -			s->buff->slen = l;
 -			if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); /* keep the unused remainder buffered */
 -			return BSTR_ERR & -(r->slen == orslen); /* BSTR_ERR if r did not grow, otherwise 0 */
 -		}
 -
 -		x.slen = l;
 -		if (BSTR_OK != bconcat (r, &x)) break;
 -
 -		l = n - r->slen;
 -		if (l > s->maxBuffSz) l = s->maxBuffSz; /* never request more than the buffer can hold */
 -
 -		l = (int) s->readFnPtr (b, 1, l, s->parm);
 -
 -	} while (l > 0);
 -	if (l < 0) l = 0;
 -	if (l == 0) s->isEOF = 1;
 -	s->buff->slen = l;
 -	return BSTR_ERR & -(r->slen == orslen); /* BSTR_ERR if r did not grow, otherwise 0 */
 -}
 -
 -/*  int bsreadln (bstring r, struct bStream * s, char terminator)
 - *
 - *  Replace the contents of r with the next run of characters from the
 - *  bStream (s), up to and including the terminator character or up to the
 - *  end of the stream.  Characters read from the core stream beyond that
 - *  point are retained for subsequent read operations.  The work is done by
 - *  bsreadlna after r has been emptied.
 - */
 -int bsreadln (bstring r, struct bStream * s, char terminator) {
 -	if (s == NULL || r == NULL) {
 -		return BSTR_ERR;
 -	}
 -	if (s->buff == NULL || r->mlen <= 0) {
 -		return BSTR_ERR;
 -	}
 -	if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* Empty r so the appending variant yields just the new line */
 -	r->slen = 0;
 -	return bsreadlna (r, s, terminator);
 -}
 -
 -/*  int bsreadlns (bstring r, struct bStream * s, bstring term)
 - *
 - *  Replace the contents of r with the next run of characters from the
 - *  bStream (s), up to and including the first character that occurs in
 - *  term or up to the end of the stream.  Characters read from the core
 - *  stream beyond that point are retained for subsequent read operations.
 - *  An empty term is rejected with BSTR_ERR.
 - */
 -int bsreadlns (bstring r, struct bStream * s, const_bstring term) {
 -	if (s == NULL || r == NULL || term == NULL) {
 -		return BSTR_ERR;
 -	}
 -	if (s->buff == NULL || term->data == NULL || r->mlen <= 0) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* A single terminator is handled by the plain character variant */
 -	if (term->slen == 1) {
 -		return bsreadln (r, s, term->data[0]);
 -	}
 -	if (term->slen < 1) {
 -		return BSTR_ERR;
 -	}
 -	if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* Empty r so the appending variant yields just the new line */
 -	r->slen = 0;
 -	return bsreadlnsa (r, s, term);
 -}
 -
 -/*  int bsread (bstring r, struct bStream * s, int n)
 - *
 - *  Replace the contents of r with up to n characters taken from the
 - *  bStream (fewer when the stream runs out first).  Characters read from
 - *  the core stream beyond that point are retained for subsequent read
 - *  operations.  The work is done by bsreada after r has been emptied.
 - */
 -int bsread (bstring r, struct bStream * s, int n) {
 -	if (s == NULL || r == NULL || n <= 0) {
 -		return BSTR_ERR;
 -	}
 -	if (s->buff == NULL || r->mlen <= 0) {
 -		return BSTR_ERR;
 -	}
 -	if (balloc (s->buff, s->maxBuffSz + 1) != BSTR_OK) {
 -		return BSTR_ERR;
 -	}
 -
 -	/* Empty r so the appending variant yields just the new data */
 -	r->slen = 0;
 -	return bsreada (r, s, n);
 -}
 -
 -/*  int bsunread (struct bStream * s, const_bstring b)
 - *
 - *  Push the bstring b back onto the front of the bStream's buffer, so that
 - *  its characters are read before those that come from the core stream.
 - *  Returns BSTR_ERR when s or its buffer is NULL, otherwise the result of
 - *  the insertion.
 - */
 -int bsunread (struct bStream * s, const_bstring b) {
 -	if (s != NULL && s->buff != NULL) {
 -		return binsert (s->buff, 0, b, (unsigned char) '?');
 -	}
 -	return BSTR_ERR;
 -}
 -
 -/*  int bspeek (bstring r, const struct bStream * s)
 - *
 - *  Copy into r the characters currently held in the bStream's buffer, i.e.
 - *  those that will be read before anything from the core stream.  Returns
 - *  BSTR_ERR when s or its buffer is NULL, otherwise the result of the
 - *  assignment.
 - */
 -int bspeek (bstring r, const struct bStream * s) {
 -	if (s != NULL && s->buff != NULL) {
 -		return bassign (r, s->buff);
 -	}
 -	return BSTR_ERR;
 -}
 -
 -/*  bstring bjoin (const struct bstrList * bl, const_bstring sep);
 - *
 - *  Join the entries of a bstrList into one bstring by sequentially 
 - *  concatenating them with the sep string in between.  If there is an error 
 - *  NULL is returned, otherwise a bstring with the correct result is returned.
 - *
 - *  An empty list yields an empty bstring regardless of sep.  NULL is
 - *  returned for a NULL list, a negative entry count, an invalid sep or
 - *  entry length, a total length that does not fit in an int, or an
 - *  allocation failure.
 - */
 -bstring bjoin (const struct bstrList * bl, const_bstring sep) {
 -bstring b;
 -int i, c, v;
 -
 -	if (bl == NULL || bl->qty < 0) return NULL;
 -	if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL;
 -
 -	/* c counts the bytes to allocate, starting with the terminator */
 -	for (i = 0, c = 1; i < bl->qty; i++) {
 -		v = bl->entry[i]->slen;
 -		if (v < 0) return NULL;	/* Invalid input */
 -		if (v > INT_MAX - c) return NULL;	/* Total would overflow */
 -		c += v;
 -	}
 -
 -	/* Separators only appear between entries, so fewer than two entries
 -	   need none; the division guards the multiplication against overflow */
 -	if (sep != NULL && bl->qty > 1 && sep->slen > 0) {
 -		if (bl->qty - 1 > (INT_MAX - c) / sep->slen) return NULL;
 -		c += (bl->qty - 1) * sep->slen;
 -	}
 -
 -	b = (bstring) bstr__alloc (sizeof (struct tagbstring));
 -	if (NULL == b) return NULL; /* Out of memory */
 -	b->data = (unsigned char *) bstr__alloc (c);
 -	if (b->data == NULL) {
 -		bstr__free (b);
 -		return NULL;
 -	}
 -
 -	b->mlen = c;
 -	b->slen = c-1;
 -
 -	/* c is reused as the write offset into the result */
 -	for (i = 0, c = 0; i < bl->qty; i++) {
 -		if (i > 0 && sep != NULL) {
 -			bstr__memcpy (b->data + c, sep->data, sep->slen);
 -			c += sep->slen;
 -		}
 -		v = bl->entry[i]->slen;
 -		bstr__memcpy (b->data + c, bl->entry[i]->data, v);
 -		c += v;
 -	}
 -	b->data[c] = (unsigned char) '\0';
 -	return b;
 -}
 -
 -#define BSSSC_BUFF_LEN (256)
 -
 -/*  int bssplitscb (struct bStream * s, const_bstring splitStr, 
 - *	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
 - *
 - *  Iterate the set of disjoint sequential substrings read from a stream 
 - *  divided by any of the characters in splitStr.  An empty splitStr causes 
 - *  the whole stream to be iterated once.
 - *
 - *  Note: At the point of calling the cb function, the bStream pointer is 
 - *  pointed exactly at the position right after having read the split 
 - *  character.  The cb function can act on the stream by causing the bStream
 - *  pointer to move, and bssplitscb will continue by starting the next split
 - *  at the position of the pointer after the return from cb.
 - *
 - *  However, if the cb causes the bStream s to be destroyed then the cb must
 - *  return with a negative value, otherwise bssplitscb will continue in an 
 - *  undefined manner.
 - *
 - *  Returns BSTR_ERR when the arguments are rejected or the working buffer
 - *  cannot be created.  Otherwise a negative value from cb or bsunread
 - *  stops the iteration and is returned; a positive value from the final cb
 - *  call is reported as 0.
 - */
 -int bssplitscb (struct bStream * s, const_bstring splitStr, 
 -	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
 -struct charField chrs;
 -bstring buff;
 -int i, p, ret;
 -
 -	if (cb == NULL || s == NULL || s->readFnPtr == NULL 
 -	 || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
 -
 -	if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
 -
 -	if (splitStr->slen == 0) {
 -		while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; /* pull the whole stream into buff */
 -		if ((ret = cb (parm, 0, buff)) > 0) 
 -			ret = 0;
 -	} else {
 -		buildCharField (&chrs, splitStr); /* NOTE(review): result is not checked here - confirm splitStr->data cannot be NULL at this point */
 -		ret = p = i = 0; /* p: stream offset of the current entry; i: scan index within buff */
 -		for (;;) {
 -			if (i >= buff->slen) {
 -				bsreada (buff, s, BSSSC_BUFF_LEN); /* scanned everything buffered: try to fetch more */
 -				if (i >= buff->slen) { /* nothing more arrived: deliver the final entry */
 -					if (0 < (ret = cb (parm, p, buff))) ret = 0;
 -					break;
 -				}
 -			}
 -			if (testInCharField (&chrs, buff->data[i])) {
 -				struct tagbstring t;
 -				unsigned char c;
 -
 -				blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1));
 -				if ((ret = bsunread (s, &t)) < 0) break; /* give the bytes after the split character back to the stream */
 -				buff->slen = i;
 -				c = buff->data[i];
 -				buff->data[i] = (unsigned char) '\0'; /* terminate the entry for the duration of the callback */
 -				if ((ret = cb (parm, p, buff)) < 0) break;
 -				buff->data[i] = c;
 -				buff->slen = 0;
 -				p += i + 1;
 -				i = -1; /* becomes 0 after the increment below */
 -			}
 -			i++;
 -		}
 -	}
 -
 -	bdestroy (buff);
 -	return ret;
 -}
 -
 -/*  int bssplitstrcb (struct bStream * s, const_bstring splitStr, 
 - *	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
 - *
 - *  Iterate the set of disjoint sequential substrings read from a stream 
 - *  divided by the entire substring splitStr.  An empty splitStr causes 
 - *  each character of the stream to be iterated.
 - *
 - *  Note: At the point of calling the cb function, the bStream pointer is 
 - *  pointed exactly at the position right after having read the split 
 - *  character.  The cb function can act on the stream by causing the bStream
 - *  pointer to move, and bssplitscb will continue by starting the next split
 - *  at the position of the pointer after the return from cb.
 - *
 - *  However, if the cb causes the bStream s to be destroyed then the cb must
 - *  return with a negative value, otherwise bssplitscb will continue in an 
 - *  undefined manner.
 - */
 -int bssplitstrcb (struct bStream * s, const_bstring splitStr, 
 -	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
 -bstring buff;
 -int i, p, ret;
 -
 -	if (cb == NULL || s == NULL || s->readFnPtr == NULL 
 -	 || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
 -
 -	if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm);
 -
 -	if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
 -
 -	if (splitStr->slen == 0) {
 -		for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) {
 -			if ((ret = cb (parm, 0, buff)) < 0) {
 -				bdestroy (buff);
 -				return ret;
 -			}
 -			buff->slen = 0;
 -		}
 -		return BSTR_OK;
 -	} else {
 -		ret = p = i = 0;
 -		for (i=p=0;;) {
 -			if ((ret = binstr (buff, 0, splitStr)) >= 0) {
 -				struct tagbstring t;
 -				blk2tbstr (t, buff->data, ret);
 -				i = ret + splitStr->slen;
 -				if ((ret = cb (parm, p, &t)) < 0) break;
 -				p += i;
 -				bdelete (buff, 0, i);
 -			} else {
 -				bsreada (buff, s, BSSSC_BUFF_LEN);
 -				if (bseof (s)) {
 -					if ((ret = cb (parm, p, buff)) > 0) ret = 0;
 -					break;
 -				}
 -			}
 -		}
 -	}
 -
 -	bdestroy (buff);
 -	return ret;
 -}
 -
 -/*  int bstrListCreate (void)
 - *
 - *  Create a bstrList.
 - */
 -struct bstrList * bstrListCreate (void) {
 -struct bstrList * sl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
 -	if (sl) {
 -		sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring));
 -		if (!sl->entry) {
 -			bstr__free (sl);
 -			sl = NULL;
 -		} else {
 -			sl->qty = 0;
 -			sl->mlen = 1;
 -		}
 -	}
 -	return sl;
 -}
 -
 -/*  int bstrListDestroy (struct bstrList * sl)
 - *
 - *  Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate.
 - */
 -int bstrListDestroy (struct bstrList * sl) {
 -int i;
 -	if (sl == NULL || sl->qty < 0) return BSTR_ERR;
 -	for (i=0; i < sl->qty; i++) {
 -		if (sl->entry[i]) {
 -			bdestroy (sl->entry[i]);
 -			sl->entry[i] = NULL;
 -		}
 -	}
 -	sl->qty  = -1;
 -	sl->mlen = -1;
 -	bstr__free (sl->entry);
 -	sl->entry = NULL;
 -	bstr__free (sl);
 -	return BSTR_OK;
 -}
 -
 -/*  int bstrListAlloc (struct bstrList * sl, int msz)
 - *
 - *  Ensure that there is memory for at least msz number of entries for the
 - *  list.
 - */
 -int bstrListAlloc (struct bstrList * sl, int msz) {
 -bstring * l;
 -int smsz;
 -size_t nsz;
 -	if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
 -	if (sl->mlen >= msz) return BSTR_OK;
 -	smsz = snapUpSize (msz);
 -	nsz = ((size_t) smsz) * sizeof (bstring);
 -	if (nsz < (size_t) smsz) return BSTR_ERR;
 -	l = (bstring *) bstr__realloc (sl->entry, nsz);
 -	if (!l) {
 -		smsz = msz;
 -		nsz = ((size_t) smsz) * sizeof (bstring);
 -		l = (bstring *) bstr__realloc (sl->entry, nsz);
 -		if (!l) return BSTR_ERR;
 -	}
 -	sl->mlen = smsz;
 -	sl->entry = l;
 -	return BSTR_OK;
 -}
 -
 -/*  int bstrListAllocMin (struct bstrList * sl, int msz)
 - *
 - *  Try to allocate the minimum amount of memory for the list to include at
 - *  least msz entries or sl->qty whichever is greater.
 - */
 -int bstrListAllocMin (struct bstrList * sl, int msz) {
 -bstring * l;
 -size_t nsz;
 -	if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
 -	if (msz < sl->qty) msz = sl->qty;
 -	if (sl->mlen == msz) return BSTR_OK;
 -	nsz = ((size_t) msz) * sizeof (bstring);
 -	if (nsz < (size_t) msz) return BSTR_ERR;
 -	l = (bstring *) bstr__realloc (sl->entry, nsz);
 -	if (!l) return BSTR_ERR;
 -	sl->mlen = msz;
 -	sl->entry = l;
 -	return BSTR_OK;
 -}
 -
 -/*  int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
 - *	int (* cb) (void * parm, int ofs, int len), void * parm)
 - *
 - *  Iterate the set of disjoint sequential substrings over str divided by the
 - *  character in splitChar.
 - *
 - *  Note: Non-destructive modification of str from within the cb function 
 - *  while performing this split is not undefined.  bsplitcb behaves in 
 - *  sequential lock step with calls to cb.  I.e., after returning from a cb 
 - *  that return a non-negative integer, bsplitcb continues from the position 
 - *  1 character after the last detected split character and it will halt 
 - *  immediately if the length of str falls below this point.  However, if the 
 - *  cb function destroys str, then it *must* return with a negative value, 
 - *  otherwise bsplitcb will continue in an undefined manner.
 - */
 -int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
 -	int (* cb) (void * parm, int ofs, int len), void * parm) {
 -int i, p, ret;
 -
 -	if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) 
 -		return BSTR_ERR;
 -
 -	p = pos;
 -	do {
 -		for (i=p; i < str->slen; i++) {
 -			if (str->data[i] == splitChar) break;
 -		}
 -		if ((ret = cb (parm, p, i - p)) < 0) return ret;
 -		p = i + 1;
 -	} while (p <= str->slen);
 -	return BSTR_OK;
 -}
 -
 -/*  int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
 - *	int (* cb) (void * parm, int ofs, int len), void * parm)
 - *
 - *  Iterate the set of disjoint sequential substrings over str divided by any 
 - *  of the characters in splitStr.  An empty splitStr causes the whole str to
 - *  be iterated once.
 - *
 - *  Note: Non-destructive modification of str from within the cb function 
 - *  while performing this split is not undefined.  bsplitscb behaves in 
 - *  sequential lock step with calls to cb.  I.e., after returning from a cb 
 - *  that return a non-negative integer, bsplitscb continues from the position 
 - *  1 character after the last detected split character and it will halt 
 - *  immediately if the length of str falls below this point.  However, if the 
 - *  cb function destroys str, then it *must* return with a negative value, 
 - *  otherwise bsplitscb will continue in an undefined manner.
 - */
 -int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
 -	int (* cb) (void * parm, int ofs, int len), void * parm) {
 -struct charField chrs;
 -int i, p, ret;
 -
 -	if (cb == NULL || str == NULL || pos < 0 || pos > str->slen 
 -	 || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
 -	if (splitStr->slen == 0) {
 -		if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0;
 -		return ret;
 -	}
 -
 -	if (splitStr->slen == 1) 
 -		return bsplitcb (str, splitStr->data[0], pos, cb, parm);
 -
 -	buildCharField (&chrs, splitStr);
 -
 -	p = pos;
 -	do {
 -		for (i=p; i < str->slen; i++) {
 -			if (testInCharField (&chrs, str->data[i])) break;
 -		}
 -		if ((ret = cb (parm, p, i - p)) < 0) return ret;
 -		p = i + 1;
 -	} while (p <= str->slen);
 -	return BSTR_OK;
 -}
 -
 -/*  int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
 - *	int (* cb) (void * parm, int ofs, int len), void * parm)
 - *
 - *  Iterate the set of disjoint sequential substrings over str divided by the 
 - *  substring splitStr.  An empty splitStr causes the whole str to be 
 - *  iterated once.
 - *
 - *  Note: Non-destructive modification of str from within the cb function 
 - *  while performing this split is not undefined.  bsplitstrcb behaves in 
 - *  sequential lock step with calls to cb.  I.e., after returning from a cb 
 - *  that return a non-negative integer, bsplitscb continues from the position 
 - *  1 character after the last detected split character and it will halt 
 - *  immediately if the length of str falls below this point.  However, if the 
 - *  cb function destroys str, then it *must* return with a negative value, 
 - *  otherwise bsplitscb will continue in an undefined manner.
 - */
 -int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
 -	int (* cb) (void * parm, int ofs, int len), void * parm) {
 -int i, p, ret;
 -
 -	if (cb == NULL || str == NULL || pos < 0 || pos > str->slen 
 -	 || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
 -
 -	if (0 == splitStr->slen) {
 -		for (i=pos; i < str->slen; i++) {
 -			if ((ret = cb (parm, i, 1)) < 0) return ret;
 -		}
 -		return BSTR_OK;
 -	}
 -
 -	if (splitStr->slen == 1) 
 -		return bsplitcb (str, splitStr->data[0], pos, cb, parm);
 -
 -	for (i=p=pos; i <= str->slen - splitStr->slen; i++) {
 -		if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) {
 -			if ((ret = cb (parm, p, i - p)) < 0) return ret;
 -			i += splitStr->slen;
 -			p = i;
 -		}
 -	}
 -	if ((ret = cb (parm, p, str->slen - p)) < 0) return ret;
 -	return BSTR_OK;
 -}
 -
 -struct genBstrList {
 -	bstring b;
 -	struct bstrList * bl;
 -};
 -
 -static int bscb (void * parm, int ofs, int len) {
 -struct genBstrList * g = (struct genBstrList *) parm;
 -	if (g->bl->qty >= g->bl->mlen) {
 -		int mlen = g->bl->mlen * 2;
 -		bstring * tbl;
 -
 -		while (g->bl->qty >= mlen) {
 -			if (mlen < g->bl->mlen) return BSTR_ERR;
 -			mlen += mlen;
 -		}
 -
 -		tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen);
 -		if (tbl == NULL) return BSTR_ERR;
 -
 -		g->bl->entry = tbl;
 -		g->bl->mlen = mlen;
 -	}
 -
 -	g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len);
 -	g->bl->qty++;
 -	return BSTR_OK;
 -}
 -
 -/*  struct bstrList * bsplit (const_bstring str, unsigned char splitChar)
 - *
 - *  Create an array of sequential substrings from str divided by the character
 - *  splitChar.  
 - */
 -struct bstrList * bsplit (const_bstring str, unsigned char splitChar) {
 -struct genBstrList g;
 -
 -	if (str == NULL || str->data == NULL || str->slen < 0) return NULL;
 -
 -	g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
 -	if (g.bl == NULL) return NULL;
 -	g.bl->mlen = 4;
 -	g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring));
 -	if (NULL == g.bl->entry) {
 -		bstr__free (g.bl);
 -		return NULL;
 -	}
 -
 -	g.b = (bstring) str;
 -	g.bl->qty = 0;
 -	if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) {
 -		bstrListDestroy (g.bl);
 -		return NULL;
 -	}
 -	return g.bl;
 -}
 -
 -/*  struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr)
 - *
 - *  Create an array of sequential substrings from str divided by the entire
 - *  substring splitStr.
 - */
 -struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) {
 -struct genBstrList g;
 -
 -	if (str == NULL || str->data == NULL || str->slen < 0) return NULL;
 -
 -	g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
 -	if (g.bl == NULL) return NULL;
 -	g.bl->mlen = 4;
 -	g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring));
 -	if (NULL == g.bl->entry) {
 -		bstr__free (g.bl);
 -		return NULL;
 -	}
 -
 -	g.b = (bstring) str;
 -	g.bl->qty = 0;
 -	if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) {
 -		bstrListDestroy (g.bl);
 -		return NULL;
 -	}
 -	return g.bl;
 -}
 -
 -/*  struct bstrList * bsplits (const_bstring str, bstring splitStr)
 - *
 - *  Create an array of sequential substrings from str divided by any of the 
 - *  characters in splitStr.  An empty splitStr causes a single entry bstrList
 - *  containing a copy of str to be returned.
 - */
 -struct bstrList * bsplits (const_bstring str, const_bstring splitStr) {
 -struct genBstrList g;
 -
 -	if (     str == NULL ||      str->slen < 0 ||      str->data == NULL ||
 -	    splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL)
 -		return NULL;
 -
 -	g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
 -	if (g.bl == NULL) return NULL;
 -	g.bl->mlen = 4;
 -	g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring));
 -	if (NULL == g.bl->entry) {
 -		bstr__free (g.bl);
 -		return NULL;
 -	}
 -	g.b = (bstring) str;
 -	g.bl->qty = 0;
 -
 -	if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) {
 -		bstrListDestroy (g.bl);
 -		return NULL;
 -	}
 -	return g.bl;
 -}
 -
 -#if defined (__TURBOC__) && !defined (__BORLANDC__)
 -# ifndef BSTRLIB_NOVSNP
 -#  define BSTRLIB_NOVSNP
 -# endif
 -#endif
 -
 -/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */
 -#if defined(__WATCOMC__) || defined(_MSC_VER)
 -#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);}
 -#else
 -#ifdef BSTRLIB_NOVSNP
 -/* This is just a hack.  If you are using a system without a vsnprintf, it is 
 -   not recommended that bformat be used at all. */
 -#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;}
 -#define START_VSNBUFF (256)
 -#else
 -
 -#if defined(__GNUC__) && !defined(__APPLE__)
 -/* Something is making gcc complain about this prototype not being here, so 
 -   I've just gone ahead and put it in. */
 -extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg);
 -#endif
 -
 -#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);}
 -#endif
 -#endif
 -
 -#if !defined (BSTRLIB_NOVSNP)
 -
 -#ifndef START_VSNBUFF
 -#define START_VSNBUFF (16)
 -#endif
 -
 -/* On IRIX vsnprintf returns n-1 when the operation would overflow the target 
 -   buffer, WATCOM and MSVC both return -1, while C99 requires that the 
 -   returned value be exactly what the length would be if the buffer would be
 -   large enough.  This leads to the idea that if the return value is larger 
 -   than n, then changing n to the return value will reduce the number of
 -   iterations required. */
 -
 -/*  int bformata (bstring b, const char * fmt, ...)
 - *
 - *  After the first parameter, it takes the same parameters as printf (), but 
 - *  rather than outputting results to stdio, it appends the results to 
 - *  a bstring which contains what would have been output. Note that if there 
 - *  is an early generation of a '\0' character, the bstring will be truncated 
 - *  to this end point.
 - */
 -int bformata (bstring b, const char * fmt, ...) {
 -va_list arglist;
 -bstring buff;
 -int n, r;
 -
 -	if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 
 -	 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
 -
 -	/* Since the length is not determinable beforehand, a search is
 -	   performed using the truncating "vsnprintf" call (to avoid buffer
 -	   overflows) on increasing potential sizes for the output result. */
 -
 -	if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
 -	if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
 -		n = 1;
 -		if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
 -	}
 -
 -	for (;;) {
 -		va_start (arglist, fmt);
 -		exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
 -		va_end (arglist);
 -
 -		buff->data[n] = (unsigned char) '\0';
 -		buff->slen = (int) (strlen) ((char *) buff->data);
 -
 -		if (buff->slen < n) break;
 -
 -		if (r > n) n = r; else n += n;
 -
 -		if (BSTR_OK != balloc (buff, n + 2)) {
 -			bdestroy (buff);
 -			return BSTR_ERR;
 -		}
 -	}
 -
 -	r = bconcat (b, buff);
 -	bdestroy (buff);
 -	return r;
 -}
 -
 -/*  int bassignformat (bstring b, const char * fmt, ...)
 - *
 - *  After the first parameter, it takes the same parameters as printf (), but 
 - *  rather than outputting results to stdio, it outputs the results to 
 - *  the bstring parameter b. Note that if there is an early generation of a 
 - *  '\0' character, the bstring will be truncated to this end point.
 - */
 -int bassignformat (bstring b, const char * fmt, ...) {
 -va_list arglist;
 -bstring buff;
 -int n, r;
 -
 -	if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 
 -	 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
 -
 -	/* Since the length is not determinable beforehand, a search is
 -	   performed using the truncating "vsnprintf" call (to avoid buffer
 -	   overflows) on increasing potential sizes for the output result. */
 -
 -	if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
 -	if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
 -		n = 1;
 -		if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
 -	}
 -
 -	for (;;) {
 -		va_start (arglist, fmt);
 -		exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
 -		va_end (arglist);
 -
 -		buff->data[n] = (unsigned char) '\0';
 -		buff->slen = (int) (strlen) ((char *) buff->data);
 -
 -		if (buff->slen < n) break;
 -
 -		if (r > n) n = r; else n += n;
 -
 -		if (BSTR_OK != balloc (buff, n + 2)) {
 -			bdestroy (buff);
 -			return BSTR_ERR;
 -		}
 -	}
 -
 -	r = bassign (b, buff);
 -	bdestroy (buff);
 -	return r;
 -}
 -
 -/*  bstring bformat (const char * fmt, ...)
 - *
 - *  Takes the same parameters as printf (), but rather than outputting results
 - *  to stdio, it forms a bstring which contains what would have been output.
 - *  Note that if there is an early generation of a '\0' character, the 
 - *  bstring will be truncated to this end point.
 - */
 -bstring bformat (const char * fmt, ...) {
 -va_list arglist;
 -bstring buff;
 -int n, r;
 -
 -	if (fmt == NULL) return NULL;
 -
 -	/* Since the length is not determinable beforehand, a search is
 -	   performed using the truncating "vsnprintf" call (to avoid buffer
 -	   overflows) on increasing potential sizes for the output result. */
 -
 -	if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
 -	if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
 -		n = 1;
 -		if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL;
 -	}
 -
 -	for (;;) {
 -		va_start (arglist, fmt);
 -		exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
 -		va_end (arglist);
 -
 -		buff->data[n] = (unsigned char) '\0';
 -		buff->slen = (int) (strlen) ((char *) buff->data);
 -
 -		if (buff->slen < n) break;
 -
 -		if (r > n) n = r; else n += n;
 -
 -		if (BSTR_OK != balloc (buff, n + 2)) {
 -			bdestroy (buff);
 -			return NULL;
 -		}
 -	}
 -
 -	return buff;
 -}
 -
 -/*  int bvcformata (bstring b, int count, const char * fmt, va_list arglist)
 - *
 - *  The bvcformata function formats data under control of the format control 
 - *  string fmt and attempts to append the result to b.  The fmt parameter is 
 - *  the same as that of the printf function.  The variable argument list is 
 - *  replaced with arglist, which has been initialized by the va_start macro.
 - *  The size of the appended output is upper bounded by count.  If the 
 - *  required output exceeds count, the string b is not augmented with any 
 - *  contents and a value below BSTR_ERR is returned.  If a value below -count 
 - *  is returned then it is recommended that the negative of this value be 
 - *  used as an update to the count in a subsequent pass.  On other errors, 
 - *  such as running out of memory, parameter errors or numeric wrap around 
 - *  BSTR_ERR is returned.  BSTR_OK is returned when the output is successfully 
 - *  generated and appended to b.
 - *
 - *  Note: There is no sanity checking of arglist, and this function is
 - *  destructive of the contents of b from the b->slen point onward.  If there 
 - *  is an early generation of a '\0' character, the bstring will be truncated 
 - *  to this end point.
 - */
 -int bvcformata (bstring b, int count, const char * fmt, va_list arg) {
 -int n, r, l;
 -
 -	if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL
 -	 || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
 -
 -	if (count > (n = b->slen + count) + 2) return BSTR_ERR;
 -	if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR;
 -
 -	exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg);
 -
 -	/* Did the operation complete successfully within bounds? */
 -	for (l = b->slen; l <= n; l++) {
 -		if ('\0' == b->data[l]) {
 -			b->slen = l;
 -			return BSTR_OK;
 -		}
 -	}
 -
 -	/* Abort, since the buffer was not large enough.  The return value 
 -	   tries to help set what the retry length should be. */
 -
 -	b->data[b->slen] = '\0';
 -	if (r > count + 1) {	/* Does r specify a particular target length? */
 -		n = r;
 -	} else {
 -		n = count + count;	/* If not, just double the size of count */
 -		if (count > n) n = INT_MAX;
 -	}
 -	n = -n;
 -
 -	if (n > BSTR_ERR-1) n = BSTR_ERR-1;
 -	return n;
 -}
 -
 -#endif
 diff --git a/src/bstrlib.h b/src/bstrlib.h deleted file mode 100644 index c8fa694..0000000 --- a/src/bstrlib.h +++ /dev/null @@ -1,304 +0,0 @@ -/*
 - * This source file is part of the bstring string library.  This code was
 - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause 
 - * BSD open source license or GPL v2.0. Refer to the accompanying documentation 
 - * for details on usage and license.
 - */
 -
 -/*
 - * bstrlib.h
 - *
 - * This file is the header file for the core module for implementing the 
 - * bstring functions.
 - */
 -
 -#ifndef BSTRLIB_INCLUDE
 -#define BSTRLIB_INCLUDE
 -
 -#ifdef __cplusplus
 -extern "C" {
 -#endif
 -
 -#include <stdarg.h>
 -#include <string.h>
 -#include <limits.h>
 -#include <ctype.h>
 -
 -#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
 -# if defined (__TURBOC__) && !defined (__BORLANDC__)
 -#  define BSTRLIB_NOVSNP
 -# endif
 -#endif
 -
 -#define BSTR_ERR (-1)
 -#define BSTR_OK (0)
 -#define BSTR_BS_BUFF_LENGTH_GET (0)
 -
 -typedef struct tagbstring * bstring;
 -typedef const struct tagbstring * const_bstring;
 -
 -/* Copy functions */
 -#define cstr2bstr bfromcstr
 -extern bstring bfromcstr (const char * str);
 -extern bstring bfromcstralloc (int mlen, const char * str);
 -extern bstring blk2bstr (const void * blk, int len);
 -extern char * bstr2cstr (const_bstring s, char z);
 -extern int bcstrfree (char * s);
 -extern bstring bstrcpy (const_bstring b1);
 -extern int bassign (bstring a, const_bstring b);
 -extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
 -extern int bassigncstr (bstring a, const char * str);
 -extern int bassignblk (bstring a, const void * s, int len);
 -
 -/* Destroy function */
 -extern int bdestroy (bstring b);
 -
 -/* Space allocation hinting functions */
 -extern int balloc (bstring s, int len);
 -extern int ballocmin (bstring b, int len);
 -
 -/* Substring extraction */
 -extern bstring bmidstr (const_bstring b, int left, int len);
 -
 -/* Various standard manipulations */
 -extern int bconcat (bstring b0, const_bstring b1);
 -extern int bconchar (bstring b0, char c);
 -extern int bcatcstr (bstring b, const char * s);
 -extern int bcatblk (bstring b, const void * s, int len);
 -extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
 -extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
 -extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
 -extern int bdelete (bstring s1, int pos, int len);
 -extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
 -extern int btrunc (bstring b, int n);
 -
 -/* Scan/search functions */
 -extern int bstricmp (const_bstring b0, const_bstring b1);
 -extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
 -extern int biseqcaseless (const_bstring b0, const_bstring b1);
 -extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
 -extern int biseq (const_bstring b0, const_bstring b1);
 -extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
 -extern int biseqcstr (const_bstring b, const char * s);
 -extern int biseqcstrcaseless (const_bstring b, const char * s);
 -extern int bstrcmp (const_bstring b0, const_bstring b1);
 -extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
 -extern int binstr (const_bstring s1, int pos, const_bstring s2);
 -extern int binstrr (const_bstring s1, int pos, const_bstring s2);
 -extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
 -extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
 -extern int bstrchrp (const_bstring b, int c, int pos);
 -extern int bstrrchrp (const_bstring b, int c, int pos);
 -#define bstrchr(b,c) bstrchrp ((b), (c), 0)
 -#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
 -extern int binchr (const_bstring b0, int pos, const_bstring b1);
 -extern int binchrr (const_bstring b0, int pos, const_bstring b1);
 -extern int bninchr (const_bstring b0, int pos, const_bstring b1);
 -extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
 -extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
 -extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
 -
 -/* List of string container functions */
 -struct bstrList {
 -    int qty, mlen;
 -    bstring * entry;
 -};
 -extern struct bstrList * bstrListCreate (void);
 -extern int bstrListDestroy (struct bstrList * sl);
 -extern int bstrListAlloc (struct bstrList * sl, int msz);
 -extern int bstrListAllocMin (struct bstrList * sl, int msz);
 -
 -/* String split and join functions */
 -extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
 -extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
 -extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
 -extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
 -extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
 -	int (* cb) (void * parm, int ofs, int len), void * parm);
 -extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
 -	int (* cb) (void * parm, int ofs, int len), void * parm);
 -extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
 -	int (* cb) (void * parm, int ofs, int len), void * parm);
 -
 -/* Miscellaneous functions */
 -extern int bpattern (bstring b, int len);
 -extern int btoupper (bstring b);
 -extern int btolower (bstring b);
 -extern int bltrimws (bstring b);
 -extern int brtrimws (bstring b);
 -extern int btrimws (bstring b);
 -
 -/* <*>printf format functions */
 -#if !defined (BSTRLIB_NOVSNP)
 -extern bstring bformat (const char * fmt, ...);
 -extern int bformata (bstring b, const char * fmt, ...);
 -extern int bassignformat (bstring b, const char * fmt, ...);
 -extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
 -
 -#define bvformata(ret, b, fmt, lastarg) { \
 -bstring bstrtmp_b = (b); \
 -const char * bstrtmp_fmt = (fmt); \
 -int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
 -	for (;;) { \
 -		va_list bstrtmp_arglist; \
 -		va_start (bstrtmp_arglist, lastarg); \
 -		bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
 -		va_end (bstrtmp_arglist); \
 -		if (bstrtmp_r >= 0) { /* Everything went ok */ \
 -			bstrtmp_r = BSTR_OK; \
 -			break; \
 -		} else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
 -			bstrtmp_r = BSTR_ERR; \
 -			break; \
 -		} \
 -		bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
 -	} \
 -	ret = bstrtmp_r; \
 -}
 -
 -#endif
 -
 -typedef int (*bNgetc) (void *parm);
 -typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
 -
 -/* Input functions */
 -extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
 -extern bstring bread (bNread readPtr, void * parm);
 -extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
 -extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
 -extern int breada (bstring b, bNread readPtr, void * parm);
 -
 -/* Stream functions */
 -extern struct bStream * bsopen (bNread readPtr, void * parm);
 -extern void * bsclose (struct bStream * s);
 -extern int bsbufflength (struct bStream * s, int sz);
 -extern int bsreadln (bstring b, struct bStream * s, char terminator);
 -extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
 -extern int bsread (bstring b, struct bStream * s, int n);
 -extern int bsreadlna (bstring b, struct bStream * s, char terminator);
 -extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
 -extern int bsreada (bstring b, struct bStream * s, int n);
 -extern int bsunread (struct bStream * s, const_bstring b);
 -extern int bspeek (bstring r, const struct bStream * s);
 -extern int bssplitscb (struct bStream * s, const_bstring splitStr, 
 -	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
 -extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, 
 -	int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
 -extern int bseof (const struct bStream * s);
 -
 -struct tagbstring {
 -	int mlen;
 -	int slen;
 -	unsigned char * data;
 -};
 -
 -/* Accessor macros */
 -#define blengthe(b, e)      (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
 -#define blength(b)          (blengthe ((b), 0))
 -#define bdataofse(b, o, e)  (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
 -#define bdataofs(b, o)      (bdataofse ((b), (o), (void *)0))
 -#define bdatae(b, e)        (bdataofse (b, 0, e))
 -#define bdata(b)            (bdataofs (b, 0))
 -#define bchare(b, p, e)     ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
 -#define bchar(b, p)         bchare ((b), (p), '\0')
 -
 -/* Static constant string initialization macro */
 -#define bsStaticMlen(q,m)   {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
 -#if defined(_MSC_VER)
 -/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
 -# define bsStatic(q)        bsStaticMlen(q,-32)
 -#endif
 -#ifndef bsStatic
 -# define bsStatic(q)        bsStaticMlen(q,-__LINE__)
 -#endif
 -
 -/* Static constant block parameter pair */
 -#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
 -
 -/* Reference building macros */
 -#define cstr2tbstr btfromcstr
 -#define btfromcstr(t,s) {                                            \
 -    (t).data = (unsigned char *) (s);                                \
 -    (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
 -    (t).mlen = -1;                                                   \
 -}
 -#define blk2tbstr(t,s,l) {            \
 -    (t).data = (unsigned char *) (s); \
 -    (t).slen = l;                     \
 -    (t).mlen = -1;                    \
 -}
 -#define btfromblk(t,s,l) blk2tbstr(t,s,l)
 -#define bmid2tbstr(t,b,p,l) {                                                \
 -    const_bstring bstrtmp_s = (b);                                           \
 -    if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) {              \
 -        int bstrtmp_left = (p);                                              \
 -        int bstrtmp_len  = (l);                                              \
 -        if (bstrtmp_left < 0) {                                              \
 -            bstrtmp_len += bstrtmp_left;                                     \
 -            bstrtmp_left = 0;                                                \
 -        }                                                                    \
 -        if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left)                    \
 -            bstrtmp_len = bstrtmp_s->slen - bstrtmp_left;                    \
 -        if (bstrtmp_len <= 0) {                                              \
 -            (t).data = (unsigned char *)"";                                  \
 -            (t).slen = 0;                                                    \
 -        } else {                                                             \
 -            (t).data = bstrtmp_s->data + bstrtmp_left;                       \
 -            (t).slen = bstrtmp_len;                                          \
 -        }                                                                    \
 -    } else {                                                                 \
 -        (t).data = (unsigned char *)"";                                      \
 -        (t).slen = 0;                                                        \
 -    }                                                                        \
 -    (t).mlen = -__LINE__;                                                    \
 -}
 -#define btfromblkltrimws(t,s,l) {                                            \
 -    int bstrtmp_idx = 0, bstrtmp_len = (l);                                  \
 -    unsigned char * bstrtmp_s = (s);                                         \
 -    if (bstrtmp_s && bstrtmp_len >= 0) {                                     \
 -        for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) {                   \
 -            if (!isspace (bstrtmp_s[bstrtmp_idx])) break;                    \
 -        }                                                                    \
 -    }                                                                        \
 -    (t).data = bstrtmp_s + bstrtmp_idx;                                      \
 -    (t).slen = bstrtmp_len - bstrtmp_idx;                                    \
 -    (t).mlen = -__LINE__;                                                    \
 -}
 -#define btfromblkrtrimws(t,s,l) {                                            \
 -    int bstrtmp_len = (l) - 1;                                               \
 -    unsigned char * bstrtmp_s = (s);                                         \
 -    if (bstrtmp_s && bstrtmp_len >= 0) {                                     \
 -        for (; bstrtmp_len >= 0; bstrtmp_len--) {                            \
 -            if (!isspace (bstrtmp_s[bstrtmp_len])) break;                    \
 -        }                                                                    \
 -    }                                                                        \
 -    (t).data = bstrtmp_s;                                                    \
 -    (t).slen = bstrtmp_len + 1;                                              \
 -    (t).mlen = -__LINE__;                                                    \
 -}
 -#define btfromblktrimws(t,s,l) {                                             \
 -    int bstrtmp_idx = 0, bstrtmp_len = (l) - 1;                              \
 -    unsigned char * bstrtmp_s = (s);                                         \
 -    if (bstrtmp_s && bstrtmp_len >= 0) {                                     \
 -        for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) {                  \
 -            if (!isspace (bstrtmp_s[bstrtmp_idx])) break;                    \
 -        }                                                                    \
 -        for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) {                  \
 -            if (!isspace (bstrtmp_s[bstrtmp_len])) break;                    \
 -        }                                                                    \
 -    }                                                                        \
 -    (t).data = bstrtmp_s + bstrtmp_idx;                                      \
 -    (t).slen = bstrtmp_len + 1 - bstrtmp_idx;                                \
 -    (t).mlen = -__LINE__;                                                    \
 -}
 -
 -/* Write protection macros */
 -#define bwriteprotect(t)     { if ((t).mlen >=  0) (t).mlen = -1; }
 -#define bwriteallow(t)       { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
 -#define biswriteprotected(t) ((t).mlen <= 0)
 -
 -#ifdef __cplusplus
 -}
 -#endif
 -
 -#endif
 diff --git a/src/buffer.c b/src/buffer.c new file mode 100644 index 0000000..b81e7fa --- /dev/null +++ b/src/buffer.c @@ -0,0 +1,313 @@ +#include <stdarg.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/param.h> + +#include "buffer.h" + +/* Used as default value for gh_buf->ptr so that people can always + * assume ptr is non-NULL and zero terminated even for new gh_bufs. + */ +unsigned char gh_buf__initbuf[1]; +unsigned char gh_buf__oom[1]; + +#define ENSURE_SIZE(b, d) \ +	if ((d) > buf->asize && gh_buf_grow(b, (d)) < 0)\ +		return -1; + +void gh_buf_init(gh_buf *buf, int initial_size) +{ +	buf->asize = 0; +	buf->size = 0; +	buf->ptr = gh_buf__initbuf; + +	if (initial_size) +		gh_buf_grow(buf, initial_size); +} + +int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom) +{ +	char *new_ptr; +	size_t new_size; + +	if (buf->ptr == gh_buf__oom || buf->asize < 0) +		return -1; + +	if (target_size <= buf->asize) +		return 0; + +	if (buf->asize == 0) { +		new_size = target_size; +		new_ptr = NULL; +	} else { +		new_size = buf->asize; +		new_ptr = buf->ptr; +	} + +	/* grow the buffer size by 1.5, until it's big enough +	 * to fit our target size */ +	while (new_size < target_size) +		new_size = (new_size << 1) - (new_size >> 1); + +	/* round allocation up to multiple of 8 */ +	new_size = (new_size + 7) & ~7; + +	new_ptr = realloc(new_ptr, new_size); + +	if (!new_ptr) { +		if (mark_oom) +			buf->ptr = gh_buf__oom; +		return -1; +	} + +	buf->asize = new_size; +	buf->ptr   = new_ptr; + +	/* truncate the existing buffer size if necessary */ +	if (buf->size >= buf->asize) +		buf->size = buf->asize - 1; +	buf->ptr[buf->size] = '\0'; + +	return 0; +} + +void gh_buf_free(gh_buf *buf) +{ +	if (!buf) return; + +	if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom) +		free(buf->ptr); + +	gh_buf_init(buf, 0); +} + +void gh_buf_clear(gh_buf *buf) +{ +	buf->size 
= 0; + +	if (buf->asize > 0) +		buf->ptr[0] = '\0'; + +	if (buf->asize < 0) { +		buf->ptr = gh_buf__initbuf; +		buf->asize = 0; +	} +} + +int gh_buf_set(gh_buf *buf, const char *data, int len) +{ +	if (len == 0 || data == NULL) { +		gh_buf_clear(buf); +	} else { +		if (data != buf->ptr) { +			ENSURE_SIZE(buf, len + 1); +			memmove(buf->ptr, data, len); +		} +		buf->size = len; +		buf->ptr[buf->size] = '\0'; +	} +	return 0; +} + +int gh_buf_sets(gh_buf *buf, const char *string) +{ +	return gh_buf_set(buf, string, string ? strlen(string) : 0); +} + +int gh_buf_putc(gh_buf *buf, char c) +{ +	ENSURE_SIZE(buf, buf->size + 2); +	buf->ptr[buf->size++] = c; +	buf->ptr[buf->size] = '\0'; +	return 0; +} + +int gh_buf_put(gh_buf *buf, const char *data, int len) +{ +	ENSURE_SIZE(buf, buf->size + len + 1); +	memmove(buf->ptr + buf->size, data, len); +	buf->size += len; +	buf->ptr[buf->size] = '\0'; +	return 0; +} + +int gh_buf_puts(gh_buf *buf, const char *string) +{ +	assert(string); +	return gh_buf_put(buf, string, strlen(string)); +} + +int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap) +{ +	const int expected_size = buf->size + (strlen(format) * 2); +	int len; + +	ENSURE_SIZE(buf, expected_size); + +	while (1) { +		va_list args; +		va_copy(args, ap); + +		len = vsnprintf( +			buf->ptr + buf->size, +			buf->asize - buf->size, +			format, args +		); + +		if (len < 0) { +			free(buf->ptr); +			buf->ptr = gh_buf__oom; +			return -1; +		} + +		if (len + 1 <= buf->asize - buf->size) { +			buf->size += len; +			break; +		} + +		ENSURE_SIZE(buf, buf->size + len + 1); +	} + +	return 0; +} + +int gh_buf_printf(gh_buf *buf, const char *format, ...) 
+{ +	int r; +	va_list ap; + +	va_start(ap, format); +	r = gh_buf_vprintf(buf, format, ap); +	va_end(ap); + +	return r; +} + +void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf) +{ +	size_t copylen; + +	assert(data && datasize && buf); + +	data[0] = '\0'; + +	if (buf->size == 0 || buf->asize <= 0) +		return; + +	copylen = buf->size; +	if (copylen > datasize - 1) +		copylen = datasize - 1; +	memmove(data, buf->ptr, copylen); +	data[copylen] = '\0'; +} + +void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b) +{ +	gh_buf t = *buf_a; +	*buf_a = *buf_b; +	*buf_b = t; +} + +char *gh_buf_detach(gh_buf *buf) +{ +	char *data = buf->ptr; + +	if (buf->asize == 0 || buf->ptr == gh_buf__oom) +		return NULL; + +	gh_buf_init(buf, 0); + +	return data; +} + +void gh_buf_attach(gh_buf *buf, char *ptr, int asize) +{ +	gh_buf_free(buf); + +	if (ptr) { +		buf->ptr = ptr; +		buf->size = strlen(ptr); +		if (asize) +			buf->asize = (asize < buf->size) ? buf->size + 1 : asize; +		else /* pass 0 to fall back on strlen + 1 */ +			buf->asize = buf->size + 1; +	} else { +		gh_buf_grow(buf, asize); +	} +} + +int gh_buf_cmp(const gh_buf *a, const gh_buf *b) +{ +	int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); +	return (result != 0) ? result : +		(a->size < b->size) ? -1 : (a->size > b->size) ? 
1 : 0; +} + +int gh_buf_strchr(const gh_buf *buf, int c, int pos) +{ +  const unsigned char *p = memchr(buf->ptr + pos, c, buf->size - pos); +  if (!p) +    return -1; + +  return (int)(p - buf->ptr); /* offset of the match from the start of the buffer */ +} + +int gh_buf_strrchr(const gh_buf *buf, int c, int pos) +{ +	int i; + +	for (i = pos; i >= 0; i--) { +		if (buf->ptr[i] == (unsigned char) c) +			return i; +	} + +	return -1; +} + +void gh_buf_truncate(gh_buf *buf, int len) +{ +	assert(buf->asize >= 0); + +	if (len >= 0 && len < buf->size) { /* keep the first `len` bytes; negative or too-large len is a no-op */ +		buf->size = len; +		buf->ptr[buf->size] = '\0'; +	} +} + +void gh_buf_ltruncate(gh_buf *buf, int len) +{ +	assert(buf->asize >= 0); + +	if (len > 0 && len < buf->size) { /* drop the first `len` bytes and shift the rest down */ +		memmove(buf->ptr, buf->ptr + len, buf->size - len); +		buf->size -= len; +		buf->ptr[buf->size] = '\0'; +	} +} + +void gh_buf_trim(gh_buf *buf) +{ +	int i = 0; + +	assert(buf->asize >= 0); + +	/* ltrim: count leading whitespace, then drop it from the front */ +	while (i < buf->size && isspace(buf->ptr[i])) +		i++; + +	gh_buf_ltruncate(buf, i); + +	/* rtrim */ +	while (buf->size > 0) { +		if (!isspace(buf->ptr[buf->size - 1])) +			break; + +		buf->size--; +	} + +	buf->ptr[buf->size] = '\0'; +} diff --git a/src/buffer.h b/src/buffer.h new file mode 100644 index 0000000..2581ee3 --- /dev/null +++ b/src/buffer.h @@ -0,0 +1,119 @@ +#ifndef INCLUDE_buffer_h__ +#define INCLUDE_buffer_h__ + +#include <stdbool.h> +#include <stddef.h> +#include <stdarg.h> +#include <sys/types.h> + +typedef struct { +	unsigned char *ptr; +	int asize, size; +} gh_buf; + +extern unsigned char gh_buf__initbuf[]; +extern unsigned char gh_buf__oom[]; + +#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 } + +/** + * Initialize a gh_buf structure. + * + * For the cases where GH_BUF_INIT cannot be used to do static + * initialization. + */ +extern void gh_buf_init(gh_buf *buf, int initial_size); + +static inline void gh_buf_static(gh_buf *buf, unsigned char *source) +{ +	buf->ptr = source; +	buf->size = strlen((const char *)source); +	buf->asize = -1; +} + +/** + * Attempt to grow the buffer to hold at least `target_size` bytes. 
+ * + * If the allocation fails, this will return an error.  If mark_oom is true, + * this will mark the buffer as invalid for future operations; if false, + * existing buffer content will be preserved, but calling code must handle + * that the buffer was not expanded. + */ +extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom); + +/** + * Grow the buffer to hold at least `target_size` bytes. + * + * If the allocation fails, this will return an error and the buffer will be + * marked as invalid for future operations, invalidating its contents. + * + * @return 0 on success or -1 on failure + */ +static inline int gh_buf_grow(gh_buf *buf, int target_size) +{ +	return gh_buf_try_grow(buf, target_size, true); +} + +extern void gh_buf_free(gh_buf *buf); +extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b); + +/** + * Test if there have been any reallocation failures with this gh_buf. + * + * Any function that writes to a gh_buf can fail due to memory allocation + * issues.  If one fails, the gh_buf will be marked with an OOM error and + * further calls to modify the buffer will fail.  Check gh_buf_oom() at the + * end of your sequence and it will be true if you ran out of memory at any + * point with that buffer. 
+ * + * @return false if no error, true if allocation error + */ +static inline bool gh_buf_oom(const gh_buf *buf) +{ +	return (buf->ptr == gh_buf__oom); +} + + +static inline size_t gh_buf_len(const gh_buf *buf) +{ +	return buf->size; +} + +extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b); + +extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize); +extern char *gh_buf_detach(gh_buf *buf); +extern void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf); + +static inline const char *gh_buf_cstr(const gh_buf *buf) +{ +	return (const char *)buf->ptr; +} + +#define gh_buf_at(buf, n) ((buf)->ptr[(n)]) + +/* + * Functions below that return int value error codes will return 0 on + * success or -1 on failure (which generally means an allocation failed). + * Using a gh_buf where the allocation has failed will result in -1 from + * all further calls using that buffer.  As a result, you can ignore the + * return code of these functions and call them in a series then just call + * gh_buf_oom at the end. + */ +extern int gh_buf_set(gh_buf *buf, const char *data, int len); +extern int gh_buf_sets(gh_buf *buf, const char *string); +extern int gh_buf_putc(gh_buf *buf, char c); +extern int gh_buf_put(gh_buf *buf, const char *data, int len); +extern int gh_buf_puts(gh_buf *buf, const char *string); +extern int gh_buf_printf(gh_buf *buf, const char *format, ...) 
+	__attribute__((format (printf, 2, 3))); +extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap); +extern void gh_buf_clear(gh_buf *buf); + +int gh_buf_strchr(const gh_buf *buf, int c, int pos); +int gh_buf_strrchr(const gh_buf *buf, int c, int pos); +void gh_buf_truncate(gh_buf *buf, int len); +void gh_buf_ltruncate(gh_buf *buf, int len); +void gh_buf_trim(gh_buf *buf); + +#endif diff --git a/src/case_fold_switch.c b/src/case_fold_switch.inc index 70fdd75..70fdd75 100644 --- a/src/case_fold_switch.c +++ b/src/case_fold_switch.inc diff --git a/src/casefold.c b/src/casefold.c deleted file mode 100644 index 33f18aa..0000000 --- a/src/casefold.c +++ /dev/null @@ -1,2699 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> - - -  switch c { -    case 0x0041: -      bufpush(0x0061); -      break; -    case 0x0042: -      bufpush(0x0062); -      break; -    case 0x0043: -      bufpush(0x0063); -      break; -    case 0x0044: -      bufpush(0x0064); -      break; -    case 0x0045: -      bufpush(0x0065); -      break; -    case 0x0046: -      bufpush(0x0066); -      break; -    case 0x0047: -      bufpush(0x0067); -      break; -    case 0x0048: -      bufpush(0x0068); -      break; -    case 0x0049: -      bufpush(0x0069); -      break; -    case 0x0049: -      bufpush(0x0131); -      break; -    case 0x004A: -      bufpush(0x006A); -      break; -    case 0x004B: -      bufpush(0x006B); -      break; -    case 0x004C: -      bufpush(0x006C); -      break; -    case 0x004D: -      bufpush(0x006D); -      break; -    case 0x004E: -      bufpush(0x006E); -      break; -    case 0x004F: -      bufpush(0x006F); -      break; -    case 0x0050: -      bufpush(0x0070); -      break; -    case 0x0051: -      bufpush(0x0071); -      break; -    case 0x0052: -      bufpush(0x0072); -      break; -    case 0x0053: -      bufpush(0x0073); -      break; -    case 0x0054: -      bufpush(0x0074); -      break; -    case 0x0055: -      bufpush(0x0075); -      break; -    
case 0x0056: -      bufpush(0x0076); -      break; -    case 0x0057: -      bufpush(0x0077); -      break; -    case 0x0058: -      bufpush(0x0078); -      break; -    case 0x0059: -      bufpush(0x0079); -      break; -    case 0x005A: -      bufpush(0x007A); -      break; -    case 0x00B5: -      bufpush(0x03BC); -      break; -    case 0x00C0: -      bufpush(0x00E0); -      break; -    case 0x00C1: -      bufpush(0x00E1); -      break; -    case 0x00C2: -      bufpush(0x00E2); -      break; -    case 0x00C3: -      bufpush(0x00E3); -      break; -    case 0x00C4: -      bufpush(0x00E4); -      break; -    case 0x00C5: -      bufpush(0x00E5); -      break; -    case 0x00C6: -      bufpush(0x00E6); -      break; -    case 0x00C7: -      bufpush(0x00E7); -      break; -    case 0x00C8: -      bufpush(0x00E8); -      break; -    case 0x00C9: -      bufpush(0x00E9); -      break; -    case 0x00CA: -      bufpush(0x00EA); -      break; -    case 0x00CB: -      bufpush(0x00EB); -      break; -    case 0x00CC: -      bufpush(0x00EC); -      break; -    case 0x00CD: -      bufpush(0x00ED); -      break; -    case 0x00CE: -      bufpush(0x00EE); -      break; -    case 0x00CF: -      bufpush(0x00EF); -      break; -    case 0x00D0: -      bufpush(0x00F0); -      break; -    case 0x00D1: -      bufpush(0x00F1); -      break; -    case 0x00D2: -      bufpush(0x00F2); -      break; -    case 0x00D3: -      bufpush(0x00F3); -      break; -    case 0x00D4: -      bufpush(0x00F4); -      break; -    case 0x00D5: -      bufpush(0x00F5); -      break; -    case 0x00D6: -      bufpush(0x00F6); -      break; -    case 0x00D8: -      bufpush(0x00F8); -      break; -    case 0x00D9: -      bufpush(0x00F9); -      break; -    case 0x00DA: -      bufpush(0x00FA); -      break; -    case 0x00DB: -      bufpush(0x00FB); -      break; -    case 0x00DC: -      bufpush(0x00FC); -      break; -    case 0x00DD: -      bufpush(0x00FD); -      break; -    case 0x00DE: -      bufpush(0x00FE); -  
    break; -    case 0x00DF: -      bufpush(0x0073); -      bufpush(0x0073); -      break; -    case 0x0100: -      bufpush(0x0101); -      break; -    case 0x0102: -      bufpush(0x0103); -      break; -    case 0x0104: -      bufpush(0x0105); -      break; -    case 0x0106: -      bufpush(0x0107); -      break; -    case 0x0108: -      bufpush(0x0109); -      break; -    case 0x010A: -      bufpush(0x010B); -      break; -    case 0x010C: -      bufpush(0x010D); -      break; -    case 0x010E: -      bufpush(0x010F); -      break; -    case 0x0110: -      bufpush(0x0111); -      break; -    case 0x0112: -      bufpush(0x0113); -      break; -    case 0x0114: -      bufpush(0x0115); -      break; -    case 0x0116: -      bufpush(0x0117); -      break; -    case 0x0118: -      bufpush(0x0119); -      break; -    case 0x011A: -      bufpush(0x011B); -      break; -    case 0x011C: -      bufpush(0x011D); -      break; -    case 0x011E: -      bufpush(0x011F); -      break; -    case 0x0120: -      bufpush(0x0121); -      break; -    case 0x0122: -      bufpush(0x0123); -      break; -    case 0x0124: -      bufpush(0x0125); -      break; -    case 0x0126: -      bufpush(0x0127); -      break; -    case 0x0128: -      bufpush(0x0129); -      break; -    case 0x012A: -      bufpush(0x012B); -      break; -    case 0x012C: -      bufpush(0x012D); -      break; -    case 0x012E: -      bufpush(0x012F); -      break; -    case 0x0130: -      bufpush(0x0069); -      bufpush(0x0307); -      break; -    case 0x0130: -      bufpush(0x0069); -      break; -    case 0x0132: -      bufpush(0x0133); -      break; -    case 0x0134: -      bufpush(0x0135); -      break; -    case 0x0136: -      bufpush(0x0137); -      break; -    case 0x0139: -      bufpush(0x013A); -      break; -    case 0x013B: -      bufpush(0x013C); -      break; -    case 0x013D: -      bufpush(0x013E); -      break; -    case 0x013F: -      bufpush(0x0140); -      break; -    case 0x0141: -      
bufpush(0x0142); -      break; -    case 0x0143: -      bufpush(0x0144); -      break; -    case 0x0145: -      bufpush(0x0146); -      break; -    case 0x0147: -      bufpush(0x0148); -      break; -    case 0x0149: -      bufpush(0x02BC); -      bufpush(0x006E); -      break; -    case 0x014A: -      bufpush(0x014B); -      break; -    case 0x014C: -      bufpush(0x014D); -      break; -    case 0x014E: -      bufpush(0x014F); -      break; -    case 0x0150: -      bufpush(0x0151); -      break; -    case 0x0152: -      bufpush(0x0153); -      break; -    case 0x0154: -      bufpush(0x0155); -      break; -    case 0x0156: -      bufpush(0x0157); -      break; -    case 0x0158: -      bufpush(0x0159); -      break; -    case 0x015A: -      bufpush(0x015B); -      break; -    case 0x015C: -      bufpush(0x015D); -      break; -    case 0x015E: -      bufpush(0x015F); -      break; -    case 0x0160: -      bufpush(0x0161); -      break; -    case 0x0162: -      bufpush(0x0163); -      break; -    case 0x0164: -      bufpush(0x0165); -      break; -    case 0x0166: -      bufpush(0x0167); -      break; -    case 0x0168: -      bufpush(0x0169); -      break; -    case 0x016A: -      bufpush(0x016B); -      break; -    case 0x016C: -      bufpush(0x016D); -      break; -    case 0x016E: -      bufpush(0x016F); -      break; -    case 0x0170: -      bufpush(0x0171); -      break; -    case 0x0172: -      bufpush(0x0173); -      break; -    case 0x0174: -      bufpush(0x0175); -      break; -    case 0x0176: -      bufpush(0x0177); -      break; -    case 0x0178: -      bufpush(0x00FF); -      break; -    case 0x0179: -      bufpush(0x017A); -      break; -    case 0x017B: -      bufpush(0x017C); -      break; -    case 0x017D: -      bufpush(0x017E); -      break; -    case 0x017F: -      bufpush(0x0073); -      break; -    case 0x0181: -      bufpush(0x0253); -      break; -    case 0x0182: -      bufpush(0x0183); -      break; -    case 0x0184: -      
bufpush(0x0185); -      break; -    case 0x0186: -      bufpush(0x0254); -      break; -    case 0x0187: -      bufpush(0x0188); -      break; -    case 0x0189: -      bufpush(0x0256); -      break; -    case 0x018A: -      bufpush(0x0257); -      break; -    case 0x018B: -      bufpush(0x018C); -      break; -    case 0x018E: -      bufpush(0x01DD); -      break; -    case 0x018F: -      bufpush(0x0259); -      break; -    case 0x0190: -      bufpush(0x025B); -      break; -    case 0x0191: -      bufpush(0x0192); -      break; -    case 0x0193: -      bufpush(0x0260); -      break; -    case 0x0194: -      bufpush(0x0263); -      break; -    case 0x0196: -      bufpush(0x0269); -      break; -    case 0x0197: -      bufpush(0x0268); -      break; -    case 0x0198: -      bufpush(0x0199); -      break; -    case 0x019C: -      bufpush(0x026F); -      break; -    case 0x019D: -      bufpush(0x0272); -      break; -    case 0x019F: -      bufpush(0x0275); -      break; -    case 0x01A0: -      bufpush(0x01A1); -      break; -    case 0x01A2: -      bufpush(0x01A3); -      break; -    case 0x01A4: -      bufpush(0x01A5); -      break; -    case 0x01A6: -      bufpush(0x0280); -      break; -    case 0x01A7: -      bufpush(0x01A8); -      break; -    case 0x01A9: -      bufpush(0x0283); -      break; -    case 0x01AC: -      bufpush(0x01AD); -      break; -    case 0x01AE: -      bufpush(0x0288); -      break; -    case 0x01AF: -      bufpush(0x01B0); -      break; -    case 0x01B1: -      bufpush(0x028A); -      break; -    case 0x01B2: -      bufpush(0x028B); -      break; -    case 0x01B3: -      bufpush(0x01B4); -      break; -    case 0x01B5: -      bufpush(0x01B6); -      break; -    case 0x01B7: -      bufpush(0x0292); -      break; -    case 0x01B8: -      bufpush(0x01B9); -      break; -    case 0x01BC: -      bufpush(0x01BD); -      break; -    case 0x01C4: -      bufpush(0x01C6); -      break; -    case 0x01C5: -      bufpush(0x01C6); -      break; -    
case 0x01C7: -      bufpush(0x01C9); -      break; -    case 0x01C8: -      bufpush(0x01C9); -      break; -    case 0x01CA: -      bufpush(0x01CC); -      break; -    case 0x01CB: -      bufpush(0x01CC); -      break; -    case 0x01CD: -      bufpush(0x01CE); -      break; -    case 0x01CF: -      bufpush(0x01D0); -      break; -    case 0x01D1: -      bufpush(0x01D2); -      break; -    case 0x01D3: -      bufpush(0x01D4); -      break; -    case 0x01D5: -      bufpush(0x01D6); -      break; -    case 0x01D7: -      bufpush(0x01D8); -      break; -    case 0x01D9: -      bufpush(0x01DA); -      break; -    case 0x01DB: -      bufpush(0x01DC); -      break; -    case 0x01DE: -      bufpush(0x01DF); -      break; -    case 0x01E0: -      bufpush(0x01E1); -      break; -    case 0x01E2: -      bufpush(0x01E3); -      break; -    case 0x01E4: -      bufpush(0x01E5); -      break; -    case 0x01E6: -      bufpush(0x01E7); -      break; -    case 0x01E8: -      bufpush(0x01E9); -      break; -    case 0x01EA: -      bufpush(0x01EB); -      break; -    case 0x01EC: -      bufpush(0x01ED); -      break; -    case 0x01EE: -      bufpush(0x01EF); -      break; -    case 0x01F0: -      bufpush(0x006A); -      bufpush(0x030C); -      break; -    case 0x01F1: -      bufpush(0x01F3); -      break; -    case 0x01F2: -      bufpush(0x01F3); -      break; -    case 0x01F4: -      bufpush(0x01F5); -      break; -    case 0x01F6: -      bufpush(0x0195); -      break; -    case 0x01F7: -      bufpush(0x01BF); -      break; -    case 0x01F8: -      bufpush(0x01F9); -      break; -    case 0x01FA: -      bufpush(0x01FB); -      break; -    case 0x01FC: -      bufpush(0x01FD); -      break; -    case 0x01FE: -      bufpush(0x01FF); -      break; -    case 0x0200: -      bufpush(0x0201); -      break; -    case 0x0202: -      bufpush(0x0203); -      break; -    case 0x0204: -      bufpush(0x0205); -      break; -    case 0x0206: -      bufpush(0x0207); -      break; -    case 0x0208: -  
    bufpush(0x0209); -      break; -    case 0x020A: -      bufpush(0x020B); -      break; -    case 0x020C: -      bufpush(0x020D); -      break; -    case 0x020E: -      bufpush(0x020F); -      break; -    case 0x0210: -      bufpush(0x0211); -      break; -    case 0x0212: -      bufpush(0x0213); -      break; -    case 0x0214: -      bufpush(0x0215); -      break; -    case 0x0216: -      bufpush(0x0217); -      break; -    case 0x0218: -      bufpush(0x0219); -      break; -    case 0x021A: -      bufpush(0x021B); -      break; -    case 0x021C: -      bufpush(0x021D); -      break; -    case 0x021E: -      bufpush(0x021F); -      break; -    case 0x0220: -      bufpush(0x019E); -      break; -    case 0x0222: -      bufpush(0x0223); -      break; -    case 0x0224: -      bufpush(0x0225); -      break; -    case 0x0226: -      bufpush(0x0227); -      break; -    case 0x0228: -      bufpush(0x0229); -      break; -    case 0x022A: -      bufpush(0x022B); -      break; -    case 0x022C: -      bufpush(0x022D); -      break; -    case 0x022E: -      bufpush(0x022F); -      break; -    case 0x0230: -      bufpush(0x0231); -      break; -    case 0x0232: -      bufpush(0x0233); -      break; -    case 0x0345: -      bufpush(0x03B9); -      break; -    case 0x0386: -      bufpush(0x03AC); -      break; -    case 0x0388: -      bufpush(0x03AD); -      break; -    case 0x0389: -      bufpush(0x03AE); -      break; -    case 0x038A: -      bufpush(0x03AF); -      break; -    case 0x038C: -      bufpush(0x03CC); -      break; -    case 0x038E: -      bufpush(0x03CD); -      break; -    case 0x038F: -      bufpush(0x03CE); -      break; -    case 0x0390: -      bufpush(0x03B9); -      bufpush(0x0308); -      bufpush(0x0301); -      break; -    case 0x0391: -      bufpush(0x03B1); -      break; -    case 0x0392: -      bufpush(0x03B2); -      break; -    case 0x0393: -      bufpush(0x03B3); -      break; -    case 0x0394: -      bufpush(0x03B4); -      break; -    case 
0x0395: -      bufpush(0x03B5); -      break; -    case 0x0396: -      bufpush(0x03B6); -      break; -    case 0x0397: -      bufpush(0x03B7); -      break; -    case 0x0398: -      bufpush(0x03B8); -      break; -    case 0x0399: -      bufpush(0x03B9); -      break; -    case 0x039A: -      bufpush(0x03BA); -      break; -    case 0x039B: -      bufpush(0x03BB); -      break; -    case 0x039C: -      bufpush(0x03BC); -      break; -    case 0x039D: -      bufpush(0x03BD); -      break; -    case 0x039E: -      bufpush(0x03BE); -      break; -    case 0x039F: -      bufpush(0x03BF); -      break; -    case 0x03A0: -      bufpush(0x03C0); -      break; -    case 0x03A1: -      bufpush(0x03C1); -      break; -    case 0x03A3: -      bufpush(0x03C3); -      break; -    case 0x03A4: -      bufpush(0x03C4); -      break; -    case 0x03A5: -      bufpush(0x03C5); -      break; -    case 0x03A6: -      bufpush(0x03C6); -      break; -    case 0x03A7: -      bufpush(0x03C7); -      break; -    case 0x03A8: -      bufpush(0x03C8); -      break; -    case 0x03A9: -      bufpush(0x03C9); -      break; -    case 0x03AA: -      bufpush(0x03CA); -      break; -    case 0x03AB: -      bufpush(0x03CB); -      break; -    case 0x03B0: -      bufpush(0x03C5); -      bufpush(0x0308); -      bufpush(0x0301); -      break; -    case 0x03C2: -      bufpush(0x03C3); -      break; -    case 0x03D0: -      bufpush(0x03B2); -      break; -    case 0x03D1: -      bufpush(0x03B8); -      break; -    case 0x03D5: -      bufpush(0x03C6); -      break; -    case 0x03D6: -      bufpush(0x03C0); -      break; -    case 0x03D8: -      bufpush(0x03D9); -      break; -    case 0x03DA: -      bufpush(0x03DB); -      break; -    case 0x03DC: -      bufpush(0x03DD); -      break; -    case 0x03DE: -      bufpush(0x03DF); -      break; -    case 0x03E0: -      bufpush(0x03E1); -      break; -    case 0x03E2: -      bufpush(0x03E3); -      break; -    case 0x03E4: -      bufpush(0x03E5); -      break; - 
   case 0x03E6: -      bufpush(0x03E7); -      break; -    case 0x03E8: -      bufpush(0x03E9); -      break; -    case 0x03EA: -      bufpush(0x03EB); -      break; -    case 0x03EC: -      bufpush(0x03ED); -      break; -    case 0x03EE: -      bufpush(0x03EF); -      break; -    case 0x03F0: -      bufpush(0x03BA); -      break; -    case 0x03F1: -      bufpush(0x03C1); -      break; -    case 0x03F2: -      bufpush(0x03C3); -      break; -    case 0x03F4: -      bufpush(0x03B8); -      break; -    case 0x03F5: -      bufpush(0x03B5); -      break; -    case 0x0400: -      bufpush(0x0450); -      break; -    case 0x0401: -      bufpush(0x0451); -      break; -    case 0x0402: -      bufpush(0x0452); -      break; -    case 0x0403: -      bufpush(0x0453); -      break; -    case 0x0404: -      bufpush(0x0454); -      break; -    case 0x0405: -      bufpush(0x0455); -      break; -    case 0x0406: -      bufpush(0x0456); -      break; -    case 0x0407: -      bufpush(0x0457); -      break; -    case 0x0408: -      bufpush(0x0458); -      break; -    case 0x0409: -      bufpush(0x0459); -      break; -    case 0x040A: -      bufpush(0x045A); -      break; -    case 0x040B: -      bufpush(0x045B); -      break; -    case 0x040C: -      bufpush(0x045C); -      break; -    case 0x040D: -      bufpush(0x045D); -      break; -    case 0x040E: -      bufpush(0x045E); -      break; -    case 0x040F: -      bufpush(0x045F); -      break; -    case 0x0410: -      bufpush(0x0430); -      break; -    case 0x0411: -      bufpush(0x0431); -      break; -    case 0x0412: -      bufpush(0x0432); -      break; -    case 0x0413: -      bufpush(0x0433); -      break; -    case 0x0414: -      bufpush(0x0434); -      break; -    case 0x0415: -      bufpush(0x0435); -      break; -    case 0x0416: -      bufpush(0x0436); -      break; -    case 0x0417: -      bufpush(0x0437); -      break; -    case 0x0418: -      bufpush(0x0438); -      break; -    case 0x0419: -      bufpush(0x0439); 
-      break; -    case 0x041A: -      bufpush(0x043A); -      break; -    case 0x041B: -      bufpush(0x043B); -      break; -    case 0x041C: -      bufpush(0x043C); -      break; -    case 0x041D: -      bufpush(0x043D); -      break; -    case 0x041E: -      bufpush(0x043E); -      break; -    case 0x041F: -      bufpush(0x043F); -      break; -    case 0x0420: -      bufpush(0x0440); -      break; -    case 0x0421: -      bufpush(0x0441); -      break; -    case 0x0422: -      bufpush(0x0442); -      break; -    case 0x0423: -      bufpush(0x0443); -      break; -    case 0x0424: -      bufpush(0x0444); -      break; -    case 0x0425: -      bufpush(0x0445); -      break; -    case 0x0426: -      bufpush(0x0446); -      break; -    case 0x0427: -      bufpush(0x0447); -      break; -    case 0x0428: -      bufpush(0x0448); -      break; -    case 0x0429: -      bufpush(0x0449); -      break; -    case 0x042A: -      bufpush(0x044A); -      break; -    case 0x042B: -      bufpush(0x044B); -      break; -    case 0x042C: -      bufpush(0x044C); -      break; -    case 0x042D: -      bufpush(0x044D); -      break; -    case 0x042E: -      bufpush(0x044E); -      break; -    case 0x042F: -      bufpush(0x044F); -      break; -    case 0x0460: -      bufpush(0x0461); -      break; -    case 0x0462: -      bufpush(0x0463); -      break; -    case 0x0464: -      bufpush(0x0465); -      break; -    case 0x0466: -      bufpush(0x0467); -      break; -    case 0x0468: -      bufpush(0x0469); -      break; -    case 0x046A: -      bufpush(0x046B); -      break; -    case 0x046C: -      bufpush(0x046D); -      break; -    case 0x046E: -      bufpush(0x046F); -      break; -    case 0x0470: -      bufpush(0x0471); -      break; -    case 0x0472: -      bufpush(0x0473); -      break; -    case 0x0474: -      bufpush(0x0475); -      break; -    case 0x0476: -      bufpush(0x0477); -      break; -    case 0x0478: -      bufpush(0x0479); -      break; -    case 0x047A: -      
bufpush(0x047B); -      break; -    case 0x047C: -      bufpush(0x047D); -      break; -    case 0x047E: -      bufpush(0x047F); -      break; -    case 0x0480: -      bufpush(0x0481); -      break; -    case 0x048A: -      bufpush(0x048B); -      break; -    case 0x048C: -      bufpush(0x048D); -      break; -    case 0x048E: -      bufpush(0x048F); -      break; -    case 0x0490: -      bufpush(0x0491); -      break; -    case 0x0492: -      bufpush(0x0493); -      break; -    case 0x0494: -      bufpush(0x0495); -      break; -    case 0x0496: -      bufpush(0x0497); -      break; -    case 0x0498: -      bufpush(0x0499); -      break; -    case 0x049A: -      bufpush(0x049B); -      break; -    case 0x049C: -      bufpush(0x049D); -      break; -    case 0x049E: -      bufpush(0x049F); -      break; -    case 0x04A0: -      bufpush(0x04A1); -      break; -    case 0x04A2: -      bufpush(0x04A3); -      break; -    case 0x04A4: -      bufpush(0x04A5); -      break; -    case 0x04A6: -      bufpush(0x04A7); -      break; -    case 0x04A8: -      bufpush(0x04A9); -      break; -    case 0x04AA: -      bufpush(0x04AB); -      break; -    case 0x04AC: -      bufpush(0x04AD); -      break; -    case 0x04AE: -      bufpush(0x04AF); -      break; -    case 0x04B0: -      bufpush(0x04B1); -      break; -    case 0x04B2: -      bufpush(0x04B3); -      break; -    case 0x04B4: -      bufpush(0x04B5); -      break; -    case 0x04B6: -      bufpush(0x04B7); -      break; -    case 0x04B8: -      bufpush(0x04B9); -      break; -    case 0x04BA: -      bufpush(0x04BB); -      break; -    case 0x04BC: -      bufpush(0x04BD); -      break; -    case 0x04BE: -      bufpush(0x04BF); -      break; -    case 0x04C1: -      bufpush(0x04C2); -      break; -    case 0x04C3: -      bufpush(0x04C4); -      break; -    case 0x04C5: -      bufpush(0x04C6); -      break; -    case 0x04C7: -      bufpush(0x04C8); -      break; -    case 0x04C9: -      bufpush(0x04CA); -      break; -    
case 0x04CB: -      bufpush(0x04CC); -      break; -    case 0x04CD: -      bufpush(0x04CE); -      break; -    case 0x04D0: -      bufpush(0x04D1); -      break; -    case 0x04D2: -      bufpush(0x04D3); -      break; -    case 0x04D4: -      bufpush(0x04D5); -      break; -    case 0x04D6: -      bufpush(0x04D7); -      break; -    case 0x04D8: -      bufpush(0x04D9); -      break; -    case 0x04DA: -      bufpush(0x04DB); -      break; -    case 0x04DC: -      bufpush(0x04DD); -      break; -    case 0x04DE: -      bufpush(0x04DF); -      break; -    case 0x04E0: -      bufpush(0x04E1); -      break; -    case 0x04E2: -      bufpush(0x04E3); -      break; -    case 0x04E4: -      bufpush(0x04E5); -      break; -    case 0x04E6: -      bufpush(0x04E7); -      break; -    case 0x04E8: -      bufpush(0x04E9); -      break; -    case 0x04EA: -      bufpush(0x04EB); -      break; -    case 0x04EC: -      bufpush(0x04ED); -      break; -    case 0x04EE: -      bufpush(0x04EF); -      break; -    case 0x04F0: -      bufpush(0x04F1); -      break; -    case 0x04F2: -      bufpush(0x04F3); -      break; -    case 0x04F4: -      bufpush(0x04F5); -      break; -    case 0x04F8: -      bufpush(0x04F9); -      break; -    case 0x0500: -      bufpush(0x0501); -      break; -    case 0x0502: -      bufpush(0x0503); -      break; -    case 0x0504: -      bufpush(0x0505); -      break; -    case 0x0506: -      bufpush(0x0507); -      break; -    case 0x0508: -      bufpush(0x0509); -      break; -    case 0x050A: -      bufpush(0x050B); -      break; -    case 0x050C: -      bufpush(0x050D); -      break; -    case 0x050E: -      bufpush(0x050F); -      break; -    case 0x0531: -      bufpush(0x0561); -      break; -    case 0x0532: -      bufpush(0x0562); -      break; -    case 0x0533: -      bufpush(0x0563); -      break; -    case 0x0534: -      bufpush(0x0564); -      break; -    case 0x0535: -      bufpush(0x0565); -      break; -    case 0x0536: -      bufpush(0x0566); -  
    break; -    case 0x0537: -      bufpush(0x0567); -      break; -    case 0x0538: -      bufpush(0x0568); -      break; -    case 0x0539: -      bufpush(0x0569); -      break; -    case 0x053A: -      bufpush(0x056A); -      break; -    case 0x053B: -      bufpush(0x056B); -      break; -    case 0x053C: -      bufpush(0x056C); -      break; -    case 0x053D: -      bufpush(0x056D); -      break; -    case 0x053E: -      bufpush(0x056E); -      break; -    case 0x053F: -      bufpush(0x056F); -      break; -    case 0x0540: -      bufpush(0x0570); -      break; -    case 0x0541: -      bufpush(0x0571); -      break; -    case 0x0542: -      bufpush(0x0572); -      break; -    case 0x0543: -      bufpush(0x0573); -      break; -    case 0x0544: -      bufpush(0x0574); -      break; -    case 0x0545: -      bufpush(0x0575); -      break; -    case 0x0546: -      bufpush(0x0576); -      break; -    case 0x0547: -      bufpush(0x0577); -      break; -    case 0x0548: -      bufpush(0x0578); -      break; -    case 0x0549: -      bufpush(0x0579); -      break; -    case 0x054A: -      bufpush(0x057A); -      break; -    case 0x054B: -      bufpush(0x057B); -      break; -    case 0x054C: -      bufpush(0x057C); -      break; -    case 0x054D: -      bufpush(0x057D); -      break; -    case 0x054E: -      bufpush(0x057E); -      break; -    case 0x054F: -      bufpush(0x057F); -      break; -    case 0x0550: -      bufpush(0x0580); -      break; -    case 0x0551: -      bufpush(0x0581); -      break; -    case 0x0552: -      bufpush(0x0582); -      break; -    case 0x0553: -      bufpush(0x0583); -      break; -    case 0x0554: -      bufpush(0x0584); -      break; -    case 0x0555: -      bufpush(0x0585); -      break; -    case 0x0556: -      bufpush(0x0586); -      break; -    case 0x0587: -      bufpush(0x0565); -      bufpush(0x0582); -      break; -    case 0x1E00: -      bufpush(0x1E01); -      break; -    case 0x1E02: -      bufpush(0x1E03); -      break; -    
case 0x1E04: -      bufpush(0x1E05); -      break; -    case 0x1E06: -      bufpush(0x1E07); -      break; -    case 0x1E08: -      bufpush(0x1E09); -      break; -    case 0x1E0A: -      bufpush(0x1E0B); -      break; -    case 0x1E0C: -      bufpush(0x1E0D); -      break; -    case 0x1E0E: -      bufpush(0x1E0F); -      break; -    case 0x1E10: -      bufpush(0x1E11); -      break; -    case 0x1E12: -      bufpush(0x1E13); -      break; -    case 0x1E14: -      bufpush(0x1E15); -      break; -    case 0x1E16: -      bufpush(0x1E17); -      break; -    case 0x1E18: -      bufpush(0x1E19); -      break; -    case 0x1E1A: -      bufpush(0x1E1B); -      break; -    case 0x1E1C: -      bufpush(0x1E1D); -      break; -    case 0x1E1E: -      bufpush(0x1E1F); -      break; -    case 0x1E20: -      bufpush(0x1E21); -      break; -    case 0x1E22: -      bufpush(0x1E23); -      break; -    case 0x1E24: -      bufpush(0x1E25); -      break; -    case 0x1E26: -      bufpush(0x1E27); -      break; -    case 0x1E28: -      bufpush(0x1E29); -      break; -    case 0x1E2A: -      bufpush(0x1E2B); -      break; -    case 0x1E2C: -      bufpush(0x1E2D); -      break; -    case 0x1E2E: -      bufpush(0x1E2F); -      break; -    case 0x1E30: -      bufpush(0x1E31); -      break; -    case 0x1E32: -      bufpush(0x1E33); -      break; -    case 0x1E34: -      bufpush(0x1E35); -      break; -    case 0x1E36: -      bufpush(0x1E37); -      break; -    case 0x1E38: -      bufpush(0x1E39); -      break; -    case 0x1E3A: -      bufpush(0x1E3B); -      break; -    case 0x1E3C: -      bufpush(0x1E3D); -      break; -    case 0x1E3E: -      bufpush(0x1E3F); -      break; -    case 0x1E40: -      bufpush(0x1E41); -      break; -    case 0x1E42: -      bufpush(0x1E43); -      break; -    case 0x1E44: -      bufpush(0x1E45); -      break; -    case 0x1E46: -      bufpush(0x1E47); -      break; -    case 0x1E48: -      bufpush(0x1E49); -      break; -    case 0x1E4A: -      bufpush(0x1E4B); -  
    break; -    case 0x1E4C: -      bufpush(0x1E4D); -      break; -    case 0x1E4E: -      bufpush(0x1E4F); -      break; -    case 0x1E50: -      bufpush(0x1E51); -      break; -    case 0x1E52: -      bufpush(0x1E53); -      break; -    case 0x1E54: -      bufpush(0x1E55); -      break; -    case 0x1E56: -      bufpush(0x1E57); -      break; -    case 0x1E58: -      bufpush(0x1E59); -      break; -    case 0x1E5A: -      bufpush(0x1E5B); -      break; -    case 0x1E5C: -      bufpush(0x1E5D); -      break; -    case 0x1E5E: -      bufpush(0x1E5F); -      break; -    case 0x1E60: -      bufpush(0x1E61); -      break; -    case 0x1E62: -      bufpush(0x1E63); -      break; -    case 0x1E64: -      bufpush(0x1E65); -      break; -    case 0x1E66: -      bufpush(0x1E67); -      break; -    case 0x1E68: -      bufpush(0x1E69); -      break; -    case 0x1E6A: -      bufpush(0x1E6B); -      break; -    case 0x1E6C: -      bufpush(0x1E6D); -      break; -    case 0x1E6E: -      bufpush(0x1E6F); -      break; -    case 0x1E70: -      bufpush(0x1E71); -      break; -    case 0x1E72: -      bufpush(0x1E73); -      break; -    case 0x1E74: -      bufpush(0x1E75); -      break; -    case 0x1E76: -      bufpush(0x1E77); -      break; -    case 0x1E78: -      bufpush(0x1E79); -      break; -    case 0x1E7A: -      bufpush(0x1E7B); -      break; -    case 0x1E7C: -      bufpush(0x1E7D); -      break; -    case 0x1E7E: -      bufpush(0x1E7F); -      break; -    case 0x1E80: -      bufpush(0x1E81); -      break; -    case 0x1E82: -      bufpush(0x1E83); -      break; -    case 0x1E84: -      bufpush(0x1E85); -      break; -    case 0x1E86: -      bufpush(0x1E87); -      break; -    case 0x1E88: -      bufpush(0x1E89); -      break; -    case 0x1E8A: -      bufpush(0x1E8B); -      break; -    case 0x1E8C: -      bufpush(0x1E8D); -      break; -    case 0x1E8E: -      bufpush(0x1E8F); -      break; -    case 0x1E90: -      bufpush(0x1E91); -      break; -    case 0x1E92: -      
bufpush(0x1E93); -      break; -    case 0x1E94: -      bufpush(0x1E95); -      break; -    case 0x1E96: -      bufpush(0x0068); -      bufpush(0x0331); -      break; -    case 0x1E97: -      bufpush(0x0074); -      bufpush(0x0308); -      break; -    case 0x1E98: -      bufpush(0x0077); -      bufpush(0x030A); -      break; -    case 0x1E99: -      bufpush(0x0079); -      bufpush(0x030A); -      break; -    case 0x1E9A: -      bufpush(0x0061); -      bufpush(0x02BE); -      break; -    case 0x1E9B: -      bufpush(0x1E61); -      break; -    case 0x1EA0: -      bufpush(0x1EA1); -      break; -    case 0x1EA2: -      bufpush(0x1EA3); -      break; -    case 0x1EA4: -      bufpush(0x1EA5); -      break; -    case 0x1EA6: -      bufpush(0x1EA7); -      break; -    case 0x1EA8: -      bufpush(0x1EA9); -      break; -    case 0x1EAA: -      bufpush(0x1EAB); -      break; -    case 0x1EAC: -      bufpush(0x1EAD); -      break; -    case 0x1EAE: -      bufpush(0x1EAF); -      break; -    case 0x1EB0: -      bufpush(0x1EB1); -      break; -    case 0x1EB2: -      bufpush(0x1EB3); -      break; -    case 0x1EB4: -      bufpush(0x1EB5); -      break; -    case 0x1EB6: -      bufpush(0x1EB7); -      break; -    case 0x1EB8: -      bufpush(0x1EB9); -      break; -    case 0x1EBA: -      bufpush(0x1EBB); -      break; -    case 0x1EBC: -      bufpush(0x1EBD); -      break; -    case 0x1EBE: -      bufpush(0x1EBF); -      break; -    case 0x1EC0: -      bufpush(0x1EC1); -      break; -    case 0x1EC2: -      bufpush(0x1EC3); -      break; -    case 0x1EC4: -      bufpush(0x1EC5); -      break; -    case 0x1EC6: -      bufpush(0x1EC7); -      break; -    case 0x1EC8: -      bufpush(0x1EC9); -      break; -    case 0x1ECA: -      bufpush(0x1ECB); -      break; -    case 0x1ECC: -      bufpush(0x1ECD); -      break; -    case 0x1ECE: -      bufpush(0x1ECF); -      break; -    case 0x1ED0: -      bufpush(0x1ED1); -      break; -    case 0x1ED2: -      bufpush(0x1ED3); -      break; 
-    case 0x1ED4: -      bufpush(0x1ED5); -      break; -    case 0x1ED6: -      bufpush(0x1ED7); -      break; -    case 0x1ED8: -      bufpush(0x1ED9); -      break; -    case 0x1EDA: -      bufpush(0x1EDB); -      break; -    case 0x1EDC: -      bufpush(0x1EDD); -      break; -    case 0x1EDE: -      bufpush(0x1EDF); -      break; -    case 0x1EE0: -      bufpush(0x1EE1); -      break; -    case 0x1EE2: -      bufpush(0x1EE3); -      break; -    case 0x1EE4: -      bufpush(0x1EE5); -      break; -    case 0x1EE6: -      bufpush(0x1EE7); -      break; -    case 0x1EE8: -      bufpush(0x1EE9); -      break; -    case 0x1EEA: -      bufpush(0x1EEB); -      break; -    case 0x1EEC: -      bufpush(0x1EED); -      break; -    case 0x1EEE: -      bufpush(0x1EEF); -      break; -    case 0x1EF0: -      bufpush(0x1EF1); -      break; -    case 0x1EF2: -      bufpush(0x1EF3); -      break; -    case 0x1EF4: -      bufpush(0x1EF5); -      break; -    case 0x1EF6: -      bufpush(0x1EF7); -      break; -    case 0x1EF8: -      bufpush(0x1EF9); -      break; -    case 0x1F08: -      bufpush(0x1F00); -      break; -    case 0x1F09: -      bufpush(0x1F01); -      break; -    case 0x1F0A: -      bufpush(0x1F02); -      break; -    case 0x1F0B: -      bufpush(0x1F03); -      break; -    case 0x1F0C: -      bufpush(0x1F04); -      break; -    case 0x1F0D: -      bufpush(0x1F05); -      break; -    case 0x1F0E: -      bufpush(0x1F06); -      break; -    case 0x1F0F: -      bufpush(0x1F07); -      break; -    case 0x1F18: -      bufpush(0x1F10); -      break; -    case 0x1F19: -      bufpush(0x1F11); -      break; -    case 0x1F1A: -      bufpush(0x1F12); -      break; -    case 0x1F1B: -      bufpush(0x1F13); -      break; -    case 0x1F1C: -      bufpush(0x1F14); -      break; -    case 0x1F1D: -      bufpush(0x1F15); -      break; -    case 0x1F28: -      bufpush(0x1F20); -      break; -    case 0x1F29: -      bufpush(0x1F21); -      break; -    case 0x1F2A: -      
bufpush(0x1F22); -      break; -    case 0x1F2B: -      bufpush(0x1F23); -      break; -    case 0x1F2C: -      bufpush(0x1F24); -      break; -    case 0x1F2D: -      bufpush(0x1F25); -      break; -    case 0x1F2E: -      bufpush(0x1F26); -      break; -    case 0x1F2F: -      bufpush(0x1F27); -      break; -    case 0x1F38: -      bufpush(0x1F30); -      break; -    case 0x1F39: -      bufpush(0x1F31); -      break; -    case 0x1F3A: -      bufpush(0x1F32); -      break; -    case 0x1F3B: -      bufpush(0x1F33); -      break; -    case 0x1F3C: -      bufpush(0x1F34); -      break; -    case 0x1F3D: -      bufpush(0x1F35); -      break; -    case 0x1F3E: -      bufpush(0x1F36); -      break; -    case 0x1F3F: -      bufpush(0x1F37); -      break; -    case 0x1F48: -      bufpush(0x1F40); -      break; -    case 0x1F49: -      bufpush(0x1F41); -      break; -    case 0x1F4A: -      bufpush(0x1F42); -      break; -    case 0x1F4B: -      bufpush(0x1F43); -      break; -    case 0x1F4C: -      bufpush(0x1F44); -      break; -    case 0x1F4D: -      bufpush(0x1F45); -      break; -    case 0x1F50: -      bufpush(0x03C5); -      bufpush(0x0313); -      break; -    case 0x1F52: -      bufpush(0x03C5); -      bufpush(0x0313); -      bufpush(0x0300); -      break; -    case 0x1F54: -      bufpush(0x03C5); -      bufpush(0x0313); -      bufpush(0x0301); -      break; -    case 0x1F56: -      bufpush(0x03C5); -      bufpush(0x0313); -      bufpush(0x0342); -      break; -    case 0x1F59: -      bufpush(0x1F51); -      break; -    case 0x1F5B: -      bufpush(0x1F53); -      break; -    case 0x1F5D: -      bufpush(0x1F55); -      break; -    case 0x1F5F: -      bufpush(0x1F57); -      break; -    case 0x1F68: -      bufpush(0x1F60); -      break; -    case 0x1F69: -      bufpush(0x1F61); -      break; -    case 0x1F6A: -      bufpush(0x1F62); -      break; -    case 0x1F6B: -      bufpush(0x1F63); -      break; -    case 0x1F6C: -      bufpush(0x1F64); -      break; -    
case 0x1F6D: -      bufpush(0x1F65); -      break; -    case 0x1F6E: -      bufpush(0x1F66); -      break; -    case 0x1F6F: -      bufpush(0x1F67); -      break; -    case 0x1F80: -      bufpush(0x1F00); -      bufpush(0x03B9); -      break; -    case 0x1F81: -      bufpush(0x1F01); -      bufpush(0x03B9); -      break; -    case 0x1F82: -      bufpush(0x1F02); -      bufpush(0x03B9); -      break; -    case 0x1F83: -      bufpush(0x1F03); -      bufpush(0x03B9); -      break; -    case 0x1F84: -      bufpush(0x1F04); -      bufpush(0x03B9); -      break; -    case 0x1F85: -      bufpush(0x1F05); -      bufpush(0x03B9); -      break; -    case 0x1F86: -      bufpush(0x1F06); -      bufpush(0x03B9); -      break; -    case 0x1F87: -      bufpush(0x1F07); -      bufpush(0x03B9); -      break; -    case 0x1F88: -      bufpush(0x1F00); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F89: -      bufpush(0x1F01); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F8A: -      bufpush(0x1F02); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F8B: -      bufpush(0x1F03); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F8C: -      bufpush(0x1F04); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F8D: -      bufpush(0x1F05); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F8E: -      bufpush(0x1F06); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F8F: -      bufpush(0x1F07); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F90: -      bufpush(0x1F20); -      bufpush(0x03B9); -      break; -    case 0x1F91: -      bufpush(0x1F21); -      bufpush(0x03B9); -      break; -    case 0x1F92: -      bufpush(0x1F22); -      bufpush(0x03B9); -      break; -    case 0x1F93: -      bufpush(0x1F23); -      bufpush(0x03B9); -      break; -    case 
0x1F94: -      bufpush(0x1F24); -      bufpush(0x03B9); -      break; -    case 0x1F95: -      bufpush(0x1F25); -      bufpush(0x03B9); -      break; -    case 0x1F96: -      bufpush(0x1F26); -      bufpush(0x03B9); -      break; -    case 0x1F97: -      bufpush(0x1F27); -      bufpush(0x03B9); -      break; -    case 0x1F98: -      bufpush(0x1F20); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F99: -      bufpush(0x1F21); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F9A: -      bufpush(0x1F22); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F9B: -      bufpush(0x1F23); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F9C: -      bufpush(0x1F24); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F9D: -      bufpush(0x1F25); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F9E: -      bufpush(0x1F26); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1F9F: -      bufpush(0x1F27); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FA0: -      bufpush(0x1F60); -      bufpush(0x03B9); -      break; -    case 0x1FA1: -      bufpush(0x1F61); -      bufpush(0x03B9); -      break; -    case 0x1FA2: -      bufpush(0x1F62); -      bufpush(0x03B9); -      break; -    case 0x1FA3: -      bufpush(0x1F63); -      bufpush(0x03B9); -      break; -    case 0x1FA4: -      bufpush(0x1F64); -      bufpush(0x03B9); -      break; -    case 0x1FA5: -      bufpush(0x1F65); -      bufpush(0x03B9); -      break; -    case 0x1FA6: -      bufpush(0x1F66); -      bufpush(0x03B9); -      break; -    case 0x1FA7: -      bufpush(0x1F67); -      bufpush(0x03B9); -      break; -    case 0x1FA8: -      bufpush(0x1F60); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FA9: -      bufpush(0x1F61); -      bufpush(0x03B9); -      
break; -    case 0x: -      break; -    case 0x1FAA: -      bufpush(0x1F62); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FAB: -      bufpush(0x1F63); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FAC: -      bufpush(0x1F64); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FAD: -      bufpush(0x1F65); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FAE: -      bufpush(0x1F66); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FAF: -      bufpush(0x1F67); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FB2: -      bufpush(0x1F70); -      bufpush(0x03B9); -      break; -    case 0x1FB3: -      bufpush(0x03B1); -      bufpush(0x03B9); -      break; -    case 0x1FB4: -      bufpush(0x03AC); -      bufpush(0x03B9); -      break; -    case 0x1FB6: -      bufpush(0x03B1); -      bufpush(0x0342); -      break; -    case 0x1FB7: -      bufpush(0x03B1); -      bufpush(0x0342); -      bufpush(0x03B9); -      break; -    case 0x1FB8: -      bufpush(0x1FB0); -      break; -    case 0x1FB9: -      bufpush(0x1FB1); -      break; -    case 0x1FBA: -      bufpush(0x1F70); -      break; -    case 0x1FBB: -      bufpush(0x1F71); -      break; -    case 0x1FBC: -      bufpush(0x03B1); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FBE: -      bufpush(0x03B9); -      break; -    case 0x1FC2: -      bufpush(0x1F74); -      bufpush(0x03B9); -      break; -    case 0x1FC3: -      bufpush(0x03B7); -      bufpush(0x03B9); -      break; -    case 0x1FC4: -      bufpush(0x03AE); -      bufpush(0x03B9); -      break; -    case 0x1FC6: -      bufpush(0x03B7); -      bufpush(0x0342); -      break; -    case 0x1FC7: -      bufpush(0x03B7); -      bufpush(0x0342); -      bufpush(0x03B9); -      break; -    case 0x1FC8: -      bufpush(0x1F72); -      break; -    case 0x1FC9: -      
bufpush(0x1F73); -      break; -    case 0x1FCA: -      bufpush(0x1F74); -      break; -    case 0x1FCB: -      bufpush(0x1F75); -      break; -    case 0x1FCC: -      bufpush(0x03B7); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x1FD2: -      bufpush(0x03B9); -      bufpush(0x0308); -      bufpush(0x0300); -      break; -    case 0x1FD3: -      bufpush(0x03B9); -      bufpush(0x0308); -      bufpush(0x0301); -      break; -    case 0x1FD6: -      bufpush(0x03B9); -      bufpush(0x0342); -      break; -    case 0x1FD7: -      bufpush(0x03B9); -      bufpush(0x0308); -      bufpush(0x0342); -      break; -    case 0x1FD8: -      bufpush(0x1FD0); -      break; -    case 0x1FD9: -      bufpush(0x1FD1); -      break; -    case 0x1FDA: -      bufpush(0x1F76); -      break; -    case 0x1FDB: -      bufpush(0x1F77); -      break; -    case 0x1FE2: -      bufpush(0x03C5); -      bufpush(0x0308); -      bufpush(0x0300); -      break; -    case 0x1FE3: -      bufpush(0x03C5); -      bufpush(0x0308); -      bufpush(0x0301); -      break; -    case 0x1FE4: -      bufpush(0x03C1); -      bufpush(0x0313); -      break; -    case 0x1FE6: -      bufpush(0x03C5); -      bufpush(0x0342); -      break; -    case 0x1FE7: -      bufpush(0x03C5); -      bufpush(0x0308); -      bufpush(0x0342); -      break; -    case 0x1FE8: -      bufpush(0x1FE0); -      break; -    case 0x1FE9: -      bufpush(0x1FE1); -      break; -    case 0x1FEA: -      bufpush(0x1F7A); -      break; -    case 0x1FEB: -      bufpush(0x1F7B); -      break; -    case 0x1FEC: -      bufpush(0x1FE5); -      break; -    case 0x1FF2: -      bufpush(0x1F7C); -      bufpush(0x03B9); -      break; -    case 0x1FF3: -      bufpush(0x03C9); -      bufpush(0x03B9); -      break; -    case 0x1FF4: -      bufpush(0x03CE); -      bufpush(0x03B9); -      break; -    case 0x1FF6: -      bufpush(0x03C9); -      bufpush(0x0342); -      break; -    case 0x1FF7: -      bufpush(0x03C9); -      
bufpush(0x0342); -      bufpush(0x03B9); -      break; -    case 0x1FF8: -      bufpush(0x1F78); -      break; -    case 0x1FF9: -      bufpush(0x1F79); -      break; -    case 0x1FFA: -      bufpush(0x1F7C); -      break; -    case 0x1FFB: -      bufpush(0x1F7D); -      break; -    case 0x1FFC: -      bufpush(0x03C9); -      bufpush(0x03B9); -      break; -    case 0x: -      break; -    case 0x2126: -      bufpush(0x03C9); -      break; -    case 0x212A: -      bufpush(0x006B); -      break; -    case 0x212B: -      bufpush(0x00E5); -      break; -    case 0x2160: -      bufpush(0x2170); -      break; -    case 0x2161: -      bufpush(0x2171); -      break; -    case 0x2162: -      bufpush(0x2172); -      break; -    case 0x2163: -      bufpush(0x2173); -      break; -    case 0x2164: -      bufpush(0x2174); -      break; -    case 0x2165: -      bufpush(0x2175); -      break; -    case 0x2166: -      bufpush(0x2176); -      break; -    case 0x2167: -      bufpush(0x2177); -      break; -    case 0x2168: -      bufpush(0x2178); -      break; -    case 0x2169: -      bufpush(0x2179); -      break; -    case 0x216A: -      bufpush(0x217A); -      break; -    case 0x216B: -      bufpush(0x217B); -      break; -    case 0x216C: -      bufpush(0x217C); -      break; -    case 0x216D: -      bufpush(0x217D); -      break; -    case 0x216E: -      bufpush(0x217E); -      break; -    case 0x216F: -      bufpush(0x217F); -      break; -    case 0x24B6: -      bufpush(0x24D0); -      break; -    case 0x24B7: -      bufpush(0x24D1); -      break; -    case 0x24B8: -      bufpush(0x24D2); -      break; -    case 0x24B9: -      bufpush(0x24D3); -      break; -    case 0x24BA: -      bufpush(0x24D4); -      break; -    case 0x24BB: -      bufpush(0x24D5); -      break; -    case 0x24BC: -      bufpush(0x24D6); -      break; -    case 0x24BD: -      bufpush(0x24D7); -      break; -    case 0x24BE: -      bufpush(0x24D8); -      break; -    case 0x24BF: -      bufpush(0x24D9); -  
    break; -    case 0x24C0: -      bufpush(0x24DA); -      break; -    case 0x24C1: -      bufpush(0x24DB); -      break; -    case 0x24C2: -      bufpush(0x24DC); -      break; -    case 0x24C3: -      bufpush(0x24DD); -      break; -    case 0x24C4: -      bufpush(0x24DE); -      break; -    case 0x24C5: -      bufpush(0x24DF); -      break; -    case 0x24C6: -      bufpush(0x24E0); -      break; -    case 0x24C7: -      bufpush(0x24E1); -      break; -    case 0x24C8: -      bufpush(0x24E2); -      break; -    case 0x24C9: -      bufpush(0x24E3); -      break; -    case 0x24CA: -      bufpush(0x24E4); -      break; -    case 0x24CB: -      bufpush(0x24E5); -      break; -    case 0x24CC: -      bufpush(0x24E6); -      break; -    case 0x24CD: -      bufpush(0x24E7); -      break; -    case 0x24CE: -      bufpush(0x24E8); -      break; -    case 0x24CF: -      bufpush(0x24E9); -      break; -    case 0xFB00: -      bufpush(0x0066); -      bufpush(0x0066); -      break; -    case 0xFB01: -      bufpush(0x0066); -      bufpush(0x0069); -      break; -    case 0xFB02: -      bufpush(0x0066); -      bufpush(0x006C); -      break; -    case 0xFB03: -      bufpush(0x0066); -      bufpush(0x0066); -      bufpush(0x0069); -      break; -    case 0xFB04: -      bufpush(0x0066); -      bufpush(0x0066); -      bufpush(0x006C); -      break; -    case 0xFB05: -      bufpush(0x0073); -      bufpush(0x0074); -      break; -    case 0xFB06: -      bufpush(0x0073); -      bufpush(0x0074); -      break; -    case 0xFB13: -      bufpush(0x0574); -      bufpush(0x0576); -      break; -    case 0xFB14: -      bufpush(0x0574); -      bufpush(0x0565); -      break; -    case 0xFB15: -      bufpush(0x0574); -      bufpush(0x056B); -      break; -    case 0xFB16: -      bufpush(0x057E); -      bufpush(0x0576); -      break; -    case 0xFB17: -      bufpush(0x0574); -      bufpush(0x056D); -      break; -    case 0xFF21: -      bufpush(0xFF41); -      break; -    case 0xFF22: -      
bufpush(0xFF42); -      break; -    case 0xFF23: -      bufpush(0xFF43); -      break; -    case 0xFF24: -      bufpush(0xFF44); -      break; -    case 0xFF25: -      bufpush(0xFF45); -      break; -    case 0xFF26: -      bufpush(0xFF46); -      break; -    case 0xFF27: -      bufpush(0xFF47); -      break; -    case 0xFF28: -      bufpush(0xFF48); -      break; -    case 0xFF29: -      bufpush(0xFF49); -      break; -    case 0xFF2A: -      bufpush(0xFF4A); -      break; -    case 0xFF2B: -      bufpush(0xFF4B); -      break; -    case 0xFF2C: -      bufpush(0xFF4C); -      break; -    case 0xFF2D: -      bufpush(0xFF4D); -      break; -    case 0xFF2E: -      bufpush(0xFF4E); -      break; -    case 0xFF2F: -      bufpush(0xFF4F); -      break; -    case 0xFF30: -      bufpush(0xFF50); -      break; -    case 0xFF31: -      bufpush(0xFF51); -      break; -    case 0xFF32: -      bufpush(0xFF52); -      break; -    case 0xFF33: -      bufpush(0xFF53); -      break; -    case 0xFF34: -      bufpush(0xFF54); -      break; -    case 0xFF35: -      bufpush(0xFF55); -      break; -    case 0xFF36: -      bufpush(0xFF56); -      break; -    case 0xFF37: -      bufpush(0xFF57); -      break; -    case 0xFF38: -      bufpush(0xFF58); -      break; -    case 0xFF39: -      bufpush(0xFF59); -      break; -    case 0xFF3A: -      bufpush(0xFF5A); -      break; -    case 0x10400: -      bufpush(0x10428); -      break; -    case 0x10401: -      bufpush(0x10429); -      break; -    case 0x10402: -      bufpush(0x1042A); -      break; -    case 0x10403: -      bufpush(0x1042B); -      break; -    case 0x10404: -      bufpush(0x1042C); -      break; -    case 0x10405: -      bufpush(0x1042D); -      break; -    case 0x10406: -      bufpush(0x1042E); -      break; -    case 0x10407: -      bufpush(0x1042F); -      break; -    case 0x10408: -      bufpush(0x10430); -      break; -    case 0x10409: -      bufpush(0x10431); -      break; -    case 0x1040A: -      bufpush(0x10432); 
-      break; -    case 0x1040B: -      bufpush(0x10433); -      break; -    case 0x1040C: -      bufpush(0x10434); -      break; -    case 0x1040D: -      bufpush(0x10435); -      break; -    case 0x1040E: -      bufpush(0x10436); -      break; -    case 0x1040F: -      bufpush(0x10437); -      break; -    case 0x10410: -      bufpush(0x10438); -      break; -    case 0x10411: -      bufpush(0x10439); -      break; -    case 0x10412: -      bufpush(0x1043A); -      break; -    case 0x10413: -      bufpush(0x1043B); -      break; -    case 0x10414: -      bufpush(0x1043C); -      break; -    case 0x10415: -      bufpush(0x1043D); -      break; -    case 0x10416: -      bufpush(0x1043E); -      break; -    case 0x10417: -      bufpush(0x1043F); -      break; -    case 0x10418: -      bufpush(0x10440); -      break; -    case 0x10419: -      bufpush(0x10441); -      break; -    case 0x1041A: -      bufpush(0x10442); -      break; -    case 0x1041B: -      bufpush(0x10443); -      break; -    case 0x1041C: -      bufpush(0x10444); -      break; -    case 0x1041D: -      bufpush(0x10445); -      break; -    case 0x1041E: -      bufpush(0x10446); -      break; -    case 0x1041F: -      bufpush(0x10447); -      break; -    case 0x10420: -      bufpush(0x10448); -      break; -    case 0x10421: -      bufpush(0x10449); -      break; -    case 0x10422: -      bufpush(0x1044A); -      break; -    case 0x10423: -      bufpush(0x1044B); -      break; -    case 0x10424: -      bufpush(0x1044C); -      break; -    case 0x10425: -      bufpush(0x1044D); -      break; -  } diff --git a/src/detab.c b/src/detab.c deleted file mode 100644 index e03fcf7..0000000 --- a/src/detab.c +++ /dev/null @@ -1,48 +0,0 @@ -#include "bstrlib.h" - -// UTF-8 aware detab:  assumes s has no newlines, or only a final newline. -// Return 0 on success, BSTR_ERR if invalid UTF-8. 
-extern int bdetab(bstring s, int utf8) -{ -  unsigned char c; -  int pos = 0;  // a count of characters -  int byte = 0; // a count of bytes -  int high_chars_to_skip = 0; -  int numspaces = 0; -  while ((c = bchar(s, byte))) { -    if (utf8 && high_chars_to_skip > 0) { -      if (c >= 0x80) { -        high_chars_to_skip--; -        byte++; -      } else { -        return BSTR_ERR; // invalid utf-8 -      } -    } else if (c == '\t') { -      bdelete(s, byte, 1); // delete tab character -      numspaces = 4 - (pos % 4); -      binsertch(s, byte, numspaces, ' '); -      byte += numspaces; -      pos  += numspaces; -    } else if (c <= 0x80 || !utf8) { -      byte++; -      pos++; -    } else {  // multibyte utf8 sequences -      if (c >> 1 == 0176) { -        high_chars_to_skip = 5; -      } else if (c >> 2 == 076) { -        high_chars_to_skip = 4; -      } else if (c >> 3 == 036) { -        high_chars_to_skip = 3; -      } else if (c >> 4 == 016) { -        high_chars_to_skip = 2; -      } else if (c >> 5 == 06) { -        high_chars_to_skip = 1; -      } else { -        return BSTR_ERR; // invalid utf-8 -      } -      pos++; -      byte++; -    } -  } -  return 0; -} diff --git a/src/getopt.c b/src/getopt.c deleted file mode 100644 index 321dd9f..0000000 --- a/src/getopt.c +++ /dev/null @@ -1,199 +0,0 @@ -/* $Id: getopt.c 4022 2008-03-31 06:11:07Z rra $ - * - * Replacement implementation of getopt. - * - * This is a replacement implementation for getopt based on the my_getopt - * distribution by Benjamin Sittler.  Only the getopt interface is included, - * since remctl doesn't use GNU long options, and the code has been rearranged - * and reworked somewhat to fit with the remctl coding style. 
- * - * Copyright 1997, 2000, 2001, 2002 Benjamin Sittler - * Copyright 2008 Russ Allbery <rra@stanford.edu> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - *   - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - *   - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include <config.h> -#include <portable/system.h> -#include <portable/getopt.h> - -/* - * If we're running the test suite, rename getopt and the global variables to - * avoid conflicts with the system version. - */ -#if TESTING -# define getopt test_getopt -int test_getopt(int, char **, const char *); -# define optind test_optind -# define opterr test_opterr -# define optopt test_optopt -# define optarg test_optarg -#endif - -/* Initialize global interface variables. */ -int optind = 1; -int opterr = 1; -int optopt = 0; -char *optarg = NULL; - -/* - * This is the plain old UNIX getopt, with GNU-style extensions.  If you're - * porting some piece of UNIX software, this is all you need.  
It supports - * GNU-style permutation and optional arguments, but does not support the GNU - * -W extension. - * - * This function is not re-entrant or thread-safe, has static variables, and - * generally isn't a great interface, but normally you only call it once. - */ -int -getopt(int argc, char *argv[], const char *optstring) -{ -    const char *p; -    size_t offset = 0; -    char mode = '\0'; -    int colon_mode = 0; -    int option = -1; - -    /* Holds the current position in the parameter being parsed. */ -    static int charind = 0; - -    /* -     * By default, getopt permutes argv as it scans and leaves all non-options -     * at the end.  This can be changed with the first character of optstring -     * or the environment variable POSIXLY_CORRECT.  With a first character of -     * '+' or when POSIXLY_CORRECT is set, option processing stops at the -     * first non-option.  If the first character is '-', each non-option argv -     * element is handled as if it were the argument of an option with -     * character code 1.  mode holds this character. -     * -     * After the optional leading '+' and '-', optstring may contain ':'.  If -     * present, missing arguments return ':' instead of '?'.  colon_mode holds -     * this setting. -     */ -    if (getenv("POSIXLY_CORRECT") != NULL) { -        mode = '+'; -        colon_mode = '+'; -    } else { -        if (optstring[offset] == '+' || optstring[offset] == '-') { -            mode = optstring[offset]; -            offset++; -        } -        if (optstring[offset] == ':') { -            colon_mode = 1; -            offset++; -        } -    } - -    /* -     * charind holds where we left off.  If it's set, we were in the middle -     * of an argv element; if not, we pick up with the next element of -     * optind. 
-     */ -    optarg = NULL; -    if (charind == 0) { -        if (optind >= argc) -            option = -1; -        else if (strcmp(argv[optind], "--") == 0) { -            optind++; -            option = -1; -        } else if (argv[optind][0] != '-' || argv[optind][1] == '\0') { -            char *tmp; -            int i, j, k, end; - -            if (mode == '+') -                option = -1; -            else if (mode == '-') { -                optarg = argv[optind]; -                optind++; -                option = 1; -            } else { -                for (i = optind + 1, j = optind; i < argc; i++) -                    if ((argv[i][0] == '-') && (argv[i][1] != '\0')) { -                        optind = i; -                        option = getopt(argc, argv, optstring); -                        while (i > j) { -                            --i; -                            tmp = argv[i]; -                            end = (charind == 0) ? optind - 1 : optind; -                            for (k = i; k + 1 <= end; k++) { -                                argv[k] = argv[k + 1]; -                            } -                            argv[end] = tmp; -                            --optind; -                        } -                        break; -                    } -                if (i == argc) -                    option = -1; -            } -            return option; -        } else { -            charind = 1; -        } -    } -    if (charind != 0) { -        optopt = argv[optind][charind]; -        for (p = optstring + offset; *p != '\0'; p++) -            if (optopt == *p) { -                p++; -                if (*p == ':') { -                    if (argv[optind][charind + 1] != '\0') { -                        optarg = &argv[optind][charind + 1]; -                        optind++; -                        charind = 0; -                    } else { -                        p++; -                        if (*p != ':') { -                
            charind = 0; -                            optind++; -                            if (optind >= argc) { -                                if (opterr) -                                    fprintf(stderr, "%s: option requires" -                                            " an argument -- %c\n", argv[0], -                                            optopt); -                                option = colon_mode ? ':' : '?'; -                                goto done; -                            } else { -                                optarg = argv[optind]; -                                optind++; -                            } -                        } -                    } -                } -                option = optopt; -            } -        if (option == -1) { -            if (opterr) -                fprintf(stderr, "%s: illegal option -- %c\n", argv[0], optopt); -            option = '?'; -        } -    } - -done: -    if (charind != 0) { -        charind++; -        if (argv[optind][charind] == '\0') { -            optind++; -            charind = 0; -        } -    } -    if (optind > argc) -        optind = argc; -    return option; -} diff --git a/src/inlines.c b/src/inlines.c index f75c846..4ff45ad 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -2,133 +2,154 @@  #include <stdio.h>  #include <stdbool.h>  #include <ctype.h> -#include "bstrlib.h" +#include <string.h> +  #include "stmd.h"  #include "uthash.h"  #include "debug.h"  #include "scanners.h"  #include "utf8.h" +typedef struct Subject { +  const gh_buf   *buffer; +  int            pos; +  reference**    reference_map; +  int            label_nestlevel; +} subject; + +reference* lookup_reference(reference** refmap, chunk *label); +reference* make_reference(chunk *label, chunk *url, chunk *title); + +static unsigned char *clean_url(chunk *url); +static unsigned char *clean_title(chunk *title); + +inline static unsigned char *chunk_to_cstr(chunk *c); +inline static void 
chunk_free(chunk *c); +inline static void chunk_trim(chunk *c); + +inline static chunk chunk_literal(const char *data); +inline static chunk chunk_buf_detach(gh_buf *buf); +inline static chunk chunk_buf(const gh_buf *buf, int pos, int len); + +static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); +static int parse_inline(subject* subj, inl ** last); +  extern void free_reference(reference *ref) { -  bdestroy(ref->label); -  bdestroy(ref->url); -  bdestroy(ref->title); -  free(ref); +	free(ref->label); +	free(ref->url); +	free(ref->title); +	free(ref);  }  extern void free_reference_map(reference **refmap) { -  /* free the hash table contents */ -  reference *s; -  reference *tmp; -  if (refmap != NULL) { -    HASH_ITER(hh, *refmap, s, tmp) { -      HASH_DEL(*refmap, s); -      free_reference(s); -    } -    free(refmap); -  } +	/* free the hash table contents */ +	reference *s; +	reference *tmp; +	if (refmap != NULL) { +		HASH_ITER(hh, *refmap, s, tmp) { +			HASH_DEL(*refmap, s); +			free_reference(s); +		} +		free(refmap); +	}  }  // normalize reference:  collapse internal whitespace to single space,  // remove leading/trailing whitespace, case fold -static bstring normalize_reference(bstring s) -{ -  bstring normalized = case_fold(s); -  int pos = 0; -  int startpos; -  char c; -  while ((c = bchar(normalized, pos))) { -    if (isspace(c)) { -      startpos = pos; -      // skip til next non-space -      pos++; -      while (isspace(bchar(s, pos))) { -        pos++; -      } -      bdelete(normalized, startpos, pos - startpos); -      binsertch(normalized, startpos, 1, ' '); -      pos = startpos + 1; -    } -    pos++; -  } -  btrimws(normalized); -  return normalized; +static unsigned char *normalize_reference(chunk *ref) +{ +	gh_buf normalized = GH_BUF_INIT; +	int r, w; + +	utf8proc_case_fold(&normalized, ref->data, ref->len); +	gh_buf_trim(&normalized); + +	for (r = 0, w = 0; r 
< normalized.size; ++r) { +		if (r && gh_buf_at(&normalized, r - 1) == ' ') { +			while (gh_buf_at(&normalized, r) == ' ') +				r++; +		} + +		normalized.ptr[w++] = normalized.ptr[r]; +	} + +	return gh_buf_detach(&normalized);  }  // Returns reference if refmap contains a reference with matching  // label, otherwise NULL. -extern reference* lookup_reference(reference** refmap, bstring lab) +extern reference* lookup_reference(reference** refmap, chunk *label)  { -  reference * ref = NULL; -  bstring label = normalize_reference(lab); -  if (refmap != NULL) { -    HASH_FIND_STR(*refmap, (char*) label->data, ref); -  } -  bdestroy(label); -  return ref; +	reference *ref = NULL; +	unsigned char *norm = normalize_reference(label); +	if (refmap != NULL) { +		HASH_FIND_STR(*refmap, (char*)norm, ref); +	} +	free(label); +	return ref;  } -extern reference* make_reference(bstring label, bstring url, bstring title) +extern reference* make_reference(chunk *label, chunk *url, chunk *title)  { -  reference * ref; -  ref = malloc(sizeof(reference)); -  ref->label = normalize_reference(label); -  ref->url = bstrcpy(url); -  ref->title = bstrcpy(title); -  return ref; +	reference *ref; +	ref = malloc(sizeof(reference)); +	ref->label = normalize_reference(label); +	ref->url = clean_url(url); +	ref->title = clean_title(title); +	return ref;  }  extern void add_reference(reference** refmap, reference* ref)  { -  reference * t = NULL; -  HASH_FIND(hh, *refmap, (char*) ref->label->data, -            (unsigned) blength(ref->label), t); -  if (t == NULL) { -    HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data, -                    (unsigned) blength(ref->label), ref); -  } else { -    free_reference(ref);  // we free this now since it won't be in the refmap -  } +	reference * t = NULL; +	HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t); + +	if (t == NULL) { +		HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref); +	} else { +		
free_reference(ref);  // we free this now since it won't be in the refmap +	}  }  // Create an inline with a linkable string value. -inline static inl* make_linkable(int t, inl* label, bstring url, bstring title) +inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)  { -  inl* e = (inl*) malloc(sizeof(inl)); -  e->tag = t; -  e->content.linkable.label = label; -  e->content.linkable.url   = url; -  e->content.linkable.title = title; -  e->next = NULL; -  return e; +	inl* e = (inl*) malloc(sizeof(inl)); +	e->tag = t; +	e->content.linkable.label = label; +	e->content.linkable.url   = chunk_to_cstr(&url); +	e->content.linkable.title = chunk_to_cstr(&title); +	e->next = NULL; +	return e;  }  inline static inl* make_inlines(int t, inl* contents)  { -  inl* e = (inl*) malloc(sizeof(inl)); -  e->tag = t; -  e->content.inlines = contents; -  e->next = NULL; -  return e; +	inl* e = (inl*) malloc(sizeof(inl)); +	e->tag = t; +	e->content.inlines = contents; +	e->next = NULL; +	return e;  }  // Create an inline with a literal string value. -inline static inl* make_literal(int t, bstring s) +inline static inl* make_literal(int t, chunk s)  { -  inl* e = (inl*) malloc(sizeof(inl)); -  e->tag = t; -  e->content.literal = s; -  e->next = NULL; -  return e; +	inl* e = (inl*) malloc(sizeof(inl)); +	e->tag = t; +	e->content.literal = s; +	e->next = NULL; +	return e;  }  // Create an inline with no value.  inline static inl* make_simple(int t)  { -  inl* e = (inl*) malloc(sizeof(inl)); -  e->tag = t; -  e->next = NULL; -  return e; +	inl* e = (inl*) malloc(sizeof(inl)); +	e->tag = t; +	e->next = NULL; +	return e;  }  // Macros for creating various kinds of inlines. 
@@ -139,113 +160,157 @@ inline static inl* make_simple(int t)  #define make_linebreak() make_simple(linebreak)  #define make_softbreak() make_simple(softbreak)  #define make_link(label, url, title) make_linkable(link, label, url, title) -#define make_image(alt, url, title) make_linkable(image, alt, url, title)  #define make_emph(contents) make_inlines(emph, contents)  #define make_strong(contents) make_inlines(strong, contents)  // Free an inline list.  extern void free_inlines(inl* e)  { -  inl * next; -  while (e != NULL) { -    switch (e->tag){ -    case str: -    case raw_html: -    case code: -    case entity: -      bdestroy(e->content.literal); -      break; -    case linebreak: -    case softbreak: -      break; -    case link: -    case image: -      bdestroy(e->content.linkable.url); -      bdestroy(e->content.linkable.title); -      free_inlines(e->content.linkable.label); -      break; -    case emph: -    case strong: -      free_inlines(e->content.inlines); -      break; -    default: -      break; -    } -    next = e->next; -    free(e); -    e = next; -  } +	inl * next; +	while (e != NULL) { +		switch (e->tag){ +			case str: +			case raw_html: +			case code: +			case entity: +				chunk_free(&e->content.literal); +				break; +			case linebreak: +			case softbreak: +				break; +			case link: +			case image: +				free(e->content.linkable.url); +				free(e->content.linkable.title); +				free_inlines(e->content.linkable.label); +				break; +			case emph: +			case strong: +				free_inlines(e->content.inlines); +				break; +			default: +				break; +		} +		next = e->next; +		free(e); +		e = next; +	}  }  // Append inline list b to the end of inline list a.  // Return pointer to head of new list.  
inline static inl* append_inlines(inl* a, inl* b)  { -  if (a == NULL) {  // NULL acts like an empty list -    return b; -  } -  inl* cur = a; -  while (cur->next) { -    cur = cur->next; -  } -  cur->next = b; -  return a; +	if (a == NULL) {  // NULL acts like an empty list +		return b; +	} +	inl* cur = a; +	while (cur->next) { +		cur = cur->next; +	} +	cur->next = b; +	return a;  }  // Make a 'subject' from an input string. -static subject* make_subject(bstring s, reference** refmap) +static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)  { -  subject* e = (subject*) malloc(sizeof(subject)); -  // remove final whitespace -  brtrimws(s); -  e->buffer = s; -  e->pos = 0; -  e->label_nestlevel = 0; -  e->reference_map = refmap; -  return e; +	e->buffer = buffer; +	e->pos = input_pos; +	e->label_nestlevel = 0; +	e->reference_map = refmap;  }  inline static int isbacktick(int c)  { -  return (c == '`'); +	return (c == '`'); +} + +inline static void chunk_free(chunk *c) +{ +	if (c->alloc) +		free((char *)c->data); + +	c->data = NULL; +	c->alloc = 0; +	c->len = 0; +} + +inline static void chunk_trim(chunk *c) +{ +	while (c->len && isspace(c->data[0])) { +		c->data++; +		c->len--; +	} + +	while (c->len > 0) { +		if (!isspace(c->data[c->len - 1])) +			break; + +		c->len--; +	} +} + +inline static unsigned char *chunk_to_cstr(chunk *c) +{ +	unsigned char *str; + +	str = malloc(c->len + 1); +	memcpy(str, c->data, c->len); +	str[c->len] = 0; + +	return str; +} + +inline static chunk chunk_literal(const char *data) +{ +	chunk c = {data, strlen(data), 0}; +	return c; +} + +inline static chunk chunk_buf(const gh_buf *buf, int pos, int len) +{ +	chunk c = {buf->ptr + pos, len, 0}; +	return c; +} + +inline static chunk chunk_buf_detach(gh_buf *buf) +{ +	chunk c; + +	c.len = buf->size; +	c.data = gh_buf_detach(buf); +	c.alloc = 1; + +	return c;  }  // Return the next character in the subject, without advancing.  
// Return 0 if at the end of the subject. -#define peek_char(subj) bchar(subj->buffer, subj->pos) +#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)  // Return true if there are more characters in the subject.  inline static int is_eof(subject* subj)  { -  return (subj->pos >= blength(subj->buffer)); +	return (subj->pos >= gh_buf_len(subj->buffer));  }  // Advance the subject.  Doesn't check for eof. -#define advance(subj) subj->pos += 1 +#define advance(subj) (subj)->pos += 1  // Take characters while a predicate holds, and return a string. -inline static bstring take_while(subject* subj, int (*f)(int)) +inline static chunk take_while(subject* subj, int (*f)(int))  { -  unsigned char c; -  int startpos = subj->pos; -  int len = 0; -  while ((c = peek_char(subj)) && (*f)(c)) { -    advance(subj); -    len++; -  } -  return bmidstr(subj->buffer, startpos, len); -} +	unsigned char c; +	int startpos = subj->pos; +	int len = 0; -// Take one character and return a string, or NULL if eof. -inline static bstring take_one(subject* subj) -{ -  int startpos = subj->pos; -  if (is_eof(subj)){ -    return NULL; -  } else { -    advance(subj); -    return bmidstr(subj->buffer, startpos, 1); -  } +	while ((c = peek_char(subj)) && (*f)(c)) { +		advance(subj); +		len++; +	} + +	return chunk_buf(subj->buffer, startpos, len);  }  // Try to process a backtick code span that began with a @@ -255,381 +320,406 @@ inline static bstring take_one(subject* subj)  // after the closing backticks.  
static int scan_to_closing_backticks(subject* subj, int openticklength)  { -  // read non backticks -  char c; -  while ((c = peek_char(subj)) && c != '`') { -    advance(subj); -  } -  if (is_eof(subj)) { -    return 0;  // did not find closing ticks, return 0 -  } -  int numticks = 0; -  while (peek_char(subj) == '`') { -      advance(subj); -      numticks++; -  } -  if (numticks != openticklength){ -    return(scan_to_closing_backticks(subj, openticklength)); -  } -  return (subj->pos); -} - -// Destructively modify bstring, collapsing consecutive +	// read non backticks +	char c; +	while ((c = peek_char(subj)) && c != '`') { +		advance(subj); +	} +	if (is_eof(subj)) { +		return 0;  // did not find closing ticks, return 0 +	} +	int numticks = 0; +	while (peek_char(subj) == '`') { +		advance(subj); +		numticks++; +	} +	if (numticks != openticklength){ +		return(scan_to_closing_backticks(subj, openticklength)); +	} +	return (subj->pos); +} + +// Destructively modify string, collapsing consecutive  // space and newline characters into a single space. 
-static int normalize_whitespace(bstring s) -{ -  bool last_char_was_space = false; -  int pos = 0; -  char c; -  while ((c = bchar(s, pos))) { -    switch (c) { -    case ' ': -      if (last_char_was_space) { -        bdelete(s, pos, 1); -      } else { -        pos++; -      } -      last_char_was_space = true; -      break; -    case '\n': -      if (last_char_was_space) { -        bdelete(s, pos, 1); -      } else { -        bdelete(s, pos, 1); -        binsertch(s, pos, 1, ' '); -        pos++; -      } -      last_char_was_space = true; -      break; -    default: -      pos++; -      last_char_was_space = false; -    } -  } -  return 0; +static void normalize_whitespace(gh_buf *s) +{ +	/* TODO */ +#if 0 +	bool last_char_was_space = false; +	int pos = 0; +	char c; +	while ((c = gh_buf_at(s, pos))) { +		switch (c) { +			case ' ': +				if (last_char_was_space) { +					bdelete(s, pos, 1); +				} else { +					pos++; +				} +				last_char_was_space = true; +				break; +			case '\n': +				if (last_char_was_space) { +					bdelete(s, pos, 1); +				} else { +					bdelete(s, pos, 1); +					binsertch(s, pos, 1, ' '); +					pos++; +				} +				last_char_was_space = true; +				break; +			default: +				pos++; +				last_char_was_space = false; +		} +	} +#endif  }  // Parse backtick code section or raw backticks, return an inline.  // Assumes that the subject has a backtick at the current position.  
static inl* handle_backticks(subject *subj)  { -  bstring openticks = take_while(subj, isbacktick); -  bstring result; -  int ticklength = blength(openticks); -  int startpos = subj->pos; -  int endpos = scan_to_closing_backticks(subj, ticklength); -  if (endpos == 0) { // not found -    subj->pos = startpos; // rewind -    return make_str(openticks); -  } else { -    bdestroy(openticks); -    result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength); -    btrimws(result); -    normalize_whitespace(result); -    return make_code(result); -  } +	chunk openticks = take_while(subj, isbacktick); +	int startpos = subj->pos; +	int endpos = scan_to_closing_backticks(subj, openticks.len); + +	if (endpos == 0) { // not found +		subj->pos = startpos; // rewind +		return make_str(openticks); +	} else { +		gh_buf buf = GH_BUF_INIT; + +		gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len); +		gh_buf_trim(&buf); +		normalize_whitespace(&buf); + +		return make_code(chunk_buf_detach(&buf)); +	}  }  // Scan ***, **, or * and return number scanned, or 0.  // Don't advance position.  static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)  { -  int numdelims = 0; -  char char_before, char_after; -  int startpos = subj->pos; - -  char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1); -  while (peek_char(subj) == c) { -    numdelims++; -    advance(subj); -  } -  char_after = peek_char(subj); -  *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); -  *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); -  if (c == '_') { -    *can_open = *can_open && !isalnum(char_before); -    *can_close = *can_close && !isalnum(char_after); -  } -  subj->pos = startpos; -  return numdelims; +	int numdelims = 0; +	char char_before, char_after; +	int startpos = subj->pos; + +	char_before = subj->pos == 0 ? 
'\n' : gh_buf_at(subj->buffer, subj->pos - 1); +	while (peek_char(subj) == c) { +		numdelims++; +		advance(subj); +	} +	char_after = peek_char(subj); +	*can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); +	*can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); +	if (c == '_') { +		*can_open = *can_open && !isalnum(char_before); +		*can_close = *can_close && !isalnum(char_after); +	} +	subj->pos = startpos; +	return numdelims;  }  // Parse strong/emph or a fallback.  // Assumes the subject has '_' or '*' at the current position.  static inl* handle_strong_emph(subject* subj, char c)  { -  bool can_open, can_close; -  inl * result = NULL; -  inl ** last = malloc(sizeof(inl *)); -  inl * new; -  inl * il; -  inl * first_head = NULL; -  inl * first_close = NULL; -  int first_close_delims = 0; -  int numdelims; - -  *last = NULL; - -  numdelims = scan_delims(subj, c, &can_open, &can_close); -  subj->pos += numdelims; - -  new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims)); -  *last = new; -  first_head = new; -  result = new; - -  if (!can_open || numdelims == 0) { -    goto done; -  } - -  switch (numdelims) { -  case 1: -    while (true) { -      numdelims = scan_delims(subj, c, &can_open, &can_close); -      if (numdelims >= 1 && can_close) { -        subj->pos += 1; -        first_head->tag = emph; -        bdestroy(first_head->content.literal); -        first_head->content.inlines = first_head->next; -        first_head->next = NULL; -        goto done; -      } else { -        if (!parse_inline(subj, last)) { -          goto done; -        } -      } -    } -    break; -  case 2: -    while (true) { -      numdelims = scan_delims(subj, c, &can_open, &can_close); -      if (numdelims >= 2 && can_close) { -        subj->pos += 2; -        first_head->tag = strong; -        bdestroy(first_head->content.literal); -        first_head->content.inlines = first_head->next; -        first_head->next = NULL; -        
goto done; -      } else { -        if (!parse_inline(subj, last)) { -          goto done; -        } -      } -    } -    break; -  case 3: -    while (true) { -      numdelims = scan_delims(subj, c, &can_open, &can_close); -      if (can_close && numdelims >= 1 && numdelims <= 3 && -          numdelims != first_close_delims) { -        new = make_str(bmidstr(subj->buffer, subj->pos, numdelims)); -        append_inlines(*last, new); -        *last = new; - -        if (first_close_delims == 1 && numdelims > 2) { -          numdelims = 2; -        } else if (first_close_delims == 2) { -          numdelims = 1; -        } else if (numdelims == 3) { -          // If we opened with ***, we interpret it as ** followed by * -          // giving us <strong><em> -          numdelims = 1; -        } - -        subj->pos += numdelims; -        if (first_close) { -          first_head->tag = first_close_delims == 1 ? strong : emph; -          bdestroy(first_head->content.literal); -          first_head->content.inlines = -            make_inlines(first_close_delims == 1 ? 
emph : strong, -                         first_head->next); - -          il = first_head->next; -          while (il->next && il->next != first_close) { -            il = il->next; -          } -          il->next = NULL; - -          first_head->content.inlines->next = first_close->next; - -          il = first_head->content.inlines; -          while (il->next && il->next != *last) { -            il = il->next; -          } -          il->next = NULL; -          free_inlines(*last); - -          first_close->next = NULL; -          free_inlines(first_close); -          first_head->next = NULL; -          goto done; -        } else { -          first_close = *last; -          first_close_delims = numdelims; -        } -      } else { -        if (!parse_inline(subj, last)) { -          goto done; -        } -      } -    } -    break; -  default: -    goto done; -  } - - done: -  free(last); -  return result; +	bool can_open, can_close; +	inl * result = NULL; +	inl ** last = malloc(sizeof(inl *)); +	inl * new; +	inl * il; +	inl * first_head = NULL; +	inl * first_close = NULL; +	int first_close_delims = 0; +	int numdelims; + +	*last = NULL; + +	numdelims = scan_delims(subj, c, &can_open, &can_close); +	subj->pos += numdelims; + +	new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims)); +	*last = new; +	first_head = new; +	result = new; + +	if (!can_open || numdelims == 0) { +		goto done; +	} + +	switch (numdelims) { +		case 1: +			while (true) { +				numdelims = scan_delims(subj, c, &can_open, &can_close); +				if (numdelims >= 1 && can_close) { +					subj->pos += 1; +					first_head->tag = emph; +					chunk_free(&first_head->content.literal); +					first_head->content.inlines = first_head->next; +					first_head->next = NULL; +					goto done; +				} else { +					if (!parse_inline(subj, last)) { +						goto done; +					} +				} +			} +			break; +		case 2: +			while (true) { +				numdelims = scan_delims(subj, c, &can_open, &can_close); +				if 
(numdelims >= 2 && can_close) { +					subj->pos += 2; +					first_head->tag = strong; +					chunk_free(&first_head->content.literal); +					first_head->content.inlines = first_head->next; +					first_head->next = NULL; +					goto done; +				} else { +					if (!parse_inline(subj, last)) { +						goto done; +					} +				} +			} +			break; +		case 3: +			while (true) { +				numdelims = scan_delims(subj, c, &can_open, &can_close); +				if (can_close && numdelims >= 1 && numdelims <= 3 && +						numdelims != first_close_delims) { +					new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims)); +					append_inlines(*last, new); +					*last = new; +					if (first_close_delims == 1 && numdelims > 2) { +						numdelims = 2; +					} else if (first_close_delims == 2) { +						numdelims = 1; +					} else if (numdelims == 3) { +						// If we opened with ***, we interpret it as ** followed by * +						// giving us <strong><em> +						numdelims = 1; +					} +					subj->pos += numdelims; +					if (first_close) { +						first_head->tag = first_close_delims == 1 ? strong : emph; +						chunk_free(&first_head->content.literal); +						first_head->content.inlines = +							make_inlines(first_close_delims == 1 ? 
emph : strong, +									first_head->next); + +						il = first_head->next; +						while (il->next && il->next != first_close) { +							il = il->next; +						} +						il->next = NULL; + +						first_head->content.inlines->next = first_close->next; + +						il = first_head->content.inlines; +						while (il->next && il->next != *last) { +							il = il->next; +						} +						il->next = NULL; +						free_inlines(*last); + +						first_close->next = NULL; +						free_inlines(first_close); +						first_head->next = NULL; +						goto done; +					} else { +						first_close = *last; +						first_close_delims = numdelims; +					} +				} else { +					if (!parse_inline(subj, last)) { +						goto done; +					} +				} +			} +			break; +		default: +			goto done; +	} + +done: +	free(last); +	return result;  }  // Parse backslash-escape or just a backslash, returning an inline.  static inl* handle_backslash(subject *subj)  { -  advance(subj); -  unsigned char nextchar = peek_char(subj); -  if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped -    advance(subj); -    return make_str(bformat("%c", nextchar)); -  } else if (nextchar == '\n') { -    advance(subj); -    return make_linebreak(); -  } else { -    return make_str(bfromcstr("\\")); -  } +	advance(subj); +	unsigned char nextchar = peek_char(subj); +	if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped +		advance(subj); +		return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1)); +	} else if (nextchar == '\n') { +		advance(subj); +		return make_linebreak(); +	} else { +		return make_str(chunk_literal("\\")); +	}  }  // Parse an entity or a regular "&" string.  // Assumes the subject has an '&' character at the current position.  
static inl* handle_entity(subject* subj)  { -  int match; -  inl * result; -  match = scan_entity(subj->buffer, subj->pos); -  if (match) { -    result = make_entity(bmidstr(subj->buffer, subj->pos, match)); -    subj->pos += match; -  } else { -    advance(subj); -    result = make_str(bfromcstr("&")); -  } -  return result; +	int match; +	inl *result; +	match = scan_entity(subj->buffer, subj->pos); +	if (match) { +		result = make_entity(chunk_buf(subj->buffer, subj->pos, match)); +		subj->pos += match; +	} else { +		advance(subj); +		result = make_str(chunk_literal("&")); +	} +	return result;  }  // Like make_str, but parses entities.  // Returns an inline sequence consisting of str and entity elements. -static inl * make_str_with_entities(bstring s) -{ -  inl * result = NULL; -  inl * new; -  int searchpos; -  char c; -  subject * subj = make_subject(s, NULL); - -  while ((c = peek_char(subj))) { -    switch (c) { -    case '&': -      new = handle_entity(subj); -      break; -    default: -      searchpos = bstrchrp(subj->buffer, '&', subj->pos); -      if (searchpos == BSTR_ERR) { -        searchpos = blength(subj->buffer); -      } -      new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos)); -      subj->pos = searchpos; -    } -    result = append_inlines(result, new); -  } -  free(subj); -  return result; +static inl *make_str_with_entities(chunk *content) +{ +	inl * result = NULL; +	inl * new; +	int searchpos; +	char c; +	subject subj; +	gh_buf content_buf = GH_BUF_INIT; + +	gh_buf_set(&content_buf, content->data, content->len); +	init_subject(&subj, &content_buf, 0, NULL); + +	while ((c = peek_char(&subj))) { +		switch (c) { +			case '&': +				new = handle_entity(&subj); +				break; +			default: +				searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos); +				if (searchpos < 0) { +					searchpos = gh_buf_len(subj.buffer); +				} + +				new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos)); +				subj.pos = searchpos; +		
} +		result = append_inlines(result, new); +	} + +	gh_buf_free(&content_buf); +	return result;  }  // Destructively unescape a string: remove backslashes before punctuation chars. -extern int unescape(bstring url) +extern void unescape_buffer(gh_buf *buf)  { -  // remove backslashes before punctuation chars: -  int searchpos = 0; -  while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) { -    if (ispunct(bchar(url, searchpos + 1))) { -      bdelete(url, searchpos, 1); -    } else { -      searchpos++; -    } -  } -  return 0; +	int r, w; + +	for (r = 0, w = 0; r < buf->size; ++r) { +		if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1])) +			continue; + +		buf->ptr[w++] = buf->ptr[r]; +	} + +	gh_buf_truncate(buf, w);  }  // Clean a URL: remove surrounding whitespace and surrounding <>,  // and remove \ that escape punctuation. -static int clean_url(bstring url) +static unsigned char *clean_url(chunk *url)  { -  // remove surrounding <> if any: -  int urllength = blength(url); -  btrimws(url); -  if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') { -    bdelete(url, 0, 1); -    bdelete(url, urllength - 2, 1); -  } -  unescape(url); -  return 0; +	gh_buf buf = GH_BUF_INIT; + +	chunk_trim(url); + +	if (url->data[0] == '<' && url->data[url->len - 1] == '>') { +		gh_buf_set(&buf, url->data + 1, url->len - 2); +	} else { +		gh_buf_set(&buf, url->data, url->len); +	} + +	unescape_buffer(&buf); +	return gh_buf_detach(&buf);  }  // Clean a title: remove surrounding quotes and remove \ that escape punctuation. 
-static int clean_title(bstring title) +static unsigned char *clean_title(chunk *title)  { -  // remove surrounding quotes if any: -  int titlelength = blength(title); -  if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') || -      (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') || -      (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) { -    bdelete(title, 0, 1); -    bdelete(title, titlelength - 2, 1); -  } -  unescape(title); -  return 0; +	gh_buf buf = GH_BUF_INIT; +	unsigned char first = title->data[0]; +	unsigned char last = title->data[title->len - 1]; + +	// remove surrounding quotes if any: +	if ((first == '\'' && last == '\'') || +		(first == '(' && last == ')') || +		(first == '"' && last == '"')) { +		gh_buf_set(&buf, title->data + 1, title->len - 2); +	} else { +		gh_buf_set(&buf, title->data, title->len); +	} + +	unescape_buffer(&buf); +	return gh_buf_detach(&buf);  }  // Parse an autolink or HTML tag.  // Assumes the subject has a '<' character at the current position.  
static inl* handle_pointy_brace(subject* subj)  { -  int matchlen = 0; -  bstring contents; -  inl* result; - -  advance(subj);  // advance past first < -  // first try to match a URL autolink -  matchlen = scan_autolink_uri(subj->buffer, subj->pos); -  if (matchlen > 0) { -    contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); -    subj->pos += matchlen; -    result =  make_link(make_str_with_entities(contents), -                        bstrcpy(contents), bfromcstr("")); -    bdestroy(contents); -    return result; -  } -  // next try to match an email autolink -  matchlen = scan_autolink_email(subj->buffer, subj->pos); -  if (matchlen > 0) { -    contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); -    subj->pos += matchlen; -    result = make_link(make_str_with_entities(contents), -                       bformat("mailto:%s", contents->data), -                       bfromcstr("")); -    bdestroy(contents); -    return result; -  } -  // finally, try to match an html tag -  matchlen = scan_html_tag(subj->buffer, subj->pos); -  if (matchlen > 0) { -    contents = bmidstr(subj->buffer, subj->pos, matchlen); -    binsertch(contents, 0, 1, '<'); -    subj->pos += matchlen; -    return make_raw_html(contents); -  } else {// if nothing matches, just return the opening <: -    return make_str(bfromcstr("<")); -  } +	int matchlen = 0; +	chunk contents; + +	advance(subj);  // advance past first < + +	// first try to match a URL autolink +	matchlen = scan_autolink_uri(subj->buffer, subj->pos); +	if (matchlen > 0) { +		contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); +		subj->pos += matchlen; + +		return make_link( +			make_str_with_entities(&contents), +			contents, +			chunk_literal("") +		); +	} + +	// next try to match an email autolink +	matchlen = scan_autolink_email(subj->buffer, subj->pos); +	if (matchlen > 0) { +		gh_buf mail_url = GH_BUF_INIT; + +		contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); +		subj->pos += matchlen; + +		
gh_buf_puts(&mail_url, "mailto:"); +		gh_buf_put(&mail_url, contents.data, contents.len); + +		return make_link( +				make_str_with_entities(&contents), +				chunk_buf_detach(&mail_url), +				chunk_literal("") +		); +	} + +	// finally, try to match an html tag +	matchlen = scan_html_tag(subj->buffer, subj->pos); +	if (matchlen > 0) { +		contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1); +		subj->pos += matchlen; +		return make_raw_html(contents); +	} + +	// if nothing matches, just return the opening <: +	return make_str(chunk_literal("<"));  }  // Parse a link label.  Returns 1 if successful. @@ -641,366 +731,381 @@ static inl* handle_pointy_brace(subject* subj)  // markers. So, 2 below contains a link while 1 does not:  // 1. [a link `with a ](/url)` character  // 2. [a link *with emphasized ](/url) text* -static int link_label(subject* subj, bstring* raw_label) -{ -  int nestlevel = 0; -  inl* tmp = NULL; -  bstring raw; -  int startpos = subj->pos; -  if (subj->label_nestlevel) { -    // if we've already checked to the end of the subject -    // for a label, even with a different starting [, we -    // know we won't find one here and we can just return. 
-    // Note:  nestlevel 1 would be: [foo [bar] -    // nestlevel 2 would be: [foo [bar [baz] -    subj->label_nestlevel--; -    return 0; -  } -  advance(subj);  // advance past [ -  char c; -  while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { -    switch (c) { -    case '`': -      tmp = handle_backticks(subj); -      free_inlines(tmp); -      break; -    case '<': -      tmp = handle_pointy_brace(subj); -      free_inlines(tmp); -      break; -    case '[':  // nested [] -      nestlevel++; -      advance(subj); -      break; -    case ']':  // nested [] -      nestlevel--; -      advance(subj); -      break; -    case '\\': -      advance(subj); -      if (ispunct(peek_char(subj))) { -        advance(subj); -      } -      break; -    default: -      advance(subj); -    } -  } -  if (c == ']') { -    if (raw_label != NULL) { -      raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1)); -      *raw_label = raw; -    } -    subj->label_nestlevel = 0; -    advance(subj);  // advance past ] -    return 1; -  } else { -    if (c == 0) { -      subj->label_nestlevel = nestlevel; -    } -    subj->pos = startpos; // rewind -    return 0; -  } +static int link_label(subject* subj, chunk *raw_label) +{ +	int nestlevel = 0; +	inl* tmp = NULL; +	int startpos = subj->pos; + +	if (subj->label_nestlevel) { +		// if we've already checked to the end of the subject +		// for a label, even with a different starting [, we +		// know we won't find one here and we can just return. 
+		// Note:  nestlevel 1 would be: [foo [bar] +		// nestlevel 2 would be: [foo [bar [baz] +		subj->label_nestlevel--; +		return 0; +	} + +	advance(subj);  // advance past [ +	char c; +	while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { +		switch (c) { +			case '`': +				tmp = handle_backticks(subj); +				free_inlines(tmp); +				break; +			case '<': +				tmp = handle_pointy_brace(subj); +				free_inlines(tmp); +				break; +			case '[':  // nested [] +				nestlevel++; +				advance(subj); +				break; +			case ']':  // nested [] +				nestlevel--; +				advance(subj); +				break; +			case '\\': +				advance(subj); +				if (ispunct(peek_char(subj))) { +					advance(subj); +				} +				break; +			default: +				advance(subj); +		} +	} +	if (c == ']') { +		*raw_label = chunk_buf( +			subj->buffer, +			startpos + 1, +			subj->pos - (startpos + 1) +		); + +		subj->label_nestlevel = 0; +		advance(subj);  // advance past ] +		return 1; +	} else { +		if (c == 0) { +			subj->label_nestlevel = nestlevel; +		} +		subj->pos = startpos; // rewind +		return 0; +	}  }  // Parse a link or the link portion of an image, or return a fallback.  static inl* handle_left_bracket(subject* subj)  { -  inl* lab = NULL; -  inl* result = NULL; -  reference* ref; -  int n; -  int sps; -  int found_label; -  int endlabel, starturl, endurl, starttitle, endtitle, endall; -  bstring url, title, rawlabel, reflabel; -  bstring rawlabel2 = NULL; -  found_label = link_label(subj, &rawlabel); -  endlabel = subj->pos; -  if (found_label) { -    if (peek_char(subj) == '(' && -        ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && -        ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { -      // try to parse an explicit link: -      starturl = subj->pos + 1 + sps; // after ( -      endurl = starturl + n; -      starttitle = endurl + scan_spacechars(subj->buffer, endurl); -      // ensure there are spaces btw url and title -      endtitle = (starttitle == endurl) ? 
starttitle : -                 starttitle + scan_link_title(subj->buffer, starttitle); -      endall = endtitle + scan_spacechars(subj->buffer, endtitle); -      if (bchar(subj->buffer, endall) == ')') { -        subj->pos = endall + 1; -        url = bmidstr(subj->buffer, starturl, endurl - starturl); -        clean_url(url); -        title = bmidstr(subj->buffer, starttitle, endtitle - starttitle); -        clean_title(title); -        lab = parse_inlines(rawlabel, NULL); -        bdestroy(rawlabel); -        return make_link(lab, url, title); -      } else { -        // if we get here, we matched a label but didn't get further: -        subj->pos = endlabel; -        lab = parse_inlines(rawlabel, subj->reference_map); -        bdestroy(rawlabel); -        result = append_inlines(make_str(bfromcstr("[")), -                                append_inlines(lab, -                                               make_str(bfromcstr("]")))); -        return result; -      } -    } else { -      // Check for reference link. 
-      // First, see if there's another label: -      subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); -      reflabel = rawlabel; -      // if followed by a nonempty link label, we change reflabel to it: -      if (peek_char(subj) == '[' && -          link_label(subj, &rawlabel2)) { -        if (blength(rawlabel2) > 0) { -          reflabel = rawlabel2; -        } -      } else { -        subj->pos = endlabel; -      } -      // lookup rawlabel in subject->reference_map: -      ref = lookup_reference(subj->reference_map, reflabel); -      if (ref != NULL) { // found -        lab = parse_inlines(rawlabel, NULL); -        result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title)); -      } else { -        subj->pos = endlabel; -        lab = parse_inlines(rawlabel, subj->reference_map); -        result = append_inlines(make_str(bfromcstr("[")), -                               append_inlines(lab, make_str(bfromcstr("]")))); -      } -      bdestroy(rawlabel); -      bdestroy(rawlabel2); -      return result; -    } -  } -  // If we fall through to here, it means we didn't match a link: -  advance(subj);  // advance past [ -  return make_str(bfromcstr("[")); +	inl *lab = NULL; +	inl *result = NULL; +	reference *ref; +	int n; +	int sps; +	int found_label; +	int endlabel, starturl, endurl, starttitle, endtitle, endall; + +	chunk rawlabel; +	chunk url, title; + +	found_label = link_label(subj, &rawlabel); +	endlabel = subj->pos; + +	if (found_label) { +		if (peek_char(subj) == '(' && +				((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && +				((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { + +			// try to parse an explicit link: +			starturl = subj->pos + 1 + sps; // after ( +			endurl = starturl + n; +			starttitle = endurl + scan_spacechars(subj->buffer, endurl); + +			// ensure there are spaces btw url and title +			endtitle = (starttitle == endurl) ? 
starttitle : +				starttitle + scan_link_title(subj->buffer, starttitle); + +			endall = endtitle + scan_spacechars(subj->buffer, endtitle); + +			if (gh_buf_at(subj->buffer, endall) == ')') { +				subj->pos = endall + 1; + +				url = chunk_buf(subj->buffer, starturl, endurl - starturl); +				title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle); +				lab = parse_chunk_inlines(&rawlabel, NULL); + +				return make_link(lab, url, title); +			} else { +				// if we get here, we matched a label but didn't get further: +				subj->pos = endlabel; +				lab = parse_chunk_inlines(&rawlabel, subj->reference_map); +				result = append_inlines(make_str(chunk_literal("[")), +						append_inlines(lab, +							make_str(chunk_literal("]")))); +				return result; +			} +		} else { +			chunk rawlabel_tmp; +			chunk reflabel; + +			// Check for reference link. +			// First, see if there's another label: +			subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); +			reflabel = rawlabel; + +			// if followed by a nonempty link label, we change reflabel to it: +			if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) { +				if (rawlabel_tmp.len > 0) +					reflabel = rawlabel_tmp; +			} else { +				subj->pos = endlabel; +			} + +			// lookup rawlabel in subject->reference_map: +			ref = lookup_reference(subj->reference_map, &reflabel); +			if (ref != NULL) { // found +				lab = parse_chunk_inlines(&rawlabel, NULL); +				result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title)); +			} else { +				subj->pos = endlabel; +				lab = parse_chunk_inlines(&rawlabel, subj->reference_map); +				result = append_inlines(make_str(chunk_literal("[")), +						append_inlines(lab, make_str(chunk_literal("]")))); +			} +			return result; +		} +	} +	// If we fall through to here, it means we didn't match a link: +	advance(subj);  // advance past [ +	return make_str(chunk_literal("["));  }  // Parse a hard or soft linebreak, returning an inline.  
// Assumes the subject has a newline at the current position.  static inl* handle_newline(subject *subj)  { -  int nlpos = subj->pos; -  // skip over newline -  advance(subj); -  // skip spaces at beginning of line -  while (peek_char(subj) == ' ') { -    advance(subj); -  } -  if (nlpos > 1 && -      bchar(subj->buffer, nlpos - 1) == ' ' && -      bchar(subj->buffer, nlpos - 2) == ' ') { -    return make_linebreak(); -  } else { -    return make_softbreak(); -  } +	int nlpos = subj->pos; +	// skip over newline +	advance(subj); +	// skip spaces at beginning of line +	while (peek_char(subj) == ' ') { +		advance(subj); +	} +	if (nlpos > 1 && +			gh_buf_at(subj->buffer, nlpos - 1) == ' ' && +			gh_buf_at(subj->buffer, nlpos - 2) == ' ') { +		return make_linebreak(); +	} else { +		return make_softbreak(); +	}  }  inline static int not_eof(subject* subj)  { -  return !is_eof(subj); +	return !is_eof(subj);  }  // Parse inlines while a predicate is satisfied.  Return inlines.  extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))  { -  inl* result = NULL; -  inl** last = &result; -  while ((*f)(subj) && parse_inline(subj, last)) { -  } -  return result; +	inl* result = NULL; +	inl** last = &result; +	while ((*f)(subj) && parse_inline(subj, last)) { +	} +	return result; +} + +inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +{ +	inl *result; +	subject subj; +	gh_buf full_chunk = GH_BUF_INIT; + +	gh_buf_set(&full_chunk, chunk->data, chunk->len); +	init_subject(&subj, &full_chunk, 0, refmap); +	result = parse_inlines_while(&subj, not_eof); + +	gh_buf_free(&full_chunk); +	return result; +} + +static int find_special_char(subject *subj) +{ +	int n = subj->pos + 1; +	int size = (int)gh_buf_len(subj->buffer); + +	while (n < size) { +		if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n))) +			return n; +	} + +	return -1;  }  // Parse an inline, advancing subject, and add it to last element.  // Adjust tail to point to new last element of list.  
// Return 0 if no inline can be parsed, 1 otherwise. -extern int parse_inline(subject* subj, inl ** last) -{ -  inl* new = NULL; -  bstring contents; -  bstring special_chars; -  unsigned char c; -  int endpos; -  c = peek_char(subj); -  if (c == 0) { -    return 0; -  } -  switch(c){ -    case '\n': -      new = handle_newline(subj); -      break; -    case '`': -      new = handle_backticks(subj); -      break; -    case '\\': -      new = handle_backslash(subj); -      break; -    case '&': -      new = handle_entity(subj); -      break; -    case '<': -      new = handle_pointy_brace(subj); -      break; -    case '_': -      if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) || -                            bchar(subj->buffer, subj->pos - 1) == '_')) { -        new = make_str(take_one(subj)); -      } else { -        new = handle_strong_emph(subj, '_'); -      } -      break; -    case '*': -      new = handle_strong_emph(subj, '*'); -      break; -    case '[': -      new = handle_left_bracket(subj); -      break; -    case '!': -      advance(subj); -      if (peek_char(subj) == '[') { -        new = handle_left_bracket(subj); -        if (new != NULL && new->tag == link) { -          new->tag = image; -        } else { -          new = append_inlines(make_str(bfromcstr("!")), new); -        } -      } else { -        new = make_str(bfromcstr("!")); -      } -      break; -    default: -      // we read until we hit a special character -      special_chars = bfromcstr("\n\\`&_*[]<!"); -      endpos = binchr(subj->buffer, subj->pos, special_chars); -      bdestroy(special_chars); -      if (endpos == subj->pos) { -        // current char is special: read a 1-character str -        contents = take_one(subj); -      } else if (endpos == BSTR_ERR) { -        // special char not found, take whole rest of buffer: -        endpos = subj->buffer->slen; -        contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); -        subj->pos = 
endpos; -      } else { -        // take buffer from subj->pos to endpos to str. -        contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); -        subj->pos = endpos; -        // if we're at a newline, strip trailing spaces. -        if (peek_char(subj) == '\n') { -          brtrimws(contents); -        } -      } -      new = make_str(contents); -  } -  if (*last == NULL) { -    *last = new; -  } else { -    append_inlines(*last, new); -  } -  return 1; -} - -extern inl* parse_inlines(bstring input, reference** refmap) -{ -  subject * subj = make_subject(input, refmap); -  inl * result = parse_inlines_while(subj, not_eof); -  free(subj); -  return result; +static int parse_inline(subject* subj, inl ** last) +{ +	inl* new = NULL; +	chunk contents; +	unsigned char c; +	int endpos; +	c = peek_char(subj); +	if (c == 0) { +		return 0; +	} +	switch(c){ +		case '\n': +			new = handle_newline(subj); +			break; +		case '`': +			new = handle_backticks(subj); +			break; +		case '\\': +			new = handle_backslash(subj); +			break; +		case '&': +			new = handle_entity(subj); +			break; +		case '<': +			new = handle_pointy_brace(subj); +			break; +		case '_': +			if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) || +						gh_buf_at(subj->buffer, subj->pos - 1) == '_')) { +				goto text_literal; +			} + +			new = handle_strong_emph(subj, '_'); +			break; +		case '*': +			new = handle_strong_emph(subj, '*'); +			break; +		case '[': +			new = handle_left_bracket(subj); +			break; +		case '!': +			advance(subj); +			if (peek_char(subj) == '[') { +				new = handle_left_bracket(subj); +				if (new != NULL && new->tag == link) { +					new->tag = image; +				} else { +					new = append_inlines(make_str(chunk_literal("!")), new); +				} +			} else { +				new = make_str(chunk_literal("!")); +			} +			break; +		default: +		text_literal: +			endpos = find_special_char(subj); +			if (endpos < 0) { +				endpos = gh_buf_len(subj->buffer); +			} + +			
contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos); +			subj->pos = endpos; + +			// if we're at a newline, strip trailing spaces. +			if (peek_char(subj) == '\n') { +				chunk_trim(&contents); +			} + +			new = make_str(contents); +	} +	if (*last == NULL) { +		*last = new; +	} else { +		append_inlines(*last, new); +	} +	return 1; +} + +extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap) +{ +	subject subj; +	init_subject(&subj, input, input_pos, refmap); +	return parse_inlines_while(&subj, not_eof);  }  // Parse zero or more space characters, including at most one newline.  void spnl(subject* subj)  { -  bool seen_newline = false; -  while (peek_char(subj) == ' ' || -         (!seen_newline && -          (seen_newline = peek_char(subj) == '\n'))) { -    advance(subj); -  } +	bool seen_newline = false; +	while (peek_char(subj) == ' ' || +			(!seen_newline && +			 (seen_newline = peek_char(subj) == '\n'))) { +		advance(subj); +	}  }  // Parse reference.  Assumes string begins with '[' character.  // Modify refmap if a reference is encountered.  // Return 0 if no reference found, otherwise position of subject  // after reference is parsed. 
-extern int parse_reference(bstring input, reference** refmap) -{ -  subject * subj = make_subject(input, NULL); -  bstring lab = NULL; -  bstring url = NULL; -  bstring title = NULL; -  int matchlen = 0; -  int beforetitle; -  reference * new = NULL; -  int newpos; - -  // parse label: -  if (!link_label(subj, &lab)) { -    free(subj); -    return 0; -  } -  // colon: -  if (peek_char(subj) == ':') { -    advance(subj); -  } else { -    free(subj); -    bdestroy(lab); -    return 0; -  } -  // parse link url: -  spnl(subj); -  matchlen = scan_link_url(subj->buffer, subj->pos); -  if (matchlen) { -    url = bmidstr(subj->buffer, subj->pos, matchlen); -    clean_url(url); -    subj->pos += matchlen; -  } else { -    free(subj); -    bdestroy(lab); -    bdestroy(url); -    return 0; -  } -  // parse optional link_title -  beforetitle = subj->pos; -  spnl(subj); -  matchlen = scan_link_title(subj->buffer, subj->pos); -  if (matchlen) { -    title = bmidstr(subj->buffer, subj->pos, matchlen); -    clean_title(title); -    subj->pos += matchlen; -  } else { -    subj->pos = beforetitle; -    title = bfromcstr(""); -  } -  // parse final spaces and newline: -  while (peek_char(subj) == ' ') { -    advance(subj); -  } -  if (peek_char(subj) == '\n') { -    advance(subj); -  } else if (peek_char(subj) != 0) { -    free(subj); -    bdestroy(lab); -    bdestroy(url); -    bdestroy(title); -    return 0; -  } -  // insert reference into refmap -  new = make_reference(lab, url, title); -  add_reference(refmap, new); - -  newpos = subj->pos; -  free(subj); -  bdestroy(lab); -  bdestroy(url); -  bdestroy(title); -  return newpos; +extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) +{ +	subject subj; + +	chunk lab; +	chunk url; +	chunk title; + +	int matchlen = 0; +	int beforetitle; +	reference * new = NULL; + +	init_subject(&subj, input, input_pos, NULL); + +	// parse label: +	if (!link_label(&subj, &lab)) +		return 0; + +	// colon: +	if 
(peek_char(&subj) == ':') { +		advance(&subj); +	} else { +		return 0; +	} + +	// parse link url: +	spnl(&subj); +	matchlen = scan_link_url(subj.buffer, subj.pos); +	if (matchlen) { +		url = chunk_buf(subj.buffer, subj.pos, matchlen); +		subj.pos += matchlen; +	} else { +		return 0; +	} + +	// parse optional link_title +	beforetitle = subj.pos; +	spnl(&subj); +	matchlen = scan_link_title(subj.buffer, subj.pos); +	if (matchlen) { +		title = chunk_buf(subj.buffer, subj.pos, matchlen); +		subj.pos += matchlen; +	} else { +		subj.pos = beforetitle; +		title = chunk_literal(""); +	} +	// parse final spaces and newline: +	while (peek_char(&subj) == ' ') { +		advance(&subj); +	} +	if (peek_char(&subj) == '\n') { +		advance(&subj); +	} else if (peek_char(&subj) != 0) { +		return 0; +	} +	// insert reference into refmap +	new = make_reference(&lab, &url, &title); +	add_reference(refmap, new); + +	return subj.pos;  } @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) {      print_blocks(cur, 0);    } else {      check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML"); -    printf("%s", html->data); +    // printf("%s", html->data);      bdestroy(html);    }    free_blocks(cur); diff --git a/src/scanners.h b/src/scanners.h index 71e0520..b6e586b 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -1,15 +1,15 @@ -#include "bstrlib.h" +#include "buffer.h" -int scan_autolink_uri(bstring s, int pos); -int scan_autolink_email(bstring s, int pos); -int scan_html_tag(bstring s, int pos); -int scan_html_block_tag(bstring s, int pos); -int scan_link_url(bstring s, int pos); -int scan_link_title(bstring s, int pos); -int scan_spacechars(bstring s, int pos); -int scan_atx_header_start(bstring s, int pos); -int scan_setext_header_line(bstring s, int pos); -int scan_hrule(bstring s, int pos); -int scan_open_code_fence(bstring s, int pos); -int scan_close_code_fence(bstring s, int pos, int len); -int scan_entity(bstring s, int pos); +int scan_autolink_uri(const gh_buf *s, 
int pos); +int scan_autolink_email(const gh_buf *s, int pos); +int scan_html_tag(const gh_buf *s, int pos); +int scan_html_block_tag(const gh_buf *s, int pos); +int scan_link_url(const gh_buf *s, int pos); +int scan_link_title(const gh_buf *s, int pos); +int scan_spacechars(const gh_buf *s, int pos); +int scan_atx_header_start(const gh_buf *s, int pos); +int scan_setext_header_line(const gh_buf *s, int pos); +int scan_hrule(const gh_buf *s, int pos); +int scan_open_code_fence(const gh_buf *s, int pos); +int scan_close_code_fence(const gh_buf *s, int pos, int len); +int scan_entity(const gh_buf *s, int pos); diff --git a/src/scanners.re b/src/scanners.re index 305d1ea..7323ef9 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -1,4 +1,4 @@ -#include "bstrlib.h" +#include "buffer.h"  /*!re2c    re2c:define:YYCTYPE  = "unsigned char"; @@ -55,10 +55,10 @@  */  // Try to match URI autolink after first <, returning number of chars matched. -extern int scan_autolink_uri(bstring s, int pos) +extern int scan_autolink_uri(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>]  { return (p - start); } @@ -67,10 +67,10 @@ extern int scan_autolink_uri(bstring s, int pos)  }  // Try to match email autolink after first <, returning num of chars matched. -extern int scan_autolink_email(bstring s, int pos) +extern int scan_autolink_email(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ @@ -83,10 +83,10 @@ extern int scan_autolink_email(bstring s, int pos)  }  // Try to match an HTML tag after first <, returning num of chars matched. 
-extern int scan_html_tag(bstring s, int pos) +extern int scan_html_tag(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    htmltag { return (p - start); } @@ -96,10 +96,10 @@ extern int scan_html_tag(bstring s, int pos)  // Try to match an HTML block tag including first <,  // returning num of chars matched. -extern int scan_html_block_tag(bstring s, int pos) +extern int scan_html_block_tag(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    [<] [/] blocktagname (spacechar | [>])  { return (p - start); } @@ -113,10 +113,10 @@ extern int scan_html_block_tag(bstring s, int pos)  // This may optionally be contained in <..>; otherwise  // whitespace and unbalanced right parentheses aren't allowed.  // Newlines aren't ever allowed. -extern int scan_link_url(bstring s, int pos) +extern int scan_link_url(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } @@ -128,10 +128,10 @@ extern int scan_link_url(bstring s, int pos)  // Try to match a link title (in single quotes, in double quotes, or  // in parentheses), returning number of chars matched.  Allow one  // level of internal nesting (quotes within quotes). 
-extern int scan_link_title(bstring s, int pos) +extern int scan_link_title(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    ["] (escaped_char|[^"\x00])* ["]   { return (p - start); } @@ -142,9 +142,9 @@ extern int scan_link_title(bstring s, int pos)  }  // Match space characters, including newlines. -extern int scan_spacechars(bstring s, int pos) +extern int scan_spacechars(const gh_buf *s, int pos)  { -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    [ \t\n]* { return (p - start); } @@ -153,10 +153,10 @@ extern int scan_spacechars(bstring s, int pos)  }  // Match ATX header start. -extern int scan_atx_header_start(bstring s, int pos) +extern int scan_atx_header_start(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    [#]{1,6} ([ ]+|[\n])  { return (p - start); } @@ -166,10 +166,10 @@ extern int scan_atx_header_start(bstring s, int pos)  // Match sexext header line.  Return 1 for level-1 header,  // 2 for level-2, 0 for no match. -extern int scan_setext_header_line(bstring s, int pos) +extern int scan_setext_header_line(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);  /*!re2c    [=]+ [ ]* [\n] { return 1; }    [-]+ [ ]* [\n] { return 2; } @@ -180,10 +180,10 @@ extern int scan_setext_header_line(bstring s, int pos)  // Scan a horizontal rule line: "...three or more hyphens, asterisks,  // or underscores on a line by themselves. If you wish, you may use  // spaces between the hyphens or asterisks." 
-extern int scan_hrule(bstring s, int pos) +extern int scan_hrule(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } @@ -194,10 +194,10 @@ extern int scan_hrule(bstring s, int pos)  }  // Scan an opening code fence. -extern int scan_open_code_fence(bstring s, int pos) +extern int scan_open_code_fence(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } @@ -207,10 +207,10 @@ extern int scan_open_code_fence(bstring s, int pos)  }  // Scan a closing code fence with length at least len. -extern int scan_close_code_fence(bstring s, int pos, int len) +extern int scan_close_code_fence(const gh_buf *s, int pos, int len)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    ([`]{3,} | [~]{3,}) / spacechar* [\n] @@ -225,10 +225,10 @@ extern int scan_close_code_fence(bstring s, int pos, int len)  // Scans an entity.  // Returns number of chars matched. 
-extern int scan_entity(bstring s, int pos) +extern int scan_entity(const gh_buf *s, int pos)  {    unsigned char * marker = NULL; -  unsigned char * p = &(s->data[pos]); +  unsigned char * p = &(s->ptr[pos]);    unsigned char * start = p;  /*!re2c    [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] @@ -1,38 +1,38 @@  #include <stdbool.h> -#include "bstrlib.h" +#include "buffer.h"  #include "uthash.h"  #define VERSION "0.1"  #define CODE_INDENT 4 +typedef struct { +	const unsigned char *data; +	int len; +	int alloc; +} chunk; +  typedef struct Inline { -  enum { str, softbreak, linebreak, code, raw_html, entity, -         emph, strong, link, image } tag; -  union { -    bstring                  literal; -    struct Inline*           inlines; -    struct { struct Inline*  label; -             bstring         url; -             bstring         title; -           } linkable; -  } content; -  struct Inline*             next; +	enum { str, softbreak, linebreak, code, raw_html, entity, +		emph, strong, link, image } tag; +	union { +		chunk literal; +		struct Inline *inlines; +		struct { +			struct Inline *label; +			unsigned char *url; +			unsigned char *title; +		} linkable; +	} content; +	struct Inline *next;  } inl;  typedef struct Reference { -  bstring         label; -  bstring         url; -  bstring         title; +  unsigned char *label; +  unsigned char *url; +  unsigned char *title;    UT_hash_handle  hh;      // used by uthash  } reference; -typedef struct Subject { -  bstring        buffer; -  int            pos; -  reference**    reference_map; -  int            label_nestlevel; -} subject; -  // Types for blocks  struct ListData { @@ -51,7 +51,7 @@ struct FencedCodeData {    int               fence_length;    int               fence_offset;    char              fence_char; -  bstring           info; +  gh_buf            info;  };  typedef struct Block { @@ -77,7 +77,8 @@ typedef struct Block {    struct Block*      last_child;    
struct Block*      parent;    struct Block*      top; -  bstring            string_content; +  gh_buf			 string_content; +  int				 string_pos;    inl*               inline_content;    union  {      struct ListData       list_data; @@ -89,33 +90,34 @@ typedef struct Block {    struct Block *     prev;  } block; -int parse_inline(subject* subj, inl ** last); -inl* parse_inlines(bstring input, reference** refmap); -inl* parse_inlines_while(subject* subj, int (*f)(subject*)); +inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap);  void free_inlines(inl* e); -int parse_reference(bstring input, reference** refmap); + +int parse_reference(gh_buf *input, int input_pos, reference** refmap);  void free_reference(reference *ref);  void free_reference_map(reference **refmap); -reference* make_reference(bstring label, bstring url, bstring title); -reference* lookup_reference(reference** refmap, bstring label); +  void add_reference(reference** refmap, reference* ref); -int unescape(bstring s); +void unescape_buffer(gh_buf *buf);  extern block* make_document();  extern block* add_child(block* parent,                          int block_type, int start_line, int start_column);  void free_blocks(block* e); +block *stmd_parse_document(const char *buffer, size_t len); +  // FOR NOW: -int process_inlines(block* cur, reference** refmap); -int incorporate_line(bstring ln, int line_number, block** curptr); -int finalize(block* b, int line_number); +void process_inlines(block* cur, reference** refmap); +void incorporate_line(gh_buf *ln, int line_number, block** curptr); +void finalize(block* b, int line_number);  void print_inlines(inl* ils, int indent);  void print_blocks(block* blk, int indent); -int blocks_to_html(block* b, bstring* result, bool tight); -int inlines_to_html(inl* b, bstring* result); +/* TODO */ +// int blocks_to_html(block* b, bstring* result, bool tight); +// int inlines_to_html(inl* b, bstring* result); -int bdetab(bstring s, int utf8); +void 
utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len); @@ -2,105 +2,142 @@  #include "bstrlib.h"  #include "debug.h" -#define advance(s) \ -  s++; \ -  check(*s >> 6 == 0x02, "UTF-8 decode error on byte %x", *s); - -// Reads a unicode code point from a UTF8-encoded string, and -// puts it in the pointer n. If something illegal -// is encountered, 0xFFFD is emitted. -// Returns a pointer to next position in string, or NULL if no -// more characters remain. -extern unsigned char * from_utf8(unsigned char * s, unsigned int *n) +static const int8_t utf8proc_utf8class[256] = { +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 }; + +ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)  { -  int x = 0; - -  if (*s == 0) { -    return NULL; -  } else if (*s < 0x80) { -    x = *s; -  } else if (*s >> 5 == 0x06) { -    x = *s & 0x1F; -    advance(s); -    x = (x << 6) + (*s & 0x3F); -  } else if (*s >> 4 == 0x0E) { -    x = *s & 0x0F; -    advance(s); -    x = (x << 6) + (*s & 0x3F); -    advance(s); -    x = (x << 6) + (*s & 0x3F); -  } else if (*s >> 3 == 0x1E) { -    x = *s & 0x07; -    advance(s); -    x = (x << 6) + (*s & 0x3F); -    advance(s); -    x = (x << 6) + (*s & 0x3F); -    advance(s); -    x = (x << 6) 
+ (*s & 0x3F); -  } else if (*s >> 2 == 0x3E) { -    x = *s & 0x03; -    advance(s); -    x = (x << 6) + (*s & 0x3F); -    advance(s); -    x = (x << 6) + (*s & 0x3F); -    advance(s); -    x = (x << 6) + (*s & 0x3F); -    advance(s); -    x = (x << 6) + (*s & 0x3F); -   } else { -    log_err("UTF-8 decode error on byte %x", *s); -    goto error; -  } -  *n = x; -  s++; -  return s; - error: -  *n = 0xFFFD; -  return s; +	ssize_t length, i; + +	if (!str_len) +		return 0; + +	length = utf8proc_utf8class[str[0]]; + +	if (!length) +		return -1; + +	if (str_len >= 0 && length > str_len) +		return -1; + +	for (i = 1; i < length; i++) { +		if ((str[i] & 0xC0) != 0x80) +			return -1; +	} + +	return length; +} + +ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst) +{ +	ssize_t length; +	int32_t uc = -1; + +	*dst = -1; +	length = utf8proc_charlen(str, str_len); +	if (length < 0) +		return -1; + +	switch (length) { +		case 1: +			uc = str[0]; +			break; +		case 2: +			uc = ((str[0] & 0x1F) <<  6) + (str[1] & 0x3F); +			if (uc < 0x80) uc = -1; +			break; +		case 3: +			uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) <<  6) +				+ (str[2] & 0x3F); +			if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || +					(uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; +			break; +		case 4: +			uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +				+ ((str[2] & 0x3F) <<  6) + (str[3] & 0x3F); +			if (uc < 0x10000 || uc >= 0x110000) uc = -1; +			break; +	} + +	if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) +		return -1; + +	*dst = uc; +	return length;  } -// Converts the unicode code point c to UTF-8, -// putting the result in dest.  Returns 0 on success, -1 on error. 
-extern int to_utf8(unsigned int c, bstring dest) +void utf8_encode_char(int32_t uc, gh_buf *buf)  { -  if (c < 0x80) { -    bconchar(dest, c); -  } else if (c < 0x800) { -    bconchar(dest, 192 + c/64); -    bconchar(dest, 128 + c%64); -  } else if (c - 0xd800u < 0x800) { -    goto error; -  } else if (c < 0x10000) { -    bconchar(dest, 224 + c / 4096); -    bconchar(dest, 128 + c /64%64); -    bconchar(dest, 128 + c%64); -  } else if (c < 0x110000) { -    bconchar(dest, 240 + c/262144); -    bconchar(dest, 128 + c/4096%64); -    bconchar(dest, 128 + c/64%64); -    bconchar(dest, 128 + c%64); -  } else { -    goto error; -  } -  return 0; -error: -  return -1; +	char dst[4]; +	int len = 0; + +	if (uc < 0x00) { +		assert(false); +	} else if (uc < 0x80) { +		dst[0] = uc; +		len = 1; +	} else if (uc < 0x800) { +		dst[0] = 0xC0 + (uc >> 6); +		dst[1] = 0x80 + (uc & 0x3F); +		len = 2; +	} else if (uc == 0xFFFF) { +		dst[0] = 0xFF; +		return 1; +	} else if (uc == 0xFFFE) { +		dst[0] = 0xFE; +		len = 1; +	} else if (uc < 0x10000) { +		dst[0] = 0xE0 + (uc >> 12); +		dst[1] = 0x80 + ((uc >> 6) & 0x3F); +		dst[2] = 0x80 + (uc & 0x3F); +		len = 3; +	} else if (uc < 0x110000) { +		dst[0] = 0xF0 + (uc >> 18); +		dst[1] = 0x80 + ((uc >> 12) & 0x3F); +		dst[2] = 0x80 + ((uc >> 6) & 0x3F); +		dst[3] = 0x80 + (uc & 0x3F); +		len = 4; +	} else { +		assert(false); +	} + +	gh_buf_put(buf, dst, len);  } +void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len) +{ +	int32_t c; +  #define bufpush(x) \ -  check(to_utf8(x, buf) == 0, "UTF-8 encode error on code point  %04x", x) +	utf8proc_encode_char(x, dest) -// Returns the case-folded version of the source string, or NULL on error. 
-extern bstring case_fold(bstring source) -{ -  unsigned char * s = source->data; -  unsigned int c = 0; -  bstring buf = bfromcstr(""); -  while ((s = from_utf8(s, &c))) { -#include "case_fold_switch.c" -  } -  return buf; -error: -  return NULL; +	while (len > 0) { +		ssize_t char_len = utf8proc_iterate(str, len, &c); + +		if (char_len < 0) { +			bufpush(0xFFFD); +			continue; +		} + +#include "case_fold_switch.inc" + +		str += char_len; +		len -= char_len; +	}  }  | 
