Libparserutils
|
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <parserutils/charset/mibenum.h>
#include <parserutils/charset/utf8.h>
#include <parserutils/input/inputstream.h>
#include "input/filter.h"
#include "utils/utils.h"
Go to the source code of this file.
Data Structures | |
struct | parserutils_inputstream_private |
Private input stream definition. More... | |
Macros | |
#define | IS_ASCII(x) (((x) & 0x80) == 0) |
#define | UTF32_BOM_LEN (4) |
#define | UTF16_BOM_LEN (2) |
#define | UTF8_BOM_LEN (3) |
Typedefs | |
typedef struct parserutils_inputstream_private | parserutils_inputstream_private |
Private input stream definition. | |
Functions | |
static parserutils_error | parserutils_inputstream_refill_buffer (parserutils_inputstream_private *stream) |
Refill the UTF-8 buffer from the raw buffer. | |
static parserutils_error | parserutils_inputstream_strip_bom (uint16_t *mibenum, parserutils_buffer *buffer) |
Strip a BOM from a buffer in the given encoding. | |
parserutils_error | parserutils_inputstream_create (const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream) |
Create an input stream. | |
parserutils_error | parserutils_inputstream_destroy (parserutils_inputstream *stream) |
Destroy an input stream. | |
parserutils_error | parserutils_inputstream_append (parserutils_inputstream *stream, const uint8_t *data, size_t len) |
Append data to an input stream. | |
parserutils_error | parserutils_inputstream_insert (parserutils_inputstream *stream, const uint8_t *data, size_t len) |
Insert data into stream at current location. | |
parserutils_error | parserutils_inputstream_peek_slow (parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length) |
Look at the character in the stream that starts at offset bytes from the cursor (slow version) | |
const char * | parserutils_inputstream_read_charset (parserutils_inputstream *stream, uint32_t *source) |
Read the source charset of the input stream. | |
parserutils_error | parserutils_inputstream_change_charset (parserutils_inputstream *stream, const char *enc, uint32_t source) |
Change the source charset of the input stream. | |
#define IS_ASCII | ( | x | ) | (((x) & 0x80) == 0) |
Definition at line 209 of file inputstream.c.
#define UTF16_BOM_LEN (2) |
#define UTF32_BOM_LEN (4) |
#define UTF8_BOM_LEN (3) |
typedef struct parserutils_inputstream_private parserutils_inputstream_private |
Private input stream definition.
parserutils_error parserutils_inputstream_append | ( | parserutils_inputstream * | stream, |
const uint8_t * | data, | ||
size_t | len | ||
) |
Append data to an input stream.
stream | Input stream to append data to |
data | Data to append (in document charset), or NULL to flag EOF |
len | Length, in bytes, of data |
Definition at line 169 of file inputstream.c.
References parserutils_inputstream::had_eof, len, PARSERUTILS_BADPARM, parserutils_buffer_append(), PARSERUTILS_OK, parserutils_inputstream_private::public, and parserutils_inputstream_private::raw.
parserutils_error parserutils_inputstream_change_charset | ( | parserutils_inputstream * | stream, |
const char * | enc, | ||
uint32_t | source | ||
) |
Change the source charset of the input stream.
stream | Input stream to modify |
enc | Charset name |
source | Charset source identifier |
Definition at line 321 of file inputstream.c.
References parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_INVALID, and PARSERUTILS_OK.
parserutils_error parserutils_inputstream_create | ( | const char * | enc, |
uint32_t | encsrc, | ||
parserutils_charset_detect_func | csdetect, | ||
parserutils_inputstream ** | stream | ||
) |
Create an input stream.
enc | Document charset, or NULL to autodetect |
encsrc | Value for encoding source, if specified, or 0 |
csdetect | Charset detection function, or NULL |
stream | Pointer to location to receive stream instance |
The value 0 is defined as being the lowest priority encoding source (i.e. the default fallback encoding). Beyond this, no further interpretation is made upon the encoding source.
Definition at line 59 of file inputstream.c.
References parserutils_inputstream_private::csdetect, parserutils_inputstream::cursor, parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream::had_eof, parserutils_inputstream_private::input, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_create(), parserutils__filter_destroy(), parserutils__filter_setopt(), PARSERUTILS_BADENCODING, PARSERUTILS_BADPARM, parserutils_buffer_create(), parserutils_buffer_destroy(), parserutils_charset_mibenum_from_name(), PARSERUTILS_FILTER_SET_ENCODING, PARSERUTILS_NOMEM, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_destroy | ( | parserutils_inputstream * | stream | ) |
Destroy an input stream.
stream | Input stream to destroy |
Definition at line 144 of file inputstream.c.
References parserutils_inputstream_private::input, parserutils__filter_destroy(), PARSERUTILS_BADPARM, parserutils_buffer_destroy(), PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_insert | ( | parserutils_inputstream * | stream, |
const uint8_t * | data, | ||
size_t | len | ||
) |
Insert data into stream at current location.
stream | Input stream to insert into |
data | Data to insert (UTF-8 encoded) |
len | Length, in bytes, of data |
Definition at line 195 of file inputstream.c.
References parserutils_inputstream::cursor, len, PARSERUTILS_BADPARM, parserutils_buffer_insert(), parserutils_inputstream_private::public, and parserutils_inputstream::utf8.
parserutils_error parserutils_inputstream_peek_slow | ( | parserutils_inputstream * | stream, |
size_t | offset, | ||
const uint8_t ** | ptr, | ||
size_t * | length | ||
) |
Look at the character in the stream that starts at offset bytes from the cursor (slow version)
stream | Stream to look in |
offset | Byte offset of start of character |
ptr | Pointer to location to receive pointer to character data |
length | Pointer to location to receive character length (in bytes) |
Once the character pointed to by the result of this call has been advanced past (i.e. parserutils_inputstream_advance has caused the stream cursor to pass over the character), then no guarantee is made as to the validity of the data pointed to. Thus, any attempt to dereference the pointer after advancing past the data it points to is a bug.
Definition at line 232 of file inputstream.c.
References parserutils_inputstream::cursor, parserutils_buffer::data, parserutils_inputstream::had_eof, IS_ASCII, len, parserutils_buffer::length, PARSERUTILS_BADPARM, parserutils_charset_utf8_char_byte_length(), PARSERUTILS_EOF, parserutils_inputstream_refill_buffer(), PARSERUTILS_NEEDDATA, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, and parserutils_inputstream::utf8.
Referenced by parserutils_inputstream_peek().
const char * parserutils_inputstream_read_charset | ( | parserutils_inputstream * | stream, |
uint32_t * | source | ||
) |
Read the source charset of the input stream.
stream | Input stream to query |
source | Pointer to location to receive charset source identifier |
Definition at line 292 of file inputstream.c.
References parserutils_inputstream_private::encsrc, parserutils_inputstream_private::mibenum, and parserutils_charset_mibenum_to_name().
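static parserutils_error parserutils_inputstream_refill_buffer | ( | parserutils_inputstream_private * | stream | ) |
inline static |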
Refill the UTF-8 buffer from the raw buffer.
stream | The inputstream to operate on |
Definition at line 365 of file inputstream.c.
References parserutils_buffer::allocated, parserutils_inputstream_private::csdetect, parserutils_inputstream::cursor, parserutils_buffer::data, parserutils_inputstream_private::done_first_chunk, parserutils_filter_optparams::encoding, parserutils_inputstream_private::encsrc, parserutils_inputstream::had_eof, parserutils_inputstream_private::input, parserutils_buffer::length, parserutils_inputstream_private::mibenum, parserutils_filter_optparams::name, parserutils__filter_process_chunk(), parserutils__filter_setopt(), parserutils_buffer_discard(), parserutils_buffer_grow(), parserutils_charset_mibenum_from_name(), parserutils_charset_mibenum_to_name(), PARSERUTILS_FILTER_SET_ENCODING, parserutils_inputstream_strip_bom(), PARSERUTILS_NEEDDATA, PARSERUTILS_NOMEM, PARSERUTILS_OK, parserutils_inputstream_private::public, parserutils_inputstream_private::raw, SLEN, and parserutils_inputstream::utf8.
Referenced by parserutils_inputstream_peek_slow().
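static parserutils_error parserutils_inputstream_strip_bom | ( | uint16_t * | mibenum, |
parserutils_buffer * | buffer | ||
) |
inline static |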
Strip a BOM from a buffer in the given encoding.
mibenum | Pointer to the character set of the buffer, updated on exit |
buffer | The buffer to process |
Definition at line 496 of file inputstream.c.
References parserutils_buffer::data, parserutils_buffer::length, parserutils_buffer_discard(), parserutils_charset_mibenum_from_name(), PARSERUTILS_OK, SLEN, UTF16_BOM_LEN, UTF32_BOM_LEN, and UTF8_BOM_LEN.
Referenced by parserutils_inputstream_refill_buffer().