naev 0.11.5
utf8.h
Go to the documentation of this file.
1#pragma once
2
4#include <stdlib.h>
5#include <stdint.h>
15#include "nstring.h"
16
17/* is c the start of a utf8 sequence? true for every byte except a
   continuation byte, i.e. one whose low eight bits look like 10xxxxxx.
   note: the argument c is expanded once, inside parentheses. */
18#define isutf(c) (((c)&0xC0)!=0x80)
19
/* all-ones 32-bit value (0xFFFFFFFF).
   NOTE(review): presumably the end-of-input sentinel for the character
   readers declared below -- confirm against the implementation. */
20#define UEOF ((uint32_t)-1)
21
22/* convert UTF-8 data to wide character */
23size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
24
25/* the opposite conversion */
26size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
27
28/* single character to UTF-8, returns # bytes written */
29size_t u8_wc_toutf8(char *dest, uint32_t ch);
30
31/* character number to byte offset */
32size_t u8_offset(const char *str, size_t charnum);
33
34/* byte offset to character number */
35size_t u8_charnum(const char *s, size_t offset);
36
37/* return next character, updating an index variable */
38uint32_t u8_nextchar(const char *s, size_t *i);
39
40/* return next character, updating an index variable; a NUL byte is not treated as a terminator */
41uint32_t u8_nextmemchar(const char *s, size_t *i);
42
43/* move to next character */
44void u8_inc(const char *s, size_t *i);
45
46/* move to previous character */
47void u8_dec(const char *s, size_t *i);
48
49/* returns length of next utf-8 sequence */
50size_t u8_seqlen(const char *s);
51
52/* returns the # of bytes needed to encode a given character */
53size_t u8_charlen(uint32_t ch);
54
55/* computes the # of bytes needed to encode a WC string as UTF-8 */
56size_t u8_codingsize(uint32_t *wcstr, size_t n);
57
58char read_escape_control_char(char c);
59
60/* assuming src points to the character after a backslash, read an
61 escape sequence, storing the result in dest and returning the number of
62 input characters processed */
63size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
64
65/* convert a string "src" containing escape sequences to UTF-8 */
66size_t u8_unescape(char *buf, size_t sz, const char *src);
67
68/* utility predicates used by the above */
69int octal_digit(char c);
70int hex_digit(char c);
71
72/* return a pointer to the first occurrence of ch in s, or NULL if not
73 found. the character index of the match is returned in *charn. */
74char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
75
76/* same as the above, but searches a buffer of a given size instead of
77 a NUL-terminated string. */
78char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
79
80char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
81
82/* count the number of characters in a UTF-8 string */
83size_t u8_strlen(const char *s);
84
85/* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
86int u8_isvalid(const char *str, size_t length);
87
88/* reverse a UTF-8 string. len is length in bytes. dest and src must both
89 be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */
90int u8_reverse(char *dest, char *src, size_t len);
static const double c[]
Definition rng.c:264