This commit is contained in:
2024-12-14 10:43:33 +03:00
parent 7f8090bb1f
commit bbf76e4925
1292 changed files with 2823 additions and 500876 deletions

View File

@@ -1,99 +0,0 @@
/* Conversion to UTF-16/UTF-32 from legacy encodings.
Copyright (C) 2002, 2006-2007, 2009-2024 Free Software Foundation, Inc.
This file is free software.
It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
You can redistribute it and/or modify it under either
- the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3, or (at your
option) any later version, or
- the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option)
any later version, or
- the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License and the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public
License and of the GNU General Public License along with this
program. If not, see <https://www.gnu.org/licenses/>. */
/* Converts the SRCLEN bytes at SRC from the encoding FROMCODE into a
   sequence of UNITs (the unit type and FUNC are supplied by the file that
   includes this template).

   HANDLER is forwarded unchanged to the underlying converter.
   If OFFSETS is not NULL it is treated as an array of SRCLEN entries; on
   return every entry that is not (size_t)(-1) is expressed as an index
   counted in UNITs of the result.
   RESULTBUF and *LENGTHP are forwarded as the initial buffer and its size
   in UNITs; on success *LENGTHP holds the number of UNITs in the result.

   Returns the result, or NULL on failure with errno as left by the failing
   step.  An empty result is never reported as NULL: when no buffer was
   supplied or allocated, a small placeholder block is allocated so that a
   NULL return always means failure.  */
UNIT *
FUNC (const char *fromcode,
      enum iconv_ilseq_handler handler,
      const char *src, size_t srclen,
      size_t *offsets,
      UNIT *resultbuf, size_t *lengthp)
{
#if HAVE_UTF_NAME
  /* Direct path: convert straight into the target UTF encoding, working
     in bytes, then rescale everything to UNITs.  */
  char *result = (char *) resultbuf;
  size_t length = *lengthp * sizeof (UNIT);

  if (mem_iconveha (src, srclen, fromcode, UTF_NAME, true, handler,
                    offsets, &result, &length) < 0)
    return NULL;
  if (offsets != NULL)
    {
      /* Convert 'char *' offsets to 'UNIT *' offsets.  */
      size_t *offsets_end = offsets + srclen;
      size_t *o;

      for (o = offsets; o < offsets_end; o++)
        if (*o != (size_t)(-1))
          *o = *o / sizeof (UNIT);
    }
  /* The byte length must describe a whole number of UNITs.  */
  if ((length % sizeof (UNIT)) != 0)
    abort ();
  if (result == NULL) /* when (resultbuf == NULL && length == 0) */
    {
      /* Without this, an empty conversion would be indistinguishable from
         a failure for the caller.  */
      result = (char *) malloc (sizeof (UNIT));
      if (result == NULL)
        {
          errno = ENOMEM;
          return NULL;
        }
    }
  *lengthp = length / sizeof (UNIT);
  return (UNIT *) result;
#else
  /* Indirect path: convert to UTF-8 first, then from UTF-8 to UNITs.  */
  uint8_t *utf8_string;
  size_t utf8_length;
  UNIT *result;

  utf8_string =
    u8_conv_from_encoding (fromcode, handler, src, srclen, offsets,
                           NULL, &utf8_length);
  if (utf8_string == NULL)
    return NULL;
  result = U8_TO_U (utf8_string, utf8_length, resultbuf, lengthp);
  if (result == NULL)
    {
      /* Keep the errno of the failed conversion across free().  */
      int saved_errno = errno;
      free (utf8_string);
      errno = saved_errno;
      return NULL;
    }
  if (offsets != NULL)
    {
      /* OFFSETS currently index into utf8_string.  Walk both strings in
         lockstep, one character at a time, and rewrite each recorded
         offset as the matching index into RESULT.  */
      size_t length = *lengthp;
      size_t *offsets_end = offsets + srclen;
      size_t *o;
      size_t off8 = 0;      /* offset into utf8_string */
      size_t offunit = 0;   /* offset into result */

      for (o = offsets; o < offsets_end; o++)
        if (*o != (size_t)(-1))
          {
            while (off8 < *o)
              {
                int count8 = u8_mblen (utf8_string + off8, utf8_length - off8);
                int countunit = U_MBLEN (result + offunit, length - offunit);
                if (count8 < 0 || countunit < 0)
                  abort ();
                off8 += count8;
                offunit += countunit;
              }
            /* A recorded offset must fall on a character boundary.  */
            if (*o != off8)
              abort ();
            *o = offunit;
          }
    }
  free (utf8_string);
  return result;
#endif
}

View File

@@ -1,164 +0,0 @@
/* Conversion from UTF-16/UTF-32 to legacy encodings.
Copyright (C) 2002, 2006-2024 Free Software Foundation, Inc.
This file is free software.
It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
You can redistribute it and/or modify it under either
- the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3, or (at your
option) any later version, or
- the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option)
any later version, or
- the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License and the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public
License and of the GNU General Public License along with this
program. If not, see <https://www.gnu.org/licenses/>. */
/* Converts the SRCLEN UNITs at SRC into the encoding TOCODE (the unit type
   and FUNC are supplied by the file that includes this template).

   HANDLER is forwarded to the underlying converter.
   If OFFSETS is not NULL it must have room for SRCLEN entries; it is filled
   with one entry per source unit, taken from the per-byte offsets reported
   by the underlying converter.
   RESULTBUF and *LENGTHP are forwarded as the initial buffer and its size;
   on success *LENGTHP holds the length of the result.

   Returns the result, or NULL on failure with errno set (ENOMEM for
   allocation failures, including offset tables too large to size without
   overflow; otherwise errno as left by the failing step).  */
char *
FUNC (const char *tocode,
      enum iconv_ilseq_handler handler,
      const UNIT *src, size_t srclen,
      size_t *offsets,
      char *resultbuf, size_t *lengthp)
{
#if HAVE_UTF_NAME
  /* Direct path: hand the units to the converter as raw bytes.  */
  size_t *scaled_offsets;
  char *result;
  size_t length;

  if (offsets != NULL && srclen > 0)
    {
      /* One slot per source byte.  Treat a byte count that does not fit in
         size_t like an allocation failure instead of letting the
         multiplication wrap and under-allocate.  */
      scaled_offsets =
        (srclen <= (size_t)(-1) / sizeof (UNIT) / sizeof (size_t)
         ? (size_t *) malloc (srclen * sizeof (UNIT) * sizeof (size_t))
         : NULL);
      if (scaled_offsets == NULL)
        {
          errno = ENOMEM;
          return NULL;
        }
    }
  else
    scaled_offsets = NULL;

  result = resultbuf;
  length = *lengthp;
  if (mem_iconveha ((const char *) src, srclen * sizeof (UNIT),
                    UTF_NAME, tocode,
                    handler == iconveh_question_mark, handler,
                    scaled_offsets, &result, &length) < 0)
    {
      /* Keep the converter's errno across free().  */
      int saved_errno = errno;
      free (scaled_offsets);
      errno = saved_errno;
      return NULL;
    }

  if (offsets != NULL)
    {
      /* Convert scaled_offsets[srclen * sizeof (UNIT)] to
         offsets[srclen]: keep the entry of the first byte of each unit.  */
      size_t i;

      for (i = 0; i < srclen; i++)
        offsets[i] = scaled_offsets[i * sizeof (UNIT)];
      free (scaled_offsets);
    }

  if (result == NULL) /* when (resultbuf == NULL && length == 0) */
    {
      /* Return a placeholder block so that NULL always means failure.  */
      result = (char *) malloc (1);
      if (result == NULL)
        {
          errno = ENOMEM;
          return NULL;
        }
    }
  *lengthp = length;
  return result;
#else
  /* Indirect path: convert the units to UTF-8 first (into a stack buffer
     when it fits), then from UTF-8 to TOCODE.  */
  uint8_t tmpbuf[4096];
  size_t tmpbufsize = SIZEOF (tmpbuf);
  uint8_t *utf8_src;
  size_t utf8_srclen;
  size_t *scaled_offsets;
  char *result;

  utf8_src = U_TO_U8 (src, srclen, tmpbuf, &tmpbufsize);
  if (utf8_src == NULL)
    return NULL;
  utf8_srclen = tmpbufsize;

  if (offsets != NULL && utf8_srclen > 0)
    {
      /* One slot per UTF-8 byte; guard the size computation against
         wrap-around and report it like an allocation failure.  */
      scaled_offsets =
        (utf8_srclen <= (size_t)(-1) / sizeof (size_t)
         ? (size_t *) malloc (utf8_srclen * sizeof (size_t))
         : NULL);
      if (scaled_offsets == NULL)
        {
          if (utf8_src != tmpbuf)
            free (utf8_src);
          errno = ENOMEM;
          return NULL;
        }
    }
  else
    scaled_offsets = NULL;

  result = u8_conv_to_encoding (tocode, handler, utf8_src, utf8_srclen,
                                scaled_offsets, resultbuf, lengthp);
  if (result == NULL)
    {
      /* Keep the converter's errno across the cleanup.  */
      int saved_errno = errno;
      free (scaled_offsets);
      if (utf8_src != tmpbuf)
        free (utf8_src);
      errno = saved_errno;
      return NULL;
    }

  if (offsets != NULL)
    {
      size_t iunit;     /* offset into src */
      size_t i8;        /* offset into utf8_src */

      /* Default every entry to "no offset"; only the first unit of each
         character receives a real value below.  */
      for (iunit = 0; iunit < srclen; iunit++)
        offsets[iunit] = (size_t)(-1);

      /* Walk src and utf8_src in lockstep, one character at a time.  */
      iunit = 0;
      i8 = 0;
      while (iunit < srclen && i8 < utf8_srclen)
        {
          int countunit;
          int count8;

          offsets[iunit] = scaled_offsets[i8];

          countunit = U_MBLEN (src + iunit, srclen - iunit);
          count8 = u8_mblen (utf8_src + i8, utf8_srclen - i8);
          if (countunit < 0 || count8 < 0)
            abort ();
          iunit += countunit;
          i8 += count8;
        }
      /* Check that utf8_src has been traversed entirely.  */
      if (i8 < utf8_srclen)
        abort ();
      /* Check that src has been traversed entirely, except possibly for an
         incomplete sequence of units at the end.  */
      if (iunit < srclen)
        {
          offsets[iunit] = *lengthp;
          if (!(U_MBLEN (src + iunit, srclen - iunit) < 0))
            abort ();
        }
      free (scaled_offsets);
    }

  if (utf8_src != tmpbuf)
    free (utf8_src);
  return result;
#endif
}

View File

@@ -1,40 +0,0 @@
/* Conversion to UTF-8/UTF-16/UTF-32 from legacy encodings.
Copyright (C) 2002, 2006-2007, 2009-2024 Free Software Foundation, Inc.
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 2.1 of the
License, or (at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Converts the NUL-terminated STRING from the encoding FROMCODE into a
   NUL-terminated sequence of UNITs, forwarding HANDLER to the underlying
   converter.

   Returns the converted string, or NULL on failure.  A conversion whose
   output does not consist of exactly one NUL unit, located at the end, is
   rejected with errno set to EILSEQ.  */
UNIT *
FUNC (const char *string,
      const char *fromcode,
      enum iconv_ilseq_handler handler)
{
  size_t nunits;
  /* The terminating NUL byte is converted along with the text.  */
  UNIT *converted =
    U_CONV_FROM_ENCODING (fromcode, handler,
                          string, strlen (string) + 1, NULL,
                          NULL, &nunits);

  if (converted == NULL)
    return NULL;

  /* Accept only output that ends in a NUL unit and contains no other.  */
  if (nunits == 0
      || converted[nunits - 1] != 0
      || U_STRLEN (converted) != nunits - 1)
    {
      free (converted);
      errno = EILSEQ;
      return NULL;
    }

  return converted;
}

View File

@@ -1,71 +0,0 @@
/* Conversion from UTF-16/UTF-32 to legacy encodings.
Copyright (C) 2002, 2006-2007, 2009-2024 Free Software Foundation, Inc.
This file is free software.
It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
You can redistribute it and/or modify it under either
- the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3, or (at your
option) any later version, or
- the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option)
any later version, or
- the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License and the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public
License and of the GNU General Public License along with this
program. If not, see <https://www.gnu.org/licenses/>. */
/* Converts the NUL-terminated unit string STRING into a NUL-terminated
   byte string in the encoding TOCODE, forwarding HANDLER to the underlying
   converter.

   Returns the converted string, or NULL on failure.  In the direct path a
   conversion whose output does not consist of exactly one NUL byte,
   located at the end, is rejected with errno set to EILSEQ.  */
char *
FUNC (const UNIT *string,
      const char *tocode,
      enum iconv_ilseq_handler handler)
{
#if HAVE_UTF_NAME
  /* Direct path: feed the units, terminator included, to the converter as
     raw bytes and let it allocate the output.  */
  char *converted = NULL;
  size_t nbytes = 0;
  int rc =
    mem_iconveha ((const char *) string,
                  (U_STRLEN (string) + 1) * sizeof (UNIT),
                  UTF_NAME, tocode,
                  handler == iconveh_question_mark, handler,
                  NULL, &converted, &nbytes);

  if (rc < 0)
    return NULL;

  /* Accept only output that ends in a NUL byte and contains no other.  */
  if (nbytes == 0
      || converted[nbytes - 1] != '\0'
      || strlen (converted) != nbytes - 1)
    {
      free (converted);
      errno = EILSEQ;
      return NULL;
    }
  return converted;
#else
  /* Indirect path: go through UTF-8, using a stack buffer for the
     intermediate string when it fits.  */
  uint8_t scratch[4096];
  size_t scratch_size = SIZEOF (scratch);
  uint8_t *utf8;
  char *converted;

  utf8 = U_TO_U8 (string, U_STRLEN (string) + 1, scratch, &scratch_size);
  if (utf8 == NULL)
    return NULL;

  converted = u8_strconv_to_encoding (utf8, tocode, handler);
  if (converted != NULL)
    {
      if (utf8 != scratch)
        free (utf8);
      return converted;
    }

  /* Failure: release the intermediate string without disturbing errno.  */
  if (utf8 != scratch)
    {
      int saved_errno = errno;
      free (utf8);
      errno = saved_errno;
    }
  return NULL;
#endif
}