139#include <config_auto.h>
149#include "allheaders.h"
152static const l_uint32 MaxPtrArraySize = 50000000;
173 if (n <= 0 || n > (l_int32)MaxPtrArraySize)
177 if ((sa->
array = (
char **)LEPT_CALLOC(n,
sizeof(
char *))) == NULL) {
179 return (
SARRAY *)ERROR_PTR(
"ptr array not made", __func__, NULL);
204 return (
SARRAY *)ERROR_PTR(
"n must be > 0", __func__, NULL);
206 return (
SARRAY *)ERROR_PTR(
"initstr not defined", __func__, NULL);
209 for (i = 0; i < n; i++)
230char separators[] =
" \n\t";
231l_int32 i, nsub, size, inword;
235 return (
SARRAY *)ERROR_PTR(
"textstr not defined", __func__, NULL);
238 size = strlen(
string);
241 for (i = 0; i < size; i++) {
242 if (inword == FALSE &&
243 (
string[i] !=
' ' &&
string[i] !=
'\t' &&
string[i] !=
'\n')) {
246 }
else if (inword == TRUE &&
247 (
string[i] ==
' ' ||
string[i] ==
'\t' ||
string[i] ==
'\n')) {
253 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
254 sarraySplitString(sa,
string, separators);
279l_int32 i, nsub, size, startptr;
280char *cstring, *substring;
284 return (
SARRAY *)ERROR_PTR(
"textstr not defined", __func__, NULL);
287 size = strlen(
string);
289 for (i = 0; i < size; i++) {
290 if (
string[i] ==
'\n')
295 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
299 if ((cstring = stringNew(
string)) == NULL) {
301 return (
SARRAY *)ERROR_PTR(
"cstring not made", __func__, NULL);
305 for (i = 0; i < size; i++) {
306 if (cstring[i] ==
'\n') {
308 if (i > 0 && cstring[i - 1] ==
'\r')
309 cstring[i - 1] =
'\0';
310 if ((substring = stringNew(cstring + startptr)) == NULL) {
313 return (
SARRAY *)ERROR_PTR(
"substring not made",
321 if (startptr < size) {
322 if ((substring = stringNew(cstring + startptr)) == NULL) {
325 return (
SARRAY *)ERROR_PTR(
"substring not made",
333 sarraySplitString(sa,
string,
"\r\n");
359 L_WARNING(
"ptr address is NULL!\n", __func__);
362 if ((sa = *psa) == NULL)
367 for (i = 0; i < sa->
n; i++) {
369 LEPT_FREE(sa->
array[i]);
371 LEPT_FREE(sa->
array);
392 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
395 return (
SARRAY *)ERROR_PTR(
"csa not made", __func__, NULL);
397 for (i = 0; i < sa->
n; i++)
414 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
442 return ERROR_INT(
"sa not defined", __func__, 1);
444 return ERROR_INT(
"string not defined", __func__, 1);
446 return ERROR_INT(
"invalid copyflag", __func__, 1);
451 return ERROR_INT(
"extension failed", __func__, 1);
455 sa->
array[n] = stringNew(
string);
457 sa->
array[n] = (
char *)
string;
478size_t oldsize, newsize;
481 return ERROR_INT(
"sa not defined", __func__, 1);
482 if (sa->
nalloc >= (l_int32)MaxPtrArraySize)
483 return ERROR_INT(
"sa at maximum ptr size; can't extend", __func__, 1);
484 oldsize = sa->
nalloc *
sizeof(
char *);
485 if (sa->
nalloc > (l_int32)(MaxPtrArraySize / 2)) {
486 newsize = MaxPtrArraySize *
sizeof(
char *);
487 sa->
nalloc = (l_int32)MaxPtrArraySize;
489 newsize = 2 * oldsize;
492 if ((sa->
array = (
char **)reallocNew((
void **)&sa->
array,
493 oldsize, newsize)) == NULL)
494 return ERROR_INT(
"new ptr array not returned", __func__, 1);
516 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
519 return (
char *)ERROR_PTR(
"array not returned", __func__, NULL);
521 if (index < 0 || index >= n)
522 return (
char *)ERROR_PTR(
"array index out of bounds", __func__, NULL);
524 string = array[index];
530 for (i = index; i < n - 1; i++)
531 array[i] = array[i + 1];
566 return ERROR_INT(
"sa not defined", __func__, 1);
568 if (index < 0 || index >= n)
569 return ERROR_INT(
"array index out of bounds", __func__, 1);
571 return ERROR_INT(
"newstr not defined", __func__, 1);
573 return ERROR_INT(
"invalid copyflag", __func__, 1);
575 LEPT_FREE(sa->
array[index]);
579 str = stringNew(newstr);
580 sa->
array[index] = str;
597 return ERROR_INT(
"sa not defined", __func__, 1);
598 for (i = 0; i < sa->
n; i++) {
599 LEPT_FREE(sa->
array[i]);
620 return ERROR_INT(
"sa not defined", __func__, 0);
647 return (
char **)ERROR_PTR(
"sa not defined", __func__, NULL);
650 if (pnalloc) *pnalloc = sa->
nalloc;
678 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
679 if (index < 0 || index >= sa->
n)
680 return (
char *)ERROR_PTR(
"index not valid", __func__, NULL);
682 return (
char *)ERROR_PTR(
"invalid copyflag", __func__, NULL);
685 return sa->
array[index];
687 return stringNew(sa->
array[index]);
720 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
754char *dest, *src, *str;
755l_int32 n, i, last, size, index, len;
758 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
759 if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2 && addnlflag != 3)
760 return (
char *)ERROR_PTR(
"invalid addnlflag", __func__, NULL);
768 return stringNew(
"");
770 return stringNew(
"\n");
772 return stringNew(
" ");
774 return stringNew(
",");
776 return (
char *)ERROR_PTR(
"first not valid", __func__, NULL);
781 if (first < 0 || first >= n)
782 return (
char *)ERROR_PTR(
"first not valid", __func__, NULL);
783 if (nstrings == 0 || (nstrings > n - first))
784 nstrings = n - first;
785 last = first + nstrings - 1;
789 for (i = first; i <= last; i++) {
791 return (
char *)ERROR_PTR(
"str not found", __func__, NULL);
792 size += strlen(str) + 2;
794 if ((dest = (
char *)LEPT_CALLOC(size + 1,
sizeof(
char))) == NULL)
795 return (
char *)ERROR_PTR(
"dest not made", __func__, NULL);
799 for (i = first; i <= last; i++) {
802 memcpy(dest + index, src, len);
804 if (addnlflag == 1) {
807 }
else if (addnlflag == 2) {
810 }
else if (addnlflag == 3) {
848l_int32 i, first, ntot, nstr;
854 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
857 return (
SARRAY *)ERROR_PTR(
"n must be >= 1", __func__, NULL);
859 L_ERROR(
"n = %d > ntot = %d\n", __func__, n, ntot);
862 if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2 && addnlflag != 3)
863 return (
SARRAY *)ERROR_PTR(
"invalid addnlflag", __func__, NULL);
866 na = numaGetUniformBinSizes(ntot, n);
867 for (i = 0, first = 0; i < n; i++) {
868 numaGetIValue(na, i, &nstr);
901 return ERROR_INT(
"sa1 not defined", __func__, 1);
903 return ERROR_INT(
"sa2 not defined", __func__, 1);
906 for (i = 0; i < n; i++) {
909 L_ERROR(
"failed to add string at i = %d\n", __func__, i);
944 return ERROR_INT(
"sa1 not defined", __func__, 1);
946 return ERROR_INT(
"sa2 not defined", __func__, 1);
951 if (end < 0 || end >= n)
954 return ERROR_INT(
"start > end", __func__, 1);
956 for (i = start; i <= end; i++) {
987 const char *padstring)
992 return ERROR_INT(
"both sa1 and sa2 not defined", __func__, 1);
997 for (i = n1; i < n2; i++)
999 }
else if (n1 > n2) {
1000 for (i = n2; i < n1; i++)
1046char emptystring[] =
"";
1047l_int32 n, i, len, totlen;
1051 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
1057 for (i = 0; i < n; i++) {
1070 }
else if (totlen == 0 && len + 1 > linesize) {
1072 }
else if (totlen + len + 1 > linesize) {
1111sarraySplitString(
SARRAY *sa,
1113 const char *separators)
1115char *cstr, *substr, *saveptr;
1118 return ERROR_INT(
"sa not defined", __func__, 1);
1120 return ERROR_INT(
"str not defined", __func__, 1);
1122 return ERROR_INT(
"separators not defined", __func__, 1);
1124 cstr = stringNew(str);
1126 substr = strtokSafe(cstr, separators, &saveptr);
1129 while ((substr = strtokSafe(NULL, separators, &saveptr)))
1160l_int32 n, i, offset, found;
1164 return (
SARRAY *)ERROR_PTR(
"sain not defined", __func__, NULL);
1167 if (!substr || n == 0)
1171 for (i = 0; i < n; i++) {
1173 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
1174 strlen(substr), &offset, &found);
1209 return (
SARRAY *)ERROR_PTR(
"sain not defined", __func__, NULL);
1210 if (first < 0) first = 0;
1212 if (last <= 0) last = n - 1;
1214 L_WARNING(
"last > n - 1; setting to n - 1\n", __func__);
1218 return (
SARRAY *)ERROR_PTR(
"first must be >= last", __func__, NULL);
1221 for (i = first; i <= last; i++) {
1269 l_int32 *pactualstart,
1276l_int32 n, i, offset, found;
1279 return ERROR_INT(
"sa not defined", __func__, 1);
1280 if (!pactualstart || !pend || !pnewstart)
1281 return ERROR_INT(
"not all range addresses defined", __func__, 1);
1283 *pactualstart = *pend = *pnewstart = n;
1285 return ERROR_INT(
"substr not defined", __func__, 1);
1288 if (start < 0 || start >= n)
1290 for (i = start; i < n; i++) {
1292 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
1293 strlen(substr), &offset, &found);
1297 if (!found || offset != loc)
break;
1305 *pactualstart = start;
1306 for (i = start + 1; i < n; i++) {
1308 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
1309 strlen(substr), &offset, &found);
1313 if (found && offset == loc)
break;
1323 for (i = start; i < n; i++) {
1325 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
1326 strlen(substr), &offset, &found);
1330 if (!found || offset != loc)
break;
1356 return (
SARRAY *)ERROR_PTR(
"filename not defined", __func__, NULL);
1358 if ((fp = fopenReadStream(filename)) == NULL)
1359 return (
SARRAY *)ERROR_PTR_1(
"stream not opened",
1360 filename, __func__, NULL);
1364 return (
SARRAY *)ERROR_PTR_1(
"sa not read", filename, __func__, NULL);
1390l_int32 i, n, size, index, bufsize, version, ignore, success;
1394 return (
SARRAY *)ERROR_PTR(
"stream not defined", __func__, NULL);
1396 if (fscanf(fp,
"\nSarray Version %d\n", &version) != 1)
1397 return (
SARRAY *)ERROR_PTR(
"not an sarray file", __func__, NULL);
1399 return (
SARRAY *)ERROR_PTR(
"invalid sarray version", __func__, NULL);
1400 if (fscanf(fp,
"Number of strings = %d\n", &n) != 1)
1401 return (
SARRAY *)ERROR_PTR(
"error on # strings", __func__, NULL);
1403 return (
SARRAY *)ERROR_PTR(
"num string ptrs <= 0", __func__, NULL);
1404 if (n > (l_int32)MaxPtrArraySize)
1405 return (
SARRAY *)ERROR_PTR(
"too many string ptrs", __func__, NULL);
1406 if (n == 0) L_INFO(
"the sarray is empty\n", __func__);
1410 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
1412 stringbuf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
1414 for (i = 0; i < n; i++) {
1416 if ((fscanf(fp,
"%d[%d]:", &index, &size) != 2) || (size > (1 << 30))) {
1418 L_ERROR(
"error on string size\n", __func__);
1422 if (size > bufsize - 5) {
1423 LEPT_FREE(stringbuf);
1424 bufsize = (l_int32)(1.5 * size);
1425 stringbuf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
1428 if (fread(stringbuf, 1, size + 3, fp) != size + 3) {
1430 L_ERROR(
"error reading string\n", __func__);
1434 stringbuf[size + 2] =
'\0';
1438 ignore = fscanf(fp,
"\n");
1441 LEPT_FREE(stringbuf);
1462 return (
SARRAY *)ERROR_PTR(
"data not defined", __func__, NULL);
1463 if ((fp = fopenReadFromMemory(data, size)) == NULL)
1464 return (
SARRAY *)ERROR_PTR(
"stream not opened", __func__, NULL);
1468 if (!sa) L_ERROR(
"sarray not read\n", __func__);
1488 return ERROR_INT(
"filename not defined", __func__, 1);
1490 return ERROR_INT(
"sa not defined", __func__, 1);
1492 if ((fp = fopenWriteStream(filename,
"w")) == NULL)
1493 return ERROR_INT_1(
"stream not opened", filename, __func__, 1);
1497 return ERROR_INT_1(
"sa not written to stream", filename, __func__, 1);
1522 return ERROR_INT(
"stream not defined", __func__, 1);
1528 fprintf(fp,
"Number of strings = %d\n", n);
1529 for (i = 0; i < n; i++) {
1530 len = strlen(sa->
array[i]);
1531 fprintf(fp,
" %d[%d]: %s\n", i, len, sa->
array[i]);
1551 return ERROR_INT(
"sa not defined", __func__, 1);
1555 lept_stderr(
"Number of strings = %d\n", n);
1556 for (i = 0; i < n; i++) {
1557 len = strlen(sa->
array[i]);
1558 lept_stderr(
" %d[%d]: %s\n", i, len, sa->
array[i]);
1586 if (pdata) *pdata = NULL;
1587 if (psize) *psize = 0;
1589 return ERROR_INT(
"&data not defined", __func__, 1);
1591 return ERROR_INT(
"&size not defined", __func__, 1);
1593 return ERROR_INT(
"sa not defined", __func__, 1);
1596 if ((fp = open_memstream((
char **)pdata, psize)) == NULL)
1597 return ERROR_INT(
"stream not opened", __func__, 1);
1601 if (*psize > 0) *psize = *psize - 1;
1603 L_INFO(
"no fmemopen API --> work-around: write to temp file\n", __func__);
1605 if ((fp = fopenWriteWinTempfile()) == NULL)
1606 return ERROR_INT(
"tmpfile stream not opened", __func__, 1);
1608 if ((fp = tmpfile()) == NULL)
1609 return ERROR_INT(
"tmpfile stream not opened", __func__, 1);
1613 *pdata = l_binaryReadStream(fp, psize);
1634 return ERROR_INT(
"filename not defined", __func__, 1);
1636 return ERROR_INT(
"sa not defined", __func__, 1);
1638 if ((fp = fopenWriteStream(filename,
"a")) == NULL)
1639 return ERROR_INT_1(
"stream not opened", filename, __func__, 1);
1642 return ERROR_INT_1(
"sa not appended to stream", filename, __func__, 1);
1704 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1707 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
1745char *fname, *fullname;
1747SARRAY *sa, *safiles, *saout;
1750 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1753 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
1758 L_WARNING(
"no files found\n", __func__);
1764 first = L_MIN(L_MAX(first, 0), n - 1);
1767 last = L_MIN(first + nfiles - 1, n - 1);
1770 for (i = first; i <= last; i++) {
1772 fullname = pathJoin(dirname, fname);
1804l_int32 i, nfiles, num, index;
1808 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
1817 for (i = nfiles - 1; i >= 0; i--) {
1819 num = extractNumberFromFilename(fname, numpre, numpost);
1820 if (num < 0)
continue;
1821 num = L_MIN(num + 1, maxnum);
1831 for (i = 0; i < nfiles; i++) {
1833 index = extractNumberFromFilename(fname, numpre, numpost);
1834 if (index < 0 || index >= num)
continue;
1836 if (str[0] !=
'\0') {
1837 L_WARNING(
"\n Multiple files with same number: %d\n",
1883char *gendir, *realdir, *stat_path;
1887struct dirent *pdirentry;
1892 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1893 if (dirname[0] ==
'\0')
1894 return (
SARRAY *)ERROR_PTR(
"dirname is empty", __func__, NULL);
1906 gendir = genPathname(dirname, NULL);
1907 realdir = realpath(gendir, NULL);
1909 if (realdir == NULL)
1910 return (
SARRAY *)ERROR_PTR(
"realdir not made", __func__, NULL);
1911 if ((pdir = opendir(realdir)) == NULL) {
1912 L_ERROR(
"directory %s not opened\n", __func__, realdir);
1917 while ((pdirentry = readdir(pdir))) {
1918#if HAVE_DIRFD && HAVE_FSTATAT
1922 stat_ret = fstatat(dfd, pdirentry->d_name, &st, 0);
1924 size = strlen(realdir) + strlen(pdirentry->d_name) + 2;
1925 stat_path = (
char *)LEPT_CALLOC(size, 1);
1926 snprintf(stat_path, size,
"%s/%s", realdir, pdirentry->d_name);
1927 stat_ret = stat(stat_path, &st);
1928 LEPT_FREE(stat_path);
1930 if (stat_ret == 0 && S_ISDIR(st.st_mode))
1949HANDLE hFind = INVALID_HANDLE_VALUE;
1951WIN32_FIND_DATAA ffd;
1954 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1956 realdir = genPathname(dirname, NULL);
1957 pszDir = stringJoin(realdir,
"\\*");
1960 if (strlen(pszDir) + 1 > MAX_PATH) {
1962 return (
SARRAY *)ERROR_PTR(
"dirname is too long", __func__, NULL);
1967 return (
SARRAY *)ERROR_PTR(
"safiles not made", __func__, NULL);
1970 hFind = FindFirstFileA(pszDir, &ffd);
1971 if (INVALID_HANDLE_VALUE == hFind) {
1974 return (
SARRAY *)ERROR_PTR(
"hFind not opened", __func__, NULL);
1977 while (FindNextFileA(hFind, &ffd) != 0) {
1978 if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
1980 convertSepCharsInPath(ffd.cFileName, UNIX_PATH_SEPCHAR);
#define SARRAY_VERSION_NUMBER
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
SARRAY * convertSortedToNumberedPathnames(SARRAY *sa, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
convertSortedToNumberedPathnames()
SARRAY * sarrayConvertWordsToLines(SARRAY *sa, l_int32 linesize)
sarrayConvertWordsToLines()
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
l_ok sarrayWriteStderr(SARRAY *sa)
sarrayWriteStderr()
static const l_int32 InitialPtrArraySize
char * sarrayToStringRange(SARRAY *sa, l_int32 first, l_int32 nstrings, l_int32 addnlflag)
sarrayToStringRange()
l_ok sarrayJoin(SARRAY *sa1, SARRAY *sa2)
sarrayJoin()
SARRAY * sarrayRead(const char *filename)
sarrayRead()
l_ok sarrayPadToSameSize(SARRAY *sa1, SARRAY *sa2, const char *padstring)
sarrayPadToSameSize()
char * sarrayRemoveString(SARRAY *sa, l_int32 index)
sarrayRemoveString()
l_ok sarrayReplaceString(SARRAY *sa, l_int32 index, char *newstr, l_int32 copyflag)
sarrayReplaceString()
l_ok sarrayAppend(const char *filename, SARRAY *sa)
sarrayAppend()
SARRAY * sarraySelectRange(SARRAY *sain, l_int32 first, l_int32 last)
sarraySelectRange()
char * sarrayToString(SARRAY *sa, l_int32 addnlflag)
sarrayToString()
SARRAY * sarraySelectBySubstring(SARRAY *sain, const char *substr)
sarraySelectBySubstring()
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
SARRAY * sarrayCreateLinesFromString(const char *string, l_int32 blankflag)
sarrayCreateLinesFromString()
SARRAY * getFilenamesInDirectory(const char *dirname)
getFilenamesInDirectory()
SARRAY * sarrayCreateInitialized(l_int32 n, const char *initstr)
sarrayCreateInitialized()
l_ok sarrayClear(SARRAY *sa)
sarrayClear()
SARRAY * sarrayReadMem(const l_uint8 *data, size_t size)
sarrayReadMem()
char ** sarrayGetArray(SARRAY *sa, l_int32 *pnalloc, l_int32 *pn)
sarrayGetArray()
static l_int32 sarrayExtendArray(SARRAY *sa)
sarrayExtendArray()
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
SARRAY * sarrayReadStream(FILE *fp)
sarrayReadStream()
SARRAY * sarrayClone(SARRAY *sa)
sarrayClone()
SARRAY * sarrayCreateWordsFromString(const char *string)
sarrayCreateWordsFromString()
l_ok sarrayWrite(const char *filename, SARRAY *sa)
sarrayWrite()
l_int32 sarrayParseRange(SARRAY *sa, l_int32 start, l_int32 *pactualstart, l_int32 *pend, l_int32 *pnewstart, const char *substr, l_int32 loc)
sarrayParseRange()
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
l_ok sarrayWriteStream(FILE *fp, SARRAY *sa)
sarrayWriteStream()
l_ok sarrayWriteMem(l_uint8 **pdata, size_t *psize, SARRAY *sa)
sarrayWriteMem()
l_ok sarrayAppendRange(SARRAY *sa1, SARRAY *sa2, l_int32 start, l_int32 end)
sarrayAppendRange()
SARRAY * sarrayConcatUniformly(SARRAY *sa, l_int32 n, l_int32 addnlflag)
sarrayConcatUniformly()