/*
**  GNU Pth - The GNU Portable Threads
**  Copyright (c) 1999-2001 Ralf S. Engelschall <rse@engelschall.com>
**
**  This file is part of GNU Pth, a non-preemptive thread scheduling
**  library which can be found at http://www.gnu.org/software/pth/.
**
**  This library is free software; you can redistribute it and/or
**  modify it under the terms of the GNU Lesser General Public
**  License as published by the Free Software Foundation; either
**  version 2 of the License, or (at your option) any later version.
**
**  This library is distributed in the hope that it will be useful,
**  but WITHOUT ANY WARRANTY; without even the implied warranty of
**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
**  Lesser General Public License for more details.
**
**  You should have received a copy of the GNU Lesser General Public
**  License along with this library; if not, write to the Free Software
**  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
**  USA, or contact Ralf S. Engelschall <rse@engelschall.com>.
**
**  pth.c: Pth all-in-one source (AUTO-GENERATED, DO NOT EDIT!)
*/

#include "pth_p.h"

/* ==== pth_compat.c ==== */

COMPILER_HAPPYNESS(pth_compat)

/*
 *  Replacement for strerror(3)
 */

/* When the platform provides no strerror(3) (HAVE_STRERROR undefined),
   declare the fallback _pth_compat_strerror() and map every strerror()
   call onto it.
   NOTE(review): `#if cpp' sections look like material that is extracted
   into the private header -- confirm against the build tooling. */
#if cpp
#if !defined(HAVE_STRERROR)
char *_pth_compat_strerror(int);
#define strerror(errnum) _pth_compat_strerror(errnum)
#endif
#endif

#if !defined(HAVE_STRERROR)
extern char *const sys_errlist[];
extern int sys_nerr;

/*
 * Fallback for strerror(3): look up the message for an error number
 * in the traditional sys_errlist[] table.
 *
 * errnum: error number to translate.
 * Returns the table entry for errnum, or a fixed "Unknown error" string
 * when errnum is outside [0, sys_nerr), so the table is never indexed
 * out of bounds.
 */
char *_pth_compat_strerror(int errnum)
{
    static char unknown[] = "Unknown error";

    if (errnum < 0 || errnum >= sys_nerr)
        return unknown;
    return sys_errlist[errnum];
}
#endif

/* ==== pth_debug.c ==== */

/*
 * Debug trace macros. pth_debugN takes N arguments: a format string
 * followed by N-1 values. Without PTH_DEBUG every macro expands to
 * nothing; with PTH_DEBUG each one forwards to pth_debug() together
 * with __FILE__, __LINE__ and the argument count N.
 */
#if cpp

#ifndef PTH_DEBUG

#define pth_debug1(a1)                     /* NOP */
#define pth_debug2(a1, a2)                 /* NOP */
#define pth_debug3(a1, a2, a3)             /* NOP */
#define pth_debug4(a1, a2, a3, a4)         /* NOP */
#define pth_debug5(a1, a2, a3, a4, a5)     /* NOP */
#define pth_debug6(a1, a2, a3, a4, a5, a6) /* NOP */

#else

#define pth_debug1(a1)                     pth_debug(__FILE__, __LINE__, 1, a1)
#define pth_debug2(a1, a2)                 pth_debug(__FILE__, __LINE__, 2, a1, a2)
#define pth_debug3(a1, a2, a3)             pth_debug(__FILE__, __LINE__, 3, a1, a2, a3)
#define pth_debug4(a1, a2, a3, a4)         pth_debug(__FILE__, __LINE__, 4, a1, a2, a3, a4)
#define pth_debug5(a1, a2, a3, a4, a5)     pth_debug(__FILE__, __LINE__, 5, a1, a2, a3, a4, a5)
#define pth_debug6(a1, a2, a3, a4, a5, a6) pth_debug(__FILE__, __LINE__, 6, a1, a2, a3, a4, a5, a6)

#endif /* PTH_DEBUG */

#endif /* cpp */

/*
 * Emit one debug line on STDERR_FILENO.
 *
 * file/line: source position used for the "pid:file:line: " prefix;
 *            the prefix is omitted when file is NULL.
 * argc:      number of arguments starting at fmt; when it is 1 the text
 *            of fmt is copied as-is instead of being format-expanded.
 * fmt, ...:  message format and its values.
 *
 * The line is assembled in a static buffer and the whole body runs
 * inside pth_shield.
 */
intern void pth_debug(const char *file, int line, int argc, const char *fmt, ...)
{
    static char msg[1024];
    va_list args;
    size_t len;

    pth_shield {
        va_start(args, fmt);
        /* optional position prefix */
        if (file == NULL)
            msg[0] = NUL;
        else
            pth_snprintf(msg, sizeof(msg), "%d:%s:%04d: ", (int)getpid(), file, line);
        len = strlen(msg);
        /* message text: formatted unless fmt is the only argument */
        if (argc != 1)
            pth_vsnprintf(msg+len, sizeof(msg)-len, fmt, args);
        else
            pth_util_cpystrn(msg+len, fmt, sizeof(msg)-len);
        va_end(args);
        /* the terminating NUL is replaced by the newline */
        len = strlen(msg);
        msg[len++] = '\n';
        pth_sc(write)(STDERR_FILENO, msg, len);
    }
    return;
}

/* write a page summarizing the internal state of Pth (version, load
   average and the contents of all thread queues) to the stream fp */
intern void pth_dumpstate(FILE *fp)
{
    static const char rule[] =
        "+----------------------------------------------------------------------\n";

    fputs(rule, fp);
    fprintf(fp, "| Pth Version: %s\n", PTH_VERSION_STR);
    fprintf(fp, "| Load Average: %.2f\n", pth_loadval);

    /* queues that precede the running thread */
    pth_dumpqueue(fp, "NEW", &pth_NQ);
    pth_dumpqueue(fp, "READY", &pth_RQ);

    /* the running "queue" is just the current thread */
    fputs("| Thread Queue RUNNING:\n", fp);
    fprintf(fp, "|   1. thread 0x%lx (\"%s\")\n",
            (unsigned long)pth_current, pth_current->name);

    /* remaining queues */
    pth_dumpqueue(fp, "WAITING", &pth_WQ);
    pth_dumpqueue(fp, "SUSPENDED", &pth_SQ);
    pth_dumpqueue(fp, "DEAD", &pth_DQ);
    fputs(rule, fp);
    return;
}

/* print the threads of queue q, labelled qn, as a numbered list on fp;
   an empty queue is reported as "no threads" */
intern void pth_dumpqueue(FILE *fp, const char *qn, pth_pqueue_t *q)
{
    pth_t th;
    int pos;

    fprintf(fp, "| Thread Queue %s:\n", qn);
    if (pth_pqueue_elements(q) == 0)
        fprintf(fp, "|   no threads\n");

    /* walk the queue from its head, numbering entries from 1 */
    pos = 0;
    th = pth_pqueue_head(q);
    while (th != NULL) {
        pos++;
        fprintf(fp, "|   %d. thread 0x%lx (\"%s\")\n", pos, (unsigned long)th, th->name);
        th = pth_pqueue_walk(q, th, PTH_WALK_NEXT);
    }
    return;
}

/* ==== pth_syscall.c ==== */

/* some exported variables for object layer checks; they carry the
   build-time values of PTH_SYSCALL_SOFT and PTH_SYSCALL_HARD */
int pth_syscall_soft = PTH_SYSCALL_SOFT;
int pth_syscall_hard = PTH_SYSCALL_HARD;

/*
 * pth_sc(func) names the routine Pth itself calls to perform the
 * system call `func': with hard syscall mapping it expands to
 * pth_sc_<func>, otherwise to plain <func>.
 */
#if cpp
#if PTH_SYSCALL_HARD
/* hard syscall mapping */
#if HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif
#ifdef HAVE_SYS_SOCKETCALL_H
#include <sys/socketcall.h>
#endif
#define pth_sc(func) pth_sc_##func
#else /* !PTH_SYSCALL_HARD */
/* no hard syscall mapping */
#define pth_sc(func) func
#endif /* PTH_SYSCALL_HARD */
#endif /* cpp */

/*
 * Unprotect us from the namespace conflict with the
 * syscall prototypes in system headers: remove any macro
 * definitions of the names below, so that the plain identifiers
 * can be declared and defined in the remainder of this file.
 */
#undef fork
#undef waitpid
#undef system
#undef nanosleep
#undef usleep
#undef sleep
#undef sigprocmask
#undef sigwait
#undef select
#undef pselect
#undef poll
#undef connect
#undef accept
#undef read
#undef write
#undef readv
#undef writev
#undef recv
#undef send
#undef recvfrom
#undef sendto
#undef pread
#undef pwrite

/* internal data structures */
#if cpp
/* generic function pointer; it is cast to the real signature of the
   wrapped call before being invoked */
typedef int (*pth_syscall_fct_t)();
/* one entry of the table of wrapped calls */
typedef struct {
    char             *name;    /* name of system/function call */
    pth_syscall_fct_t addr;    /* address of wrapped system/function call */
} pth_syscall_fct_tab_t;
/* one entry of the table of libraries searched for those calls */
typedef struct {
    char             *path;    /* path to dynamic library */
    void             *handle;  /* handle of dynamic library */
} pth_syscall_lib_tab_t;
#endif

#if PTH_SYSCALL_HARD

/* NUL-spiked copy of library paths (the colon separators of
   PTH_SYSCALL_LIBS are overwritten with NUL by pth_syscall_init) */
static char *pth_syscall_libs = NULL;

/* table of dynamic libraries and their resolving handles;
   the used part is terminated by an entry whose path is NULL */
static pth_syscall_lib_tab_t pth_syscall_lib_tab[128] = {
    { NULL, NULL }
};

/* table of syscalls and their resolved function pointers;
   terminated by an entry whose name is NULL */
intern pth_syscall_fct_tab_t pth_syscall_fct_tab[] = {
    /* Notice: the order of the entries must match the values of the
       PTH_SCF_* index macros defined right here */
#define PTH_SCF_fork          0
#define PTH_SCF_waitpid       1
#define PTH_SCF_system        2
#define PTH_SCF_nanosleep     3
#define PTH_SCF_usleep        4
#define PTH_SCF_sleep         5
#define PTH_SCF_sigprocmask   6
#define PTH_SCF_sigwait       7
#define PTH_SCF_select        8
#define PTH_SCF_poll          9
#define PTH_SCF_connect       10
#define PTH_SCF_accept        11
#define PTH_SCF_read          12
#define PTH_SCF_write         13
#define PTH_SCF_readv         14
#define PTH_SCF_writev        15
#define PTH_SCF_recv          16
#define PTH_SCF_send          17
#define PTH_SCF_recvfrom      18
#define PTH_SCF_sendto        19
#define PTH_SCF_pread         20
#define PTH_SCF_pwrite        21
    { "fork",        NULL },
    { "waitpid",     NULL },
    { "system",      NULL },
    { "nanosleep",   NULL },
    { "usleep",      NULL },
    { "sleep",       NULL },
    { "sigprocmask", NULL },
    { "sigwait",     NULL },
    { "select",      NULL },
    { "poll",        NULL },
    { "connect",     NULL },
    { "accept",      NULL },
    { "read",        NULL },
    { "write",       NULL },
    { "readv",       NULL },
    { "writev",      NULL },
    { "recv",        NULL },
    { "send",        NULL },
    { "recvfrom",    NULL },
    { "sendto",      NULL },
    { "pread",       NULL },
    { "pwrite",      NULL },
    { NULL,          NULL }
};
#endif

/*
 * syscall wrapping initialization
 *
 * With hard syscall mapping this splits the colon-separated list
 * PTH_SYSCALL_LIBS into pth_syscall_lib_tab[] and, where dlopen(3) and
 * dlsym(3) are available, resolves the address of every entry of
 * pth_syscall_fct_tab[]. Without hard syscall mapping it does nothing.
 */
intern void pth_syscall_init(void)
{
#if PTH_SYSCALL_HARD
    int i;
    int j;
    char *cpLib;
    char *cp;

    /* fill paths of libraries into internal table */
    i = 0;
    pth_syscall_libs = strdup(PTH_SYSCALL_LIBS);
    /* strdup(3) may fail; leave the library table empty in that case
       instead of handing a NULL pointer to strchr(3) */
    if (pth_syscall_libs != NULL) {
        cpLib = pth_syscall_libs;
        /* one slot stays reserved for the terminating NULL path */
        while (i < (int)(sizeof(pth_syscall_lib_tab)/sizeof(pth_syscall_lib_tab_t))-1) {
            if ((cp = strchr(cpLib, ':')) != NULL)
                *cp++ = '\0';
            pth_syscall_lib_tab[i].path   = cpLib;
            pth_syscall_lib_tab[i].handle = NULL;
            i++;
            if (cp == NULL)
                break;
            cpLib = cp;
        }
    }
    pth_syscall_lib_tab[i].path = NULL;

#if defined(HAVE_DLOPEN) && defined(HAVE_DLSYM)
    /* determine addresses of syscall functions */
    for (i = 0; pth_syscall_fct_tab[i].name != NULL; i++) {

        /* forget any address from an earlier initialization; it may
           point into a library pth_syscall_kill() has closed already */
        pth_syscall_fct_tab[i].addr = NULL;

        /* attempt #1: fetch from implicit successor libraries */
#if defined(HAVE_DLSYM) && defined(HAVE_RTLD_NEXT)
        pth_syscall_fct_tab[i].addr = (pth_syscall_fct_t)
            dlsym(RTLD_NEXT, pth_syscall_fct_tab[i].name);
#endif

        /* attempt #2: fetch from explicitly loaded C library */
        if (pth_syscall_fct_tab[i].addr == NULL) {

            /* first iteration: try resolve from already loaded libraries */
            for (j = 0; pth_syscall_lib_tab[j].path != NULL; j++) {
                if (pth_syscall_lib_tab[j].handle != NULL) {
                    pth_syscall_fct_tab[i].addr = (pth_syscall_fct_t)
                        dlsym(pth_syscall_lib_tab[j].handle,
                              pth_syscall_fct_tab[i].name);
                    if (pth_syscall_fct_tab[i].addr != NULL)
                        break;
                }
            }

            /* second iteration: try to load more libraries for resolving */
            if (pth_syscall_fct_tab[i].addr == NULL) {
                for (j = 0; pth_syscall_lib_tab[j].path != NULL; j++) {
                    if (pth_syscall_lib_tab[j].handle == NULL) {
                        if ((pth_syscall_lib_tab[j].handle =
                             dlopen(pth_syscall_lib_tab[j].path, RTLD_LAZY)) == NULL)
                            continue;
                        pth_syscall_fct_tab[i].addr = (pth_syscall_fct_t)
                            dlsym(pth_syscall_lib_tab[j].handle,
                                  pth_syscall_fct_tab[i].name);
                        if (pth_syscall_fct_tab[i].addr != NULL)
                            break;
                    }
                }
            }
        }
    }
#endif
#endif
    return;
}

/* syscall wrapping termination: close the explicitly loaded libraries,
   empty the library table and release the copy of the library paths */
intern void pth_syscall_kill(void)
{
#if PTH_SYSCALL_HARD
    int i;

#if defined(HAVE_DLOPEN) && defined(HAVE_DLSYM)
    /* walk the table up to its terminating NULL path */
    i = 0;
    while (pth_syscall_lib_tab[i].path != NULL) {
        void *h = pth_syscall_lib_tab[i].handle;

        if (h != NULL) {
            dlclose(h);
            pth_syscall_lib_tab[i].handle = NULL;
        }
        pth_syscall_lib_tab[i].path = NULL;
        i++;
    }
#endif
    /* the table paths pointed into this buffer */
    free(pth_syscall_libs);
    pth_syscall_libs = NULL;
#endif
    return;
}

#if PTH_SYSCALL_HARD

/* utility macro for returning syscall errors: prints a warning naming
   `syscall' on stderr, sets errno to errno_val and returns return_val
   from the *enclosing function* (the macro contains a return statement) */
#define PTH_SYSCALL_ERROR(return_val,errno_val,syscall) \
    do { fprintf(stderr, \
                 "pth:WARNING: unable to perform syscall `%s': " \
                 "no implementation resolvable\n", syscall); \
         errno = (errno_val); \
         return (return_val); \
    } while (0)

/* ==== Pth hard syscall wrapper for fork(2) ==== */
pid_t fork(void);
pid_t fork(void)
{
    pid_t pid;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_fork() */
    pth_implicit_init();
    pid = pth_fork();
    return pid;
}
intern pid_t pth_sc_fork(void)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    pid_t (*fct)(void);

    fct = (pid_t (*)(void))pth_syscall_fct_tab[PTH_SCF_fork].addr;
    if (fct != NULL)
        return fct();
#if defined(HAVE_SYSCALL) && defined(SYS_fork)
    return (pid_t)syscall(SYS_fork);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "fork");
#endif
}

/* ==== Pth hard syscall wrapper for nanosleep(3) ==== */
int nanosleep(const struct timespec *, struct timespec *);
int nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_nanosleep() */
    pth_implicit_init();
    rc = pth_nanosleep(rqtp, rmtp);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_nanosleep is necessary! */

/* ==== Pth hard syscall wrapper for usleep(3) ==== */
int usleep(unsigned int);
int usleep(unsigned int sec)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_usleep().
       NOTE(review): the parameter is named `sec' although usleep(3)
       takes microseconds -- the value is passed through unchanged. */
    pth_implicit_init();
    rc = pth_usleep(sec);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_usleep is necessary! */

/* ==== Pth hard syscall wrapper for sleep(3) ==== */
unsigned int sleep(unsigned int);
unsigned int sleep(unsigned int sec)
{
    unsigned int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_sleep() */
    pth_implicit_init();
    rc = pth_sleep(sec);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_sleep is necessary! */

/* ==== Pth hard syscall wrapper for system(3) ==== */
int system(const char *);
int system(const char *cmd)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_system() */
    pth_implicit_init();
    rc = pth_system(cmd);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_system is necessary! */

/* ==== Pth hard syscall wrapper for sigprocmask(2) ==== */
int sigprocmask(int, const sigset_t *, sigset_t *);
int sigprocmask(int how, const sigset_t *set, sigset_t *oset)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_sigmask() */
    pth_implicit_init();
    rc = pth_sigmask(how, set, oset);
    return rc;
}
intern int pth_sc_sigprocmask(int how, const sigset_t *set, sigset_t *oset)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    int (*fct)(int, const sigset_t *, sigset_t *);

    fct = (int (*)(int, const sigset_t *, sigset_t *))
          pth_syscall_fct_tab[PTH_SCF_sigprocmask].addr;
    if (fct != NULL)
        return fct(how, set, oset);
#if defined(HAVE_SYSCALL) && defined(SYS___sigprocmask14) /* NetBSD */
    return (int)syscall(SYS___sigprocmask14, how, set, oset);
#elif defined(HAVE_SYSCALL) && defined(SYS_sigprocmask)
    return (int)syscall(SYS_sigprocmask, how, set, oset);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "sigprocmask");
#endif
}

/* ==== Pth hard syscall wrapper for sigwait(3) ==== */
int sigwait(const sigset_t *, int *);
int sigwait(const sigset_t *set, int *sigp)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_sigwait() */
    pth_implicit_init();
    rc = pth_sigwait(set, sigp);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_sigwait is necessary! */

/* ==== Pth hard syscall wrapper for waitpid(2) ==== */
pid_t waitpid(pid_t, int *, int);
pid_t waitpid(pid_t wpid, int *status, int options)
{
    pid_t pid;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_waitpid() */
    pth_implicit_init();
    pid = pth_waitpid(wpid, status, options);
    return pid;
}
intern pid_t pth_sc_waitpid(pid_t wpid, int *status, int options)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    pid_t (*fct)(pid_t, int *, int);

    fct = (pid_t (*)(pid_t, int *, int))
          pth_syscall_fct_tab[PTH_SCF_waitpid].addr;
    if (fct != NULL)
        return fct(wpid, status, options);
#if defined(HAVE_SYSCALL) && defined(SYS_waitpid)
    return (pid_t)syscall(SYS_waitpid, wpid, status, options);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "waitpid");
#endif
}

/* ==== Pth hard syscall wrapper for connect(2) ==== */
int connect(int, const struct sockaddr *, socklen_t);
int connect(int s, const struct sockaddr *addr, socklen_t addrlen)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_connect() */
    pth_implicit_init();
    rc = pth_connect(s, addr, addrlen);
    return rc;
}
intern int pth_sc_connect(int s, const struct sockaddr *addr, socklen_t addrlen)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    int (*fct)(int, const struct sockaddr *, socklen_t);

    fct = (int (*)(int, const struct sockaddr *, socklen_t))
          pth_syscall_fct_tab[PTH_SCF_connect].addr;
    if (fct != NULL)
        return fct(s, addr, addrlen);
#if defined(HAVE_SYSCALL) && defined(SYS_connect)
    return (int)syscall(SYS_connect, s, addr, addrlen);
#elif defined(HAVE_SYSCALL) && defined(SYS_socketcall) && defined(SOCKOP_connect) /* Linux */
    {
        /* socketcall takes the arguments packed into an array */
        unsigned long args[3];

        args[0] = (unsigned long)s;
        args[1] = (unsigned long)addr;
        args[2] = (unsigned long)addrlen;
        return (int)syscall(SYS_socketcall, SOCKOP_connect, args);
    }
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "connect");
#endif
}

/* ==== Pth hard syscall wrapper for accept(2) ==== */
int accept(int, struct sockaddr *, socklen_t *);
int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_accept() */
    pth_implicit_init();
    rc = pth_accept(s, addr, addrlen);
    return rc;
}
intern int pth_sc_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    int (*fct)(int, struct sockaddr *, socklen_t *);

    fct = (int (*)(int, struct sockaddr *, socklen_t *))
          pth_syscall_fct_tab[PTH_SCF_accept].addr;
    if (fct != NULL)
        return fct(s, addr, addrlen);
#if defined(HAVE_SYSCALL) && defined(SYS_accept)
    return (int)syscall(SYS_accept, s, addr, addrlen);
#elif defined(HAVE_SYSCALL) && defined(SYS_socketcall) && defined(SOCKOP_accept) /* Linux */
    {
        /* socketcall takes the arguments packed into an array */
        unsigned long args[3];

        args[0] = (unsigned long)s;
        args[1] = (unsigned long)addr;
        args[2] = (unsigned long)addrlen;
        return (int)syscall(SYS_socketcall, SOCKOP_accept, args);
    }
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "accept");
#endif
}

/* ==== Pth hard syscall wrapper for select(2) ==== */
int select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
int select(int nfds, fd_set *readfds, fd_set *writefds,
           fd_set *exceptfds, struct timeval *timeout)
{
    /* external entry point for application: initialize Pth implicitly,
       then delegate to pth_select() */
    pth_implicit_init();
    return pth_select(nfds, readfds, writefds, exceptfds, timeout);
}
intern int pth_sc_select(int nfds, fd_set *readfds, fd_set *writefds,
                         fd_set *exceptfds, struct timeval *timeout)
{
    /* internal exit point for Pth: call the resolved implementation
       if there is one, else fall back to a raw syscall or fail with
       ENOSYS */
    if (pth_syscall_fct_tab[PTH_SCF_select].addr != NULL)
        return ((int (*)(int, fd_set *, fd_set *, fd_set *, struct timeval *))
               pth_syscall_fct_tab[PTH_SCF_select].addr)
               (nfds, readfds, writefds, exceptfds, timeout);
#if defined(HAVE_SYSCALL) && defined(SYS__newselect) /* Linux */
    else return (int)syscall(SYS__newselect, nfds, readfds, writefds, exceptfds, timeout);
#elif defined(HAVE_SYSCALL) && defined(SYS_select)
    else return (int)syscall(SYS_select, nfds, readfds, writefds, exceptfds, timeout);
#else
    /* the warning has to name this call, not accept */
    else PTH_SYSCALL_ERROR(-1, ENOSYS, "select");
#endif
}

/* ==== Pth hard syscall wrapper for pselect(2) ==== */
int pselect(int, fd_set *, fd_set *, fd_set *, const struct timespec *, const sigset_t *);
int pselect(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds,
            const struct timespec *ts, const sigset_t *mask)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_pselect() */
    pth_implicit_init();
    rc = pth_pselect(nfds, rfds, wfds, efds, ts, mask);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_pselect is necessary! */

/* ==== Pth hard syscall wrapper for poll(2) ==== */
int poll(struct pollfd *, nfds_t, int);
int poll(struct pollfd *pfd, nfds_t nfd, int timeout)
{
    int rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_poll() */
    pth_implicit_init();
    rc = pth_poll(pfd, nfd, timeout);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_poll is necessary! */

/* ==== Pth hard syscall wrapper for read(2) ==== */
ssize_t read(int, void *, size_t);
ssize_t read(int fd, void *buf, size_t nbytes)
{
    ssize_t rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_read() */
    pth_implicit_init();
    rc = pth_read(fd, buf, nbytes);
    return rc;
}
intern ssize_t pth_sc_read(int fd, void *buf, size_t nbytes)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    ssize_t (*fct)(int, void *, size_t);

    fct = (ssize_t (*)(int, void *, size_t))
          pth_syscall_fct_tab[PTH_SCF_read].addr;
    if (fct != NULL)
        return fct(fd, buf, nbytes);
#if defined(HAVE_SYSCALL) && defined(SYS_read)
    return (ssize_t)syscall(SYS_read, fd, buf, nbytes);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "read");
#endif
}

/* ==== Pth hard syscall wrapper for write(2) ==== */
ssize_t write(int, const void *, size_t);
ssize_t write(int fd, const void *buf, size_t nbytes)
{
    ssize_t rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_write() */
    pth_implicit_init();
    rc = pth_write(fd, buf, nbytes);
    return rc;
}
intern ssize_t pth_sc_write(int fd, const void *buf, size_t nbytes)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    ssize_t (*fct)(int, const void *, size_t);

    fct = (ssize_t (*)(int, const void *, size_t))
          pth_syscall_fct_tab[PTH_SCF_write].addr;
    if (fct != NULL)
        return fct(fd, buf, nbytes);
#if defined(HAVE_SYSCALL) && defined(SYS_write)
    return (ssize_t)syscall(SYS_write, fd, buf, nbytes);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "write");
#endif
}

/* ==== Pth hard syscall wrapper for readv(2) ==== */
ssize_t readv(int, const struct iovec *, int);
ssize_t readv(int fd, const struct iovec *iov, int iovcnt)
{
    ssize_t rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_readv() */
    pth_implicit_init();
    rc = pth_readv(fd, iov, iovcnt);
    return rc;
}
intern ssize_t pth_sc_readv(int fd, const struct iovec *iov, int iovcnt)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    ssize_t (*fct)(int, const struct iovec *, int);

    fct = (ssize_t (*)(int, const struct iovec *, int))
          pth_syscall_fct_tab[PTH_SCF_readv].addr;
    if (fct != NULL)
        return fct(fd, iov, iovcnt);
#if defined(HAVE_SYSCALL) && defined(SYS_readv)
    return (ssize_t)syscall(SYS_readv, fd, iov, iovcnt);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "readv");
#endif
}

/* ==== Pth hard syscall wrapper for writev(2) ==== */
ssize_t writev(int, const struct iovec *, int);
ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
{
    ssize_t rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_writev() */
    pth_implicit_init();
    rc = pth_writev(fd, iov, iovcnt);
    return rc;
}
intern ssize_t pth_sc_writev(int fd, const struct iovec *iov, int iovcnt)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    ssize_t (*fct)(int, const struct iovec *, int);

    fct = (ssize_t (*)(int, const struct iovec *, int))
          pth_syscall_fct_tab[PTH_SCF_writev].addr;
    if (fct != NULL)
        return fct(fd, iov, iovcnt);
#if defined(HAVE_SYSCALL) && defined(SYS_writev)
    return (ssize_t)syscall(SYS_writev, fd, iov, iovcnt);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "writev");
#endif
}

/* ==== Pth hard syscall wrapper for pread(2) ==== */
ssize_t pread(int, void *, size_t, off_t);
ssize_t pread(int fd, void *buf, size_t nbytes, off_t offset)
{
    ssize_t rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_pread() */
    pth_implicit_init();
    rc = pth_pread(fd, buf, nbytes, offset);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_pread is necessary! */

/* ==== Pth hard syscall wrapper for pwrite(2) ==== */
ssize_t pwrite(int, const void *, size_t, off_t);
ssize_t pwrite(int fd, const void *buf, size_t nbytes, off_t offset)
{
    ssize_t rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_pwrite() */
    pth_implicit_init();
    rc = pth_pwrite(fd, buf, nbytes, offset);
    return rc;
}
/* NOTICE: this call is fully emulated inside Pth, so no
   internal exit point pth_sc_pwrite is necessary! */

/* ==== Pth hard syscall wrapper for recvfrom(2) ==== */
ssize_t recvfrom(int, void *, size_t, int, struct sockaddr *, socklen_t *);
ssize_t recvfrom(int fd, void *buf, size_t nbytes, int flags, struct sockaddr *from, socklen_t *fromlen)
{
    ssize_t rc;

    /* entry point used by the application: initialize Pth implicitly,
       then delegate to pth_recvfrom() */
    pth_implicit_init();
    rc = pth_recvfrom(fd, buf, nbytes, flags, from, fromlen);
    return rc;
}
intern ssize_t pth_sc_recvfrom(int fd, void *buf, size_t nbytes, int flags, struct sockaddr *from, socklen_t *fromlen)
{
    /* exit point used by Pth itself: call the resolved implementation
       if there is one, else fall back */
    ssize_t (*fct)(int, void *, size_t, int, struct sockaddr *, socklen_t *);

    fct = (ssize_t (*)(int, void *, size_t, int, struct sockaddr *, socklen_t *))
          pth_syscall_fct_tab[PTH_SCF_recvfrom].addr;
    if (fct != NULL)
        return fct(fd, buf, nbytes, flags, from, fromlen);
#if defined(HAVE_SYSCALL) && defined(SYS_recvfrom)
    return (ssize_t)syscall(SYS_recvfrom, fd, buf, nbytes, flags, from, fromlen);
#else
    PTH_SYSCALL_ERROR(-1, ENOSYS, "recvfrom");
#endif
}

/* ==== Pth hard syscall wrapper for sendto(2) ==== */
ssize_t sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_t);
ssize_t sendto(int fd, const void *buf, size_t nbytes, int flags, const struct sockaddr *to, socklen_t tolen)
{
    /* external entry point for application: initialize Pth implicitly,
       then delegate to pth_sendto() */
    pth_implicit_init();
    return pth_sendto(fd, buf, nbytes, flags, to, tolen);
}
intern ssize_t pth_sc_sendto(int fd, const void *buf, size_t nbytes, int flags, const struct sockaddr *to, socklen_t tolen)
{
    /* internal exit point for Pth: call the resolved implementation
       if there is one, else fall back to a raw syscall or fail with
       ENOSYS */
    if (pth_syscall_fct_tab[PTH_SCF_sendto].addr != NULL)
        /* the call must go through the sendto slot that was just
           tested, not through the recvfrom slot */
        return ((ssize_t (*)(int, const void *, size_t, int, const struct sockaddr *, socklen_t))
               pth_syscall_fct_tab[PTH_SCF_sendto].addr)
               (fd, buf, nbytes, flags, to, tolen);
#if defined(HAVE_SYSCALL) && defined(SYS_sendto)
    else return (ssize_t)syscall(SYS_sendto, fd, buf, nbytes, flags, to, tolen);
#else
    else PTH_SYSCALL_ERROR(-1, ENOSYS, "sendto");
#endif
}

#endif /* PTH_SYSCALL_HARD */

/* ==== pth_errno.c ==== */

#if cpp

/* enclose errno in a block: `pth_shield { ... }' expands to a for-loop
   that saves errno in pth_errno_storage, runs the block exactly once
   and restores errno afterwards.
   The restore happens in the loop's iteration expression, so leaving
   the block via break, return or goto skips it. Storage and flag are
   single global variables, so a nested pth_shield overwrites the
   value saved by the outer one. */
#define pth_shield \
        for ( pth_errno_storage = errno, \
              pth_errno_flag = TRUE; \
              pth_errno_flag; \
              errno = pth_errno_storage, \
              pth_errno_flag = FALSE )

/* return plus setting an errno value: an expression that assigns
   errno_val to errno and evaluates to return_val; with PTH_DEBUG it
   additionally traces both values via pth_debug4() */
#if defined(PTH_DEBUG)
#define pth_error(return_val,errno_val) \
        (errno = (errno_val), \
        pth_debug4("return 0x%lx with errno %d(\"%s\")", \
                   (unsigned long)(return_val), (errno), strerror((errno))), \
        (return_val))
#else
#define pth_error(return_val,errno_val) \
        (errno = (errno_val), (return_val))
#endif

#endif /* cpp */

/* backing store of pth_shield: the saved errno value and the
   run-once loop flag */
intern int pth_errno_storage = 0;
intern int pth_errno_flag    = 0;

/* ==== pth_ring.c ==== */

/* initialize ring to the empty state (no hook node, zero nodes);
   a NULL ring is ignored; O(1) */
intern void pth_ring_init(pth_ring_t *r)
{
    if (r != NULL) {
        r->r_nodes = 0;
        r->r_hook  = NULL;
    }
    return;
}

/* return number of nodes in ring, or -1 for a NULL ring; O(1) */
#if cpp
#define pth_ring_elements(r) \
    ((r) == NULL ? (-1) : (r)->r_nodes)
#endif

/* return first node in ring (the hook node), or NULL for a NULL
   or empty ring; O(1) */
#if cpp
#define pth_ring_first(r) \
    ((r) == NULL ? NULL : (r)->r_hook)
#endif

/* return last node in ring (the predecessor of the hook node), or NULL
   for a NULL or empty ring; O(1) */
#if cpp
#define pth_ring_last(r) \
    ((r) == NULL ? NULL : ((r)->r_hook == NULL ? NULL : (r)->r_hook->rn_prev))
#endif

/* walk to next node in ring; yields NULL once the walk would wrap
   around to the hook node again; O(1) */
#if cpp
#define pth_ring_next(r, rn) \
    (((r) == NULL || (rn) == NULL) ? NULL : ((rn)->rn_next == (r)->r_hook ? NULL : (rn)->rn_next))
#endif

/* walk to previous node in ring; yields NULL once the walk would wrap
   around to the last node again; O(1)
   NOTE(review): (r)->r_hook is dereferenced without a NULL check, so
   this must not be used with a non-NULL rn on an empty ring. */
#if cpp
#define pth_ring_prev(r, rn) \
    (((r) == NULL || (rn) == NULL) ? NULL : ((rn)->rn_prev == (r)->r_hook->rn_prev ? NULL : (rn)->rn_prev))
#endif

/* insert node into ring (alias for pth_ring_append); O(1) */
#if cpp
#define pth_ring_insert(r, rn) \
    pth_ring_append((r), (rn))
#endif

/* link node rn2 into the ring directly behind node rn1 and count it;
   nothing happens if any argument is NULL; O(1) */
intern void pth_ring_insert_after(pth_ring_t *r, pth_ringnode_t *rn1, pth_ringnode_t *rn2)
{
    if (r != NULL && rn1 != NULL && rn2 != NULL) {
        /* point the new node at its neighbours ... */
        rn2->rn_prev = rn1;
        rn2->rn_next = rn1->rn_next;
        /* ... then make the neighbours point at the new node */
        rn1->rn_next = rn2;
        rn2->rn_next->rn_prev = rn2;
        r->r_nodes++;
    }
    return;
}

/* link node rn2 into the ring directly in front of node rn1 and count
   it; nothing happens if any argument is NULL; the ring's hook is left
   untouched; O(1) */
intern void pth_ring_insert_before(pth_ring_t *r, pth_ringnode_t *rn1, pth_ringnode_t *rn2)
{
    if (r != NULL && rn1 != NULL && rn2 != NULL) {
        /* point the new node at its neighbours ... */
        rn2->rn_prev = rn1->rn_prev;
        rn2->rn_next = rn1;
        /* ... then make the neighbours point at the new node */
        rn2->rn_prev->rn_next = rn2;
        rn2->rn_next->rn_prev = rn2;
        r->r_nodes++;
    }
    return;
}

/* unlink a node from the ring and decrement the node count; nothing
   happens if r or rn is NULL; O(1) */
intern void pth_ring_delete(pth_ring_t *r, pth_ringnode_t *rn)
{
    int at_hook;

    if (r == NULL || rn == NULL)
        return;
    at_hook = (r->r_hook == rn);
    if (at_hook && rn->rn_prev == rn && rn->rn_next == rn) {
        /* the node was the only one: the ring becomes empty */
        r->r_hook = NULL;
    }
    else {
        /* move the hook on if it pointed at the node, then bypass it */
        if (at_hook)
            r->r_hook = rn->rn_next;
        rn->rn_prev->rn_next = rn->rn_next;
        rn->rn_next->rn_prev = rn->rn_prev;
    }
    r->r_nodes--;
    return;
}

/* prepend a node to the ring: the node becomes the new hook (first
   node); nothing happens if r or rn is NULL; O(1) */
intern void pth_ring_prepend(pth_ring_t *r, pth_ringnode_t *rn)
{
    pth_ringnode_t *head;

    if (r == NULL || rn == NULL)
        return;
    head = r->r_hook;
    if (head != NULL) {
        /* link in between the last node and the old first node */
        rn->rn_next = head;
        rn->rn_prev = head->rn_prev;
        rn->rn_next->rn_prev = rn;
        rn->rn_prev->rn_next = rn;
    }
    else {
        /* empty ring: the node is its own neighbour */
        rn->rn_next = rn;
        rn->rn_prev = rn;
    }
    r->r_hook = rn;
    r->r_nodes++;
    return;
}

/* append a node to the ring: the node becomes the predecessor of the
   hook (last node), the hook itself only changes when the ring was
   empty; nothing happens if r or rn is NULL; O(1) */
intern void pth_ring_append(pth_ring_t *r, pth_ringnode_t *rn)
{
    pth_ringnode_t *head;

    if (r == NULL || rn == NULL)
        return;
    head = r->r_hook;
    if (head == NULL) {
        /* empty ring: the node is hook and its own neighbour */
        rn->rn_prev = rn;
        rn->rn_next = rn;
        r->r_hook = rn;
    }
    else {
        /* link in between the old last node and the hook */
        rn->rn_next = head;
        rn->rn_prev = head->rn_prev;
        rn->rn_next->rn_prev = rn;
        rn->rn_prev->rn_next = rn;
    }
    r->r_nodes++;
    return;
}

/* treat ring as stack: push node onto stack, i.e. prepend it so that
   it becomes the first node; O(1) */
#if cpp
#define pth_ring_push(r, rn) \
    pth_ring_prepend((r), (rn))
#endif

/* treat ring as stack: remove the first node of the ring and return
   it; returns NULL if the ring is NULL or empty; O(1) */
intern pth_ringnode_t *pth_ring_pop(pth_ring_t *r)
{
    pth_ringnode_t *top = pth_ring_first(r);

    if (top == NULL)
        return NULL;
    pth_ring_delete(r, top);
    return top;
}

/*
 * treat ring as queue: favorite a node in the ring, i.e. move it to
 * the hook position so that it becomes the first node; O(1)
 *
 * Returns TRUE if rn is at the hook afterwards, FALSE if the ring is
 * NULL or empty or if rn is NULL (nothing can be favorited then).
 * The node is not checked for actually being a member of the ring.
 */
intern int pth_ring_favorite(pth_ring_t *r, pth_ringnode_t *rn)
{
    if (r == NULL || rn == NULL)
        return FALSE;
    if (r->r_hook == NULL)
        return FALSE;
    /* element is perhaps already at ring hook */
    if (r->r_hook == rn)
        return TRUE;
    /* move to hook of ring */
    pth_ring_delete(r, rn);
    pth_ring_prepend(r, rn);
    return TRUE;
}

/* treat ring as queue: enqueue node by prepending it (the matching
   pth_ring_dequeue takes nodes from the other end); O(1) */
#if cpp
#define pth_ring_enqueue(r, rn) \
    pth_ring_prepend((r), (rn))
#endif

/* treat ring as queue: remove the last node of the ring and return
   it; returns NULL if the ring is NULL or empty; O(1) */
intern pth_ringnode_t *pth_ring_dequeue(pth_ring_t *r)
{
    pth_ringnode_t *tail = pth_ring_last(r);

    if (tail == NULL)
        return NULL;
    pth_ring_delete(r, tail);
    return tail;
}

/* check whether node rns is contained in ring r by walking the ring
   once starting at the hook; returns TRUE or FALSE, and FALSE with
   errno set to EINVAL if r or rns is NULL; O(n) */
intern int pth_ring_contains(pth_ring_t *r, pth_ringnode_t *rns)
{
    pth_ringnode_t *cur;

    if (r == NULL || rns == NULL)
        return pth_error(FALSE, EINVAL);
    cur = r->r_hook;
    if (cur == NULL)
        return FALSE;
    /* one full lap: stop when the walk is back at the hook */
    do {
        if (cur == rns)
            return TRUE;
        cur = cur->rn_next;
    } while (cur != r->r_hook);
    return FALSE;
}

/* ==== pth_mctx.c ==== */

#if cpp

/*
 * machine context state structure
 *
 * In `jb' (or `uc' for the mcsc method) the CPU registers, the
 * program counter, the stack pointer and (usually) the signal mask
 * are stored. When the signal mask cannot be implicitly stored in
 * `jb', it's alternatively stored explicitly in `sigs'. The `error'
 * stores the value of `errno'.
 */

#if PTH_MCTX_MTH(mcsc)
#include <ucontext.h>
#endif

typedef struct pth_mctx_st pth_mctx_t;
struct pth_mctx_st {
#if PTH_MCTX_MTH(mcsc)
    /* context as handled by getcontext/setcontext/swapcontext */
    ucontext_t uc;
#elif PTH_MCTX_MTH(sjlj)
    /* jump buffer as handled by pth_sigsetjmp/pth_siglongjmp */
    pth_sigjmpbuf jb;
#else
#error "unknown mctx method"
#endif
    /* explicitly stored signal mask (re-installed by pth_mctx_restored) */
    sigset_t sigs;
#if PTH_MCTX_DSP(sjlje)
    /* signal mask installed by pth_mctx_save before saving the context */
    sigset_t block;
#endif
    /* value of errno at the time the context was saved */
    int error;
};

/*
** ____ MACHINE STATE SWITCHING ______________________________________
*/

/*
 * save the current machine context
 *
 * An expression: errno is recorded in the context first, and the
 * value of the expression is the result of getcontext() or
 * pth_sigsetjmp() respectively. The sjlje variant additionally
 * installs the context's `block' signal mask before saving.
 */
#if PTH_MCTX_MTH(mcsc)
#define pth_mctx_save(mctx) \
        ( (mctx)->error = errno, \
          getcontext(&(mctx)->uc) )
#elif PTH_MCTX_MTH(sjlj) && PTH_MCTX_DSP(sjlje)
#define pth_mctx_save(mctx) \
        ( (mctx)->error = errno, \
          pth_sc(sigprocmask)(SIG_SETMASK, &((mctx)->block), NULL), \
          pth_sigsetjmp((mctx)->jb) )
#elif PTH_MCTX_MTH(sjlj)
#define pth_mctx_save(mctx) \
        ( (mctx)->error = errno, \
          pth_sigsetjmp((mctx)->jb) )
#else
#error "unknown mctx method"
#endif

/*
 * restore the current machine context
 * (at the location of the old context)
 *
 * Puts (mctx)->error back into `errno' and then transfers control with
 * setcontext() or pth_siglongjmp(..., 1). The result is cast to void.
 * Note that `mctx' is evaluated more than once.
 */
#if PTH_MCTX_MTH(mcsc)
#define pth_mctx_restore(mctx) \
        ( errno = (mctx)->error, \
          (void)setcontext(&(mctx)->uc) )
#elif PTH_MCTX_MTH(sjlj)
#define pth_mctx_restore(mctx) \
        ( errno = (mctx)->error, \
          (void)pth_siglongjmp((mctx)->jb, 1) )
#else
#error "unknown mctx method"
#endif

/*
 * restore the current machine context
 * (at the location of the new context)
 *
 * In the sjlj+sjlje configuration this re-installs (mctx)->sigs as the
 * signal mask; in every other configuration it expands to nothing.
 */
#if PTH_MCTX_MTH(sjlj) && PTH_MCTX_DSP(sjlje)
#define pth_mctx_restored(mctx) \
        pth_sc(sigprocmask)(SIG_SETMASK, &((mctx)->sigs), NULL)
#else
#define pth_mctx_restored(mctx) \
        /*nop*/
#endif

/*
 * switch from one machine context to another
 *
 * mcsc:  a single swapcontext() from `old' to `new'.
 * sjlj:  save `old'; when the save returns 0 jump to `new' via
 *        pth_mctx_restore(); afterwards (i.e. once `old' is resumed)
 *        run pth_mctx_restored(old).
 * With PTH_DEBUG defined, a debug line is emitted first.
 *
 * NOTE(review): the expansion consists of several statements and already
 * carries its own trailing semicolons, so it is only safe as a complete
 * statement of its own -- not as the unbraced body of an if/else/loop.
 */
#define SWITCH_DEBUG_LINE \
        "==== THREAD CONTEXT SWITCH ==========================================="
#ifdef PTH_DEBUG
#define  _pth_mctx_switch_debug pth_debug(NULL, 0, 1, SWITCH_DEBUG_LINE);
#else
#define  _pth_mctx_switch_debug /*NOP*/
#endif
#if PTH_MCTX_MTH(mcsc)
#define pth_mctx_switch(old,new) \
    _pth_mctx_switch_debug \
    swapcontext(&((old)->uc), &((new)->uc));
#elif PTH_MCTX_MTH(sjlj)
#define pth_mctx_switch(old,new) \
    _pth_mctx_switch_debug \
    if (pth_mctx_save(old) == 0) \
        pth_mctx_restore(new); \
    pth_mctx_restored(old);
#else
#error "unknown mctx method"
#endif

#endif /* cpp */

/*
** ____ MACHINE STATE INITIALIZATION ________________________________
*/

#if PTH_MCTX_MTH(mcsc)

/*
 * VARIANT 1: THE STANDARDIZED SVR4/SUSv2 APPROACH
 *
 * This is the preferred variant, because it uses the standardized
 * SVR4/SUSv2 makecontext(2) and friends which is a facility intended
 * for user-space context switching. The thread creation therefore is
 * straightforward.
 */

/*
 * Initialize machine context `mctx' so that it starts executing `func'
 * on the stack area [sk_addr_lo, sk_addr_hi). Returns TRUE on success
 * and FALSE if getcontext() fails.
 */
intern int pth_mctx_set(
    pth_mctx_t *mctx, void (*func)(void), char *sk_addr_lo, char *sk_addr_hi)
{
    ucontext_t *uc = &(mctx->uc);

    /* start from a snapshot of the currently running context */
    if (getcontext(uc) != 0)
        return FALSE;

    /* no successor context */
    uc->uc_link = NULL;

    /* point the context at the thread's own stack */
    uc->uc_stack.ss_sp    = pth_skaddr(makecontext, sk_addr_lo, sk_addr_hi-sk_addr_lo);
    uc->uc_stack.ss_size  = pth_sksize(makecontext, sk_addr_lo, sk_addr_hi-sk_addr_lo);
    uc->uc_stack.ss_flags = 0;

    /* entry point; the argument count is deliberately kept as 0+1 although
       no argument is passed, exactly as in the established code */
    makecontext(uc, func, 0+1);

    return TRUE;
}

#elif PTH_MCTX_MTH(sjlj)     &&\
      !PTH_MCTX_DSP(sjljlx)  &&\
      !PTH_MCTX_DSP(sjljisc) &&\
      !PTH_MCTX_DSP(sjljw32)

/*
 * VARIANT 2: THE SIGNAL STACK TRICK
 *
 * This uses sigstack/sigaltstack() and friends and is really the
 * most tricky part of Pth. When you understand the following
 * stuff you're a good Unix hacker and then you've already
 * understood the gory ingredients of Pth.  So, either welcome to
 * the club of hackers, or do yourself a favor and skip this ;)
 *
 * The ingenious fact is that this variant runs really on _all_ POSIX
 * compliant systems without special platform kludges.  But be _VERY_
 * careful when you change something in the following code. The slightest
 * change or reordering can lead to horribly broken code.  Really every
 * function call in the following case is intended to be how it is, doubt
 * me...
 *
 * For more details we strongly recommend you to read the companion
 * paper ``Portable Multithreading -- The Signal Stack Trick for
 * User-Space Thread Creation'' from Ralf S. Engelschall. A copy of the
 * draft of this paper you can find in the file rse-pmt.ps inside the
 * GNU Pth distribution.
 */

#if !defined(SA_ONSTACK) && defined(SV_ONSTACK)
#define SA_ONSTACK SV_ONSTACK
#endif
#if !defined(SS_DISABLE) && defined(SA_DISABLE)
#define SS_DISABLE SA_DISABLE
#endif
#if PTH_MCTX_STK(sas) && !defined(HAVE_SS_SP) && defined(HAVE_SS_BASE)
#define ss_sp ss_base
#endif

/* jump buffer filled by setjmp() inside pth_mctx_set_trampoline() and used
   by pth_mctx_set() as longjmp() target to re-enter the trampoline */
static volatile jmp_buf      mctx_trampoline;

/* context of pth_mctx_set() itself, saved right before the longjmp() and
   switched back to by pth_mctx_set_bootstrap() */
static volatile pth_mctx_t   mctx_caller;
/* set to TRUE by the trampoline once it ran as signal handler */
static volatile sig_atomic_t mctx_called;

/* hand-over slots written by pth_mctx_set() and read by
   pth_mctx_set_bootstrap(): target context, start function, signal mask */
static pth_mctx_t * volatile mctx_creating;
static      void (* volatile mctx_creating_func)(void);
static volatile sigset_t     mctx_creating_sigs;

static void pth_mctx_set_trampoline(int);
static void pth_mctx_set_bootstrap(void);

/*
 * initialize a machine state
 *
 * Prepares `mctx' so that a later switch to it starts `func' on the stack
 * area [sk_addr_lo, sk_addr_hi). The steps are:
 *   1. block SIGUSR1 and install pth_mctx_set_trampoline() as its handler
 *      with SA_ONSTACK,
 *   2. register the new stack as alternate signal stack,
 *   3. send SIGUSR1 to ourself and wait until the trampoline has run,
 *   4. remove the alternate stack, restore handler and signal mask,
 *   5. longjmp() back into the trampoline, which saves the final context
 *      into `mctx' and switches back here.
 * Returns TRUE on success and FALSE on failure.
 *
 * NOTE(review): the early `return FALSE' paths after step 1 leave SIGUSR1
 * blocked (and, after a successful sigaction(), the trampoline handler
 * installed), because mask and handler are restored only in step 4.
 */
intern int pth_mctx_set(
    pth_mctx_t *mctx, void (*func)(void), char *sk_addr_lo, char *sk_addr_hi)
{
    struct sigaction sa;
    struct sigaction osa;
#if PTH_MCTX_STK(sas) && defined(HAVE_STACK_T)
    stack_t ss;
    stack_t oss;
#elif PTH_MCTX_STK(sas)
    struct sigaltstack ss;
    struct sigaltstack oss;
#elif PTH_MCTX_STK(ss)
    struct sigstack ss;
    struct sigstack oss;
#else
#error "unknown mctx stack setup"
#endif
    sigset_t osigs;
    sigset_t sigs;

    pth_debug1("pth_mctx_set: enter");

    /*
     * Preserve the SIGUSR1 signal state, block SIGUSR1,
     * and establish our signal handler. The signal will
     * later transfer control onto the signal stack.
     */
    sigemptyset(&sigs);
    sigaddset(&sigs, SIGUSR1);
    pth_sc(sigprocmask)(SIG_BLOCK, &sigs, &osigs);
    sa.sa_handler = pth_mctx_set_trampoline;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_ONSTACK;
    if (sigaction(SIGUSR1, &sa, &osa) != 0)
        return FALSE;

    /*
     * Set the new stack.
     *
     * For sigaltstack we're lucky [from sigaltstack(2) on
     * FreeBSD 3.1]: ``Signal stacks are automatically adjusted
     * for the direction of stack growth and alignment
     * requirements''
     *
     * For sigstack we have to decide ourself [from sigstack(2)
     * on Solaris 2.6]: ``The direction of stack growth is not
     * indicated in the historical definition of struct sigstack.
     * The only way to portably establish a stack pointer is for
     * the application to determine stack growth direction.''
     */
#if PTH_MCTX_STK(sas)
    ss.ss_sp    = pth_skaddr(sigaltstack, sk_addr_lo, sk_addr_hi-sk_addr_lo);
    ss.ss_size  = pth_sksize(sigaltstack, sk_addr_lo, sk_addr_hi-sk_addr_lo);
    ss.ss_flags = 0;
    if (sigaltstack(&ss, &oss) < 0)
        return FALSE;
#elif PTH_MCTX_STK(ss)
    ss.ss_sp = pth_skaddr(sigstack, sk_addr_lo, sk_addr_hi-sk_addr_lo);
    ss.ss_onstack = 0;
    if (sigstack(&ss, &oss) < 0)
        return FALSE;
#else
#error "unknown mctx stack setup"
#endif

    /*
     * Now transfer control onto the signal stack and set it up.
     * It will return immediately via "return" after the setjmp()
     * was performed. Be careful here with race conditions.  The
     * signal can be delivered the first time sigsuspend() is
     * called.
     */
    mctx_called = FALSE;
    kill(getpid(), SIGUSR1);
    /* wait with every signal except SIGUSR1 blocked until the handler ran */
    sigfillset(&sigs);
    sigdelset(&sigs, SIGUSR1);
    while (!mctx_called)
        sigsuspend(&sigs);

    /*
     * Inform the system that we are back off the signal stack by
     * removing the alternative signal stack. Be careful here: It
     * first has to be disabled, before it can be removed.
     */
#if PTH_MCTX_STK(sas)
    sigaltstack(NULL, &ss);
    ss.ss_flags = SS_DISABLE;
    if (sigaltstack(&ss, NULL) < 0)
        return FALSE;
    /* verify that the stack really is reported as disabled now */
    sigaltstack(NULL, &ss);
    if (!(ss.ss_flags & SS_DISABLE))
        return pth_error(FALSE, EIO);
    /* re-establish a previously active alternate stack, if there was one */
    if (!(oss.ss_flags & SS_DISABLE))
        sigaltstack(&oss, NULL);
#elif PTH_MCTX_STK(ss)
    if (sigstack(&oss, NULL))
        return FALSE;
#endif

    /*
     * Restore the old SIGUSR1 signal handler and mask
     */
    sigaction(SIGUSR1, &osa, NULL);
    pth_sc(sigprocmask)(SIG_SETMASK, &osigs, NULL);

    /*
     * Initialize additional ingredients of the machine
     * context structure.
     */
#if PTH_MCTX_DSP(sjlje)
    sigemptyset(&mctx->block);
#endif
    sigemptyset(&mctx->sigs);
    mctx->error = 0;

    /*
     * Tell the trampoline and bootstrap function where to dump
     * the new machine context, and what to do afterwards...
     */
    mctx_creating      = mctx;
    mctx_creating_func = func;
    memcpy((void *)&mctx_creating_sigs, &osigs, sizeof(sigset_t));

    /*
     * Now enter the trampoline again, but this time not as a signal
     * handler. Instead we jump into it directly. The functionally
     * redundant ping-pong pointer arithmetic is necessary to avoid
     * type-conversion warnings related to the `volatile' qualifier and
     * the fact that `jmp_buf' usually is an array type.
     */
    if (pth_mctx_save((pth_mctx_t *)&mctx_caller) == 0)
        longjmp(*((jmp_buf *)&mctx_trampoline), 1);

    /*
     * Ok, we returned again, so now we're finished
     */
    pth_debug1("pth_mctx_set: leave");
    return TRUE;
}

/*
 * trampoline signal handler
 *
 * Entered twice per pth_mctx_set() call: first as the SIGUSR1 handler
 * (setjmp() returns 0: flag mctx_called and return normally), and a
 * second time through the longjmp() in pth_mctx_set() (setjmp() returns
 * non-zero: continue in pth_mctx_set_bootstrap()). The `sig' argument is
 * not used.
 */
static void pth_mctx_set_trampoline(int sig)
{
    /*
     * Save current machine state and _immediately_ go back with
     * a standard "return" (to stop the signal handler situation)
     * to let him remove the stack again. Notice that we really
     * have do a normal "return" here, or the OS would consider
     * the thread to be running on a signal stack which isn't
     * good (for instance it wouldn't allow us to spawn a thread
     * from within a thread, etc.)
     *
     * The functionally redundant ping-pong pointer arithmetic is again
     * necessary to avoid type-conversion warnings related to the
     * `volatile' qualifier and the fact that `jmp_buf' usually is an
     * array type.
     *
     * Additionally notice that we INTENTIONALLY DO NOT USE pth_mctx_save()
     * here. Instead we use a plain setjmp(3) call because we have to make
     * sure the alternate signal stack environment is _NOT_ saved into the
     * machine context (which can be the case for sigsetjmp(3) on some
     * platforms).
     */
    if (setjmp(*((jmp_buf *)&mctx_trampoline)) == 0) {
        pth_debug1("pth_mctx_set_trampoline: return to caller");
        mctx_called = TRUE;
        return;
    }
    pth_debug1("pth_mctx_set_trampoline: reentered from caller");

    /*
     * Ok, the caller has longjmp'ed back to us, so now prepare
     * us for the real machine state switching. We have to jump
     * into another function here to get a new stack context for
     * the auto variables (which have to be auto-variables
     * because the start of the thread happens later). Else with
     * PIC (i.e. Position Independent Code which is used when PTH
     * is built as a shared library) most platforms would
     * horrible core dump as experience showed.
     */
    pth_mctx_set_bootstrap();
}

/*
 * boot function
 *
 * Runs on the new stack. It installs the signal mask recorded in
 * mctx_creating_sigs, copies the hand-over values into local variables,
 * saves the context into the target mctx and switches back to
 * pth_mctx_set(). When that context is switched to later on, the start
 * function is called; should it ever return, abort() is called.
 *
 * (The debug messages below still carry the name
 * "pth_mctx_set_trampoline_jumpin"; they are runtime strings and are
 * left untouched.)
 */
static void pth_mctx_set_bootstrap(void)
{
    pth_mctx_t * volatile mctx_starting;
    void (* volatile mctx_starting_func)(void);

    /*
     * Switch to the final signal mask (inherited from parent)
     */
    pth_sc(sigprocmask)(SIG_SETMASK, (sigset_t *)&mctx_creating_sigs, NULL);

    /*
     * Move startup details from static storage to local auto
     * variables which is necessary because it has to survive in
     * a local context until the thread is scheduled for real.
     */
    mctx_starting      = mctx_creating;
    mctx_starting_func = mctx_creating_func;

    /*
     * Save current machine state (on new stack) and
     * go back to caller until we're scheduled for real...
     */
    pth_debug1("pth_mctx_set_trampoline_jumpin: switch back to caller");
    pth_mctx_switch((pth_mctx_t *)mctx_starting, (pth_mctx_t *)&mctx_caller);

    /*
     * The new thread is now running: GREAT!
     * Now we just invoke its init function....
     */
    pth_debug1("pth_mctx_set_trampoline_jumpin: reentered from scheduler");
    mctx_starting_func();
    abort();
}

#elif PTH_MCTX_MTH(sjlj) && PTH_MCTX_DSP(sjljlx)

/*
 * VARIANT 3: LINUX SPECIFIC JMP_BUF FIDDLING
 *
 * Oh hell, I really love it when Linux guys talk about their "POSIX
 * compliant system". It's far away from POSIX compliant, IMHO. Autoconf
 * finds sigstack/sigaltstack() on Linux, yes. But it doesn't work. Why?
 * Because on Linux below version 2.2 and glibc versions below 2.1 these
 * two functions are nothing more than silly stub functions which always
 * return just -1. Very useful, yeah...
 */

#include <features.h>

/*
 * Initialize `mctx' by saving the current context and then overwriting
 * the program counter and stack pointer slots of the jump buffer with
 * `func' and `sk_addr_hi'. `sk_addr_lo' is not used here and the result
 * of pth_mctx_save() is ignored. Always returns TRUE.
 *
 * NOTE(review): the first branch stores both pointers through an (int)
 * cast, which loses bits wherever pointers are wider than int.
 */
intern int pth_mctx_set(
    pth_mctx_t *mctx, void (*func)(void), char *sk_addr_lo, char *sk_addr_hi)
{
    pth_mctx_save(mctx);
#if defined(__GLIBC__) && defined(__GLIBC_MINOR__) \
    && __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 0 && defined(JB_PC) && defined(JB_SP)
    /* glibc 2.x exposing the JB_PC/JB_SP slot indices */
    mctx->jb[0].__jmpbuf[JB_PC] = (int)func;
    mctx->jb[0].__jmpbuf[JB_SP] = (int)sk_addr_hi;
#elif defined(__GLIBC__) && defined(__GLIBC_MINOR__) \
    && __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 0 && defined(__mc68000__)
    /* glibc 2.x on m68k: address register 0 and stack pointer fields */
    mctx->jb[0].__jmpbuf[0].__aregs[0] = (long int)func;
    mctx->jb[0].__jmpbuf[0].__sp = (int *)sk_addr_hi;
#elif defined(__GNU_LIBRARY__) && defined(__i386__)
    /* other GNU libc on i386: named __pc/__sp fields */
    mctx->jb[0].__jmpbuf[0].__pc = (char *)func;
    mctx->jb[0].__jmpbuf[0].__sp = sk_addr_hi;
#else
#error "Unsupported Linux (g)libc version and/or platform"
#endif
    sigemptyset(&mctx->sigs);
    mctx->error = 0;
    return TRUE;
}

/*
 * VARIANT 4: INTERACTIVE SPECIFIC JMP_BUF FIDDLING
 *
 * No wonder, Interactive Unix (ISC) 4.x contains Microsoft code, so
 * it's clear that this beast lacks both sigstack and sigaltstack (about
 * makecontext we need not even talk). So our only chance is to
 * fiddle with its jmp_buf ingredients, of course. We support only i386
 * boxes.
 */

#elif PTH_MCTX_MTH(sjlj) && PTH_MCTX_DSP(sjljisc)
/*
 * Initialize `mctx' by saving the current context and then patching jump
 * buffer slots 4 and 5 (i386 only): slot 4 receives sk_addr_hi minus
 * sizeof(mctx->jb), slot 5 receives `func'. `sk_addr_lo' is not used.
 * Always returns TRUE.
 */
intern int
pth_mctx_set(pth_mctx_t *mctx, void (*func)(void),
                     char *sk_addr_lo, char *sk_addr_hi)
{
    pth_mctx_save(mctx);
#if i386
    mctx->jb[4] = (int)sk_addr_hi - sizeof(mctx->jb);
    mctx->jb[5] = (int)func;
#else
#error "Unsupported ISC architecture"
#endif
    sigemptyset(&mctx->sigs);
    mctx->error = 0;
    return TRUE;
}

/*
 * VARIANT 5: WIN32 SPECIFIC JMP_BUF FIDDLING
 *
 * Oh hell, Win32 has setjmp(3), but no sigstack(2) or sigaltstack(2).
 * So we have to fiddle around with the jmp_buf here too...
 */

#elif PTH_MCTX_MTH(sjlj) && PTH_MCTX_DSP(sjljw32)
/*
 * Initialize `mctx' by saving the current context and then patching jump
 * buffer slots 7 and 8 (i386 only): slot 7 receives `sk_addr_hi', slot 8
 * receives `func'. `sk_addr_lo' is not used. Always returns TRUE.
 */
intern int
pth_mctx_set(pth_mctx_t *mctx, void (*func)(void),
                     char *sk_addr_lo, char *sk_addr_hi)
{
    pth_mctx_save(mctx);
#if i386
    mctx->jb[7] = (int)sk_addr_hi;
    mctx->jb[8] = (int)func;
#else
#error "Unsupported Win32 architecture"
#endif
    sigemptyset(&mctx->sigs);
    mctx->error = 0;
    return TRUE;
}

/*
 * VARIANT X: JMP_BUF FIDDLING FOR ONE MORE ESOTERIC OS
 * Add the jmp_buf fiddling for your esoteric OS here...
 *
#elif PTH_MCTX_MTH(sjlj) && PTH_MCTX_DSP(sjljeso)
intern int
pth_mctx_set(pth_mctx_t *mctx, void (*func)(void),
             char *sk_addr_lo, char *sk_addr_hi)
{
    pth_mctx_save(mctx);
    sigemptyset(&mctx->sigs);
    mctx->error = 0;
    ...start hacking here...
    mctx->.... = func;
    mctx->.... = sk_addr_hi;
    mctx->.... = sk_addr_lo;
    return TRUE;
}
*/

#else
#error "unknown mctx method"
#endif

/* ==== pth_clean.c ==== */

#if cpp

/* one entry of a thread's stack of cleanup handlers (singly linked) */
typedef struct pth_cleanup_st pth_cleanup_t;
struct pth_cleanup_st {
    pth_cleanup_t *next;    /* entry pushed before this one, or NULL */
    void (*func)(void *);   /* handler function */
    void *arg;              /* argument passed to func */
};

#endif /* cpp */

/*
 * Push a cleanup handler onto the current thread's cleanup stack.
 *
 * func: handler to register (must not be NULL)
 * arg:  argument handed to func when it is run
 *
 * Returns TRUE on success. Returns FALSE with errno EINVAL for a NULL
 * handler and FALSE with errno ENOMEM when no memory is available.
 */
int pth_cleanup_push(void (*func)(void *), void *arg)
{
    pth_cleanup_t *entry;

    if (func == NULL)
        return pth_error(FALSE, EINVAL);

    entry = (pth_cleanup_t *)malloc(sizeof(pth_cleanup_t));
    if (entry == NULL)
        return pth_error(FALSE, ENOMEM);

    /* fill the entry and make it the new top of the stack */
    entry->next = pth_current->cleanups;
    entry->func = func;
    entry->arg  = arg;
    pth_current->cleanups = entry;
    return TRUE;
}

/*
 * Pop the most recently pushed cleanup handler of the current thread.
 *
 * execute: when non-zero the handler is called before its entry is freed
 *
 * Returns TRUE if an entry was popped, FALSE if the stack was empty.
 */
int pth_cleanup_pop(int execute)
{
    pth_cleanup_t *top = pth_current->cleanups;

    if (top == NULL)
        return FALSE;

    /* unlink first, so the handler already sees the shortened stack */
    pth_current->cleanups = top->next;
    if (execute)
        top->func(top->arg);
    free(top);
    return TRUE;
}

/*
 * Pop every cleanup handler of thread `t', newest first. When `execute'
 * is non-zero each handler is called before its entry is freed.
 */
intern void pth_cleanup_popall(pth_t t, int execute)
{
    pth_cleanup_t *entry;

    for (;;) {
        entry = t->cleanups;
        if (entry == NULL)
            break;
        /* unlink before running the handler */
        t->cleanups = entry->next;
        if (execute)
            entry->func(entry->arg);
        free(entry);
    }
    return;
}

/* ==== pth_time.c ==== */

#if cpp
#define PTH_TIME_NOW  (pth_time_t *)(0)
#define PTH_TIME_ZERO &pth_time_zero
#define PTH_TIME(sec,usec) { sec, usec }
#define pth_time_equal(t1,t2) \
        (((t1).tv_sec == (t2).tv_sec) && ((t1).tv_usec == (t2).tv_usec))
#endif /* cpp */

/* a global variable holding a zero time */
intern pth_time_t pth_time_zero = { 0L, 0L };

/*
 * sleep for a specified amount of microseconds
 *
 * Uses usleep(3) when available; otherwise a select(2) call without any
 * descriptors serves as timer and is re-issued as long as it fails
 * with EINTR.
 */
intern void pth_time_usleep(unsigned long usec)
{
#ifdef HAVE_USLEEP
    usleep((unsigned int )usec);
#else
    struct timeval tv;
    int rc;

    /* split into whole seconds and remaining microseconds */
    tv.tv_sec  = usec / 1000000;
    tv.tv_usec = usec - (1000000 * tv.tv_sec);
    do {
        rc = pth_sc(select)(1, NULL, NULL, NULL, &tv);
    } while (rc < 0 && errno == EINTR);
#endif
    return;
}

/*
 * calculate: t1 = t2
 *
 * Both arguments are pointers. When t2 is PTH_TIME_NOW the current time
 * is fetched into *t1 through __gettimeofday(); otherwise the two fields
 * of *t2 are copied. __gettimeofday() hides whether gettimeofday() takes
 * one or two arguments on this platform. Arguments are evaluated more
 * than once.
 */
#if cpp
#if defined(HAVE_GETTIMEOFDAY_ARGS1)
#define __gettimeofday(t) gettimeofday(t)
#else
#define __gettimeofday(t) gettimeofday(t, NULL)
#endif
#define pth_time_set(t1,t2) \
    do { \
        if ((t2) == PTH_TIME_NOW) \
            __gettimeofday((t1)); \
        else { \
            (t1)->tv_sec  = (t2)->tv_sec; \
            (t1)->tv_usec = (t2)->tv_usec; \
        } \
    } while (0)
#endif /* cpp */

/*
 * time value constructor
 *
 * Returns a time value whose fields are set to `sec' and `usec' as given.
 */
pth_time_t pth_time(long sec, long usec)
{
    pth_time_t result;

    result.tv_usec = usec;
    result.tv_sec  = sec;
    return result;
}

/*
 * timeout value constructor
 *
 * Returns the current time advanced by `sec' seconds and `usec'
 * microseconds.
 */
pth_time_t pth_timeout(long sec, long usec)
{
    pth_time_t offset;
    pth_time_t deadline;

    /* the requested distance from now */
    offset.tv_usec = usec;
    offset.tv_sec  = sec;

    /* now + distance */
    pth_time_set(&deadline, PTH_TIME_NOW);
    pth_time_add(&deadline, &offset);
    return deadline;
}

/*
 * calculate: t1 <=> t2
 *
 * Returns a negative value if *t1 is earlier than *t2, zero if both are
 * equal and a positive value if *t1 is later than *t2. Seconds are
 * compared first, microseconds only break a tie.
 *
 * The fields are compared directly instead of being subtracted: squeezing
 * the difference of two wide tv_sec/tv_usec values into an `int' can
 * flip the sign or produce a bogus zero.
 */
intern int pth_time_cmp(pth_time_t *t1, pth_time_t *t2)
{
    if (t1->tv_sec != t2->tv_sec)
        return (t1->tv_sec < t2->tv_sec ? -1 : 1);
    if (t1->tv_usec != t2->tv_usec)
        return (t1->tv_usec < t2->tv_usec ? -1 : 1);
    return 0;
}

/* calculate: t1 = t1 + t2 */
#if cpp
/*
 * Both arguments are pointers to time values; *t2 is added to *t1 in
 * place. A microsecond sum of 1000000 or more is carried into the
 * seconds, so a sum of exactly one second is normalized as well.
 * Wrapped in do { } while (0) like pth_time_set(), so the macro behaves
 * as a single statement (use it with a trailing semicolon). The
 * arguments are evaluated more than once.
 */
#define pth_time_add(t1,t2) \
    do { \
        (t1)->tv_sec  += (t2)->tv_sec; \
        (t1)->tv_usec += (t2)->tv_usec; \
        if ((t1)->tv_usec >= 1000000) { \
            (t1)->tv_sec  += 1; \
            (t1)->tv_usec -= 1000000; \
        } \
    } while (0)
#endif

/* calculate: t1 = t1 - t2 */
#if cpp
/*
 * Both arguments are pointers to time values; *t2 is subtracted from *t1
 * in place. A negative microsecond result borrows one second.
 * Wrapped in do { } while (0) like pth_time_set(), so the macro behaves
 * as a single statement (use it with a trailing semicolon). The
 * arguments are evaluated more than once.
 */
#define pth_time_sub(t1,t2) \
    do { \
        (t1)->tv_sec  -= (t2)->tv_sec; \
        (t1)->tv_usec -= (t2)->tv_usec; \
        if ((t1)->tv_usec < 0) { \
            (t1)->tv_sec  -= 1; \
            (t1)->tv_usec += 1000000; \
        } \
    } while (0)
#endif

/*
 * calculate: t1 = t1 / n
 *
 * Divides the time value *t1 in place by `n'. The remainder of the
 * seconds division is converted to microseconds and added to the divided
 * microseconds; a result of 1000000 microseconds or more is carried into
 * the seconds.
 *
 * A divisor of 0 leaves *t1 untouched instead of dividing by zero.
 */
intern void pth_time_div(pth_time_t *t1, int n)
{
    long q, r;

    if (n == 0)
        return;
    q = (t1->tv_sec / n);
    r = (((t1->tv_sec % n) * 1000000) / n) + (t1->tv_usec / n);
    if (r >= 1000000) {
        q += 1;
        r -= 1000000;
    }
    t1->tv_sec  = q;
    t1->tv_usec = r;
    return;
}

/*
 * calculate: t1 = t1 * n
 *
 * Scales both fields of *t1 by `n' and then moves every full second
 * contained in the microseconds over into the seconds.
 */
intern void pth_time_mul(pth_time_t *t1, int n)
{
    /* scale both parts */
    t1->tv_usec *= n;
    t1->tv_sec  *= n;

    /* carry whole seconds out of the microseconds */
    t1->tv_sec  += t1->tv_usec / 1000000;
    t1->tv_usec %= 1000000;
    return;
}

/*
 * convert a time structure into a double value
 *
 * The result is the time in seconds, with the microseconds as fraction.
 */
intern double pth_time_t2d(pth_time_t *t)
{
    return ((double)t->tv_sec*1000000 + (double)t->tv_usec) / 1000000;
}

/*
 * convert a time structure into a integer value
 *
 * The total number of microseconds is divided by 1000000 with integer
 * arithmetic, i.e. the result is the time in seconds with the fraction
 * cut off.
 */
intern int pth_time_t2i(pth_time_t *t)
{
    return (int)((t->tv_sec*1000000 + t->tv_usec) / 1000000);
}

/*
 * check whether time is positive
 *
 * Returns 1 if *t lies after the zero time, i.e. if the seconds are
 * positive, or the seconds are zero and the microseconds are positive.
 * Returns 0 otherwise. (Requiring both fields to be positive at the same
 * time would wrongly reject values such as 1s+0us or 0s+5us.)
 */
intern int pth_time_pos(pth_time_t *t)
{
    if (t->tv_sec > 0)
        return 1;
    if (t->tv_sec == 0 && t->tv_usec > 0)
        return 1;
    return 0;
}

/* ==== pth_tcb.c ==== */

#if cpp

#define PTH_TCB_NAMELEN 40

    /* thread control block */
struct pth_st {
    /* priority queue handling (fields are maintained by the pth_pqueue_xxx functions) */
    pth_t          q_next;               /* next thread in pool                         */
    pth_t          q_prev;               /* previous thread in pool                     */
    int            q_prio;               /* (relative) priority of thread when queued   */

    /* standard thread control block ingredients */
    int            prio;                 /* base priority of thread                     */
    char           name[PTH_TCB_NAMELEN];/* name of thread (mainly for debugging)       */
    int            dispatches;           /* total number of thread dispatches           */
    pth_state_t    state;                /* current state indicator for thread          */

    /* timing */
    pth_time_t     spawned;              /* time point at which thread was spawned      */
    pth_time_t     lastran;              /* time point at which thread was last running */
    pth_time_t     running;              /* time range the thread was already running   */

    /* event handling */
    pth_event_t    events;               /* events the thread is waiting for            */

    /* per-thread signal handling */
    sigset_t       sigpending;           /* set    of pending signals                   */
    int            sigpendcnt;           /* number of pending signals                   */

    /* machine context */
    pth_mctx_t     mctx;                 /* last saved machine state of thread          */
    char          *stack;                /* pointer to thread stack                     */
    unsigned int   stacksize;            /* size of thread stack                        */
    long          *stackguard;           /* stack overflow guard                        */
    int            stackloan;            /* TRUE if stack was supplied by the caller    */
    void        *(*start_func)(void *);  /* start routine                               */
    void          *start_arg;            /* start argument                              */

    /* thread joining */
    int            joinable;             /* whether thread is joinable                  */
    void          *join_arg;             /* joining argument                            */

    /* per-thread specific storage */
    const void   **data_value;           /* thread specific  values                     */
    int            data_count;           /* number of stored values                     */

    /* cancellation support */
    int            cancelreq;            /* cancellation request is pending             */
    unsigned int   cancelstate;          /* cancellation state of thread                */
    pth_cleanup_t *cleanups;             /* stack of thread cleanup handlers            */

    /* mutex ring */
    pth_ring_t     mutexring;            /* ring of acquired mutex structures           */

#ifdef PTH_EX
    /* per-thread exception handling */
    ex_ctx_t       ex_ctx;               /* exception handling context                  */
#endif
};

#endif /* cpp */

/* printable names of the thread states
   (presumably indexed by the pth_state_t value -- TODO confirm) */
intern const char *pth_state_names[] = {
    "scheduler", "new", "ready", "running", "waiting", "dead"
};

/* make sure SIGSTKSZ exists: fall back to MINSIGSTKSZ, else to 8192;
   pth_tcb_alloc() uses it as the minimum stack size */
#if defined(MINSIGSTKSZ) && !defined(SIGSTKSZ)
#define SIGSTKSZ MINSIGSTKSZ
#endif
#if !defined(SIGSTKSZ)
#define SIGSTKSZ 8192
#endif

/*
 * allocate a thread control block
 *
 * stacksize: size of the thread stack in bytes; 0 means "main" thread,
 *            for which no stack is set up. Any other value below
 *            SIGSTKSZ is raised to SIGSTKSZ.
 * stackaddr: caller supplied stack memory, or NULL to have the stack
 *            allocated here. A supplied stack is marked as loaned and is
 *            not released by pth_tcb_free().
 *
 * Returns the new control block, or NULL if memory could not be
 * allocated. Besides the stack related fields, the members inspected by
 * pth_tcb_free() (data_value, data_count, cleanups) are initialized, so
 * a block can be passed to pth_tcb_free() at any time after allocation.
 * All remaining fields are left for the caller to fill in.
 *
 * NOTE(review): the SIGSTKSZ minimum is applied to a caller supplied
 * stack as well -- confirm that callers never loan a smaller buffer.
 */
intern pth_t pth_tcb_alloc(unsigned int stacksize, void *stackaddr)
{
    pth_t t;

    if (stacksize > 0 && stacksize < SIGSTKSZ)
        stacksize = SIGSTKSZ;
    if ((t = (pth_t)malloc(sizeof(struct pth_st))) == NULL)
        return NULL;
    t->stacksize  = stacksize;
    t->stack      = NULL;
    t->stackguard = NULL;
    t->stackloan  = (stackaddr != NULL ? TRUE : FALSE);

    /* members which pth_tcb_free() looks at */
    t->data_value = NULL;
    t->data_count = 0;
    t->cleanups   = NULL;

    if (stacksize > 0) { /* stacksize == 0 means "main" thread */
        if (stackaddr != NULL)
            t->stack = (char *)(stackaddr);
        else {
            if ((t->stack = (char *)malloc(stacksize)) == NULL) {
                pth_shield { free(t); }
                return NULL;
            }
        }
#if PTH_STACKGROWTH < 0
        /* guard is at lowest address (alignment is guaranteed) */
        t->stackguard = (long *)((long)t->stack); /* double cast to avoid alignment warning */
#else
        /* guard is at highest address (be careful with alignment) */
        t->stackguard = (long *)(t->stack+(((stacksize/sizeof(long))-1)*sizeof(long)));
#endif
        /* magic value stored in the guard word */
        *t->stackguard = 0xDEAD;
    }
    return t;
}

/*
 * free a thread control block
 *
 * Releases the stack (unless it was loaned by the caller), the thread
 * specific data array and all remaining cleanup entries (without running
 * them), and finally the control block itself. NULL is accepted and
 * ignored.
 */
intern void pth_tcb_free(pth_t t)
{
    if (t != NULL) {
        /* a loaned stack stays with its owner */
        if (!t->stackloan && t->stack != NULL)
            free(t->stack);
        if (t->data_value != NULL)
            free(t->data_value);
        /* drop leftover cleanup entries without executing them */
        if (t->cleanups != NULL)
            pth_cleanup_popall(t, FALSE);
        free(t);
    }
    return;
}

/* ==== pth_util.c ==== */

/* calculate numerical minimum
   (each argument may be evaluated twice, so avoid side effects) */
#if cpp
#define pth_util_min(a,b) \
        ((a) > (b) ? (b) : (a))
#endif

/* delete a pending signal */
/* dummy handler installed by pth_util_sigdelete(): being invoked is all
   that is needed, so the body deliberately does nothing */
static void pth_util_sigdelete_sighandler(int _sig)
{
    (void)_sig;
}
intern int pth_util_sigdelete(int sig)
{
    sigset_t ss, oss;
    struct sigaction sa, osa;

    /* check status of signal */
    sigpending(&ss);
    if (!sigismember(&ss, sig))
        return FALSE;

    /* block signal and remember old mask */
    sigemptyset(&ss);
    sigaddset(&ss, sig);
    pth_sc(sigprocmask)(SIG_BLOCK, &ss, &oss);

    /* set signal action to our dummy handler */
    sa.sa_handler = pth_util_sigdelete_sighandler;
    sigfillset(&sa.sa_mask);
    sa.sa_flags = 0;
    if (sigaction(sig, &sa, &osa) != 0) {
        pth_sc(sigprocmask)(SIG_SETMASK, &oss, NULL);
        return FALSE;
    }

    /* now let signal be delivered */
    sigfillset(&ss);
    sigdelset(&ss, sig);
    sigsuspend(&ss);

    /* restore signal mask and handler */
    sigaction(sig, &osa, NULL);
    pth_sc(sigprocmask)(SIG_SETMASK, &oss, NULL);
    return TRUE;
}

/*
 * copy a string like strncpy() but always null-terminate
 *
 * At most dst_size-1 characters of `src' are copied to `dst', followed
 * by a terminating NUL. Returns a pointer to that terminating NUL inside
 * `dst'. With dst_size == 0 nothing is written and `dst' is returned.
 */
intern char *pth_util_cpystrn(char *dst, const char *src, size_t dst_size)
{
    char *out;
    size_t room;

    if (dst_size == 0)
        return dst;

    out = dst;
    /* `room' counts the characters that still fit in front of the NUL */
    for (room = dst_size - 1; room > 0; room--) {
        if ((*out = *src++) == '\0')
            return out;
        out++;
    }
    *out = '\0';
    return out;
}

/*
 * check whether a file-descriptor is valid
 *
 * A descriptor is rejected if it is outside the range [0, FD_SETSIZE) or
 * if fcntl(F_GETFL) fails on it with EBADF.
 */
intern int pth_util_fd_valid(int fd)
{
    if (fd >= 0 && fd < FD_SETSIZE) {
        if (fcntl(fd, F_GETFL) == -1 && errno == EBADF)
            return FALSE;
        return TRUE;
    }
    return FALSE;
}

/*
 * merge input fd set into output fds
 *
 * For each of the three set pairs: every descriptor below `nfd' which is
 * a member of the input set is added to the matching output set. A pair
 * with a NULL input set is skipped.
 */
intern void pth_util_fds_merge(int nfd,
                               fd_set *ifds1, fd_set *ofds1,
                               fd_set *ifds2, fd_set *ofds2,
                               fd_set *ifds3, fd_set *ofds3)
{
    int fd;

    for (fd = 0; fd < nfd; fd++) {
        if (ifds1 != NULL && FD_ISSET(fd, ifds1))
            FD_SET(fd, ofds1);
        if (ifds2 != NULL && FD_ISSET(fd, ifds2))
            FD_SET(fd, ofds2);
        if (ifds3 != NULL && FD_ISSET(fd, ifds3))
            FD_SET(fd, ofds3);
    }
    return;
}

/*
 * test whether fds in the input fd sets occurred in the output fds
 *
 * Returns TRUE as soon as a descriptor below `nfd' is a member of both
 * the input and the output set of one pair, FALSE if there is none.
 * A pair with a NULL input set is skipped.
 */
intern int pth_util_fds_test(int nfd,
                             fd_set *ifds1, fd_set *ofds1,
                             fd_set *ifds2, fd_set *ofds2,
                             fd_set *ifds3, fd_set *ofds3)
{
    int fd;

    for (fd = 0; fd < nfd; fd++) {
        if (ifds1 != NULL && FD_ISSET(fd, ifds1) && FD_ISSET(fd, ofds1))
            return TRUE;
        if (ifds2 != NULL && FD_ISSET(fd, ifds2) && FD_ISSET(fd, ofds2))
            return TRUE;
        if (ifds3 != NULL && FD_ISSET(fd, ifds3) && FD_ISSET(fd, ofds3))
            return TRUE;
    }
    return FALSE;
}

/*
 * Reduce the input fd sets to those descriptors which also occur in the
 * matching output fd sets and return how many input descriptors remain.
 * The count follows BSD select(2) semantics: a descriptor which remains
 * in two sets is counted twice. A pair with a NULL input set is skipped.
 */
intern int pth_util_fds_select(int nfd,
                               fd_set *ifds1, fd_set *ofds1,
                               fd_set *ifds2, fd_set *ofds2,
                               fd_set *ifds3, fd_set *ofds3)
{
    int fd;
    int remaining = 0;

    for (fd = 0; fd < nfd; fd++) {
        if (ifds1 != NULL && FD_ISSET(fd, ifds1)) {
            if (FD_ISSET(fd, ofds1))
                remaining++;
            else
                FD_CLR(fd, ifds1);
        }
        if (ifds2 != NULL && FD_ISSET(fd, ifds2)) {
            if (FD_ISSET(fd, ofds2))
                remaining++;
            else
                FD_CLR(fd, ifds2);
        }
        if (ifds3 != NULL && FD_ISSET(fd, ifds3)) {
            if (FD_ISSET(fd, ofds3))
                remaining++;
            else
                FD_CLR(fd, ifds3);
        }
    }
    return remaining;
}

/* ==== pth_pqueue.c ==== */

#if cpp

/*
 * thread priority queue
 *
 * The threads form a circular doubly linked list through their
 * q_next/q_prev fields. The head carries its priority in q_prio; each
 * following thread stores the difference to its predecessor's priority
 * (see pth_pqueue_insert()).
 */
struct pth_pqueue_st {
    pth_t q_head;   /* thread with the highest priority, NULL if queue is empty */
    int   q_num;    /* number of threads in the queue */
};
typedef struct pth_pqueue_st pth_pqueue_t;

#endif /* cpp */

/*
 * initialize a priority queue; O(1)
 *
 * Resets the queue to the empty state. A NULL queue is ignored.
 */
intern void pth_pqueue_init(pth_pqueue_t *q)
{
    if (q == NULL)
        return;
    q->q_num  = 0;
    q->q_head = NULL;
    return;
}

/*
 * insert thread into priority queue; O(n)
 *
 * The queue stores priorities as differences: the head holds its
 * absolute priority, every other thread holds the distance to its
 * predecessor. The new thread `t' is placed behind all threads whose
 * priority is greater than or equal to `prio'. A NULL queue is ignored.
 */
intern void pth_pqueue_insert(pth_pqueue_t *q, int prio, pth_t t)
{
    pth_t pos;
    int abs_prio;

    if (q == NULL)
        return;

    if (q->q_head == NULL || q->q_num == 0) {
        /* empty queue: t becomes the only element and links to itself */
        t->q_prio = prio;
        t->q_next = t;
        t->q_prev = t;
        q->q_head = t;
    }
    else if (prio > q->q_head->q_prio) {
        /* t outranks the current head: link it in front of it */
        t->q_prev = q->q_head->q_prev;
        t->q_next = q->q_head;
        t->q_prev->q_next = t;
        t->q_next->q_prev = t;
        t->q_prio = prio;
        /* the old head no longer stores an absolute value but its
           distance to the new head */
        t->q_next->q_prio = prio - t->q_next->q_prio;
        q->q_head = t;
    }
    else {
        /* walk forward while the successor's absolute priority is
           still greater than or equal to prio */
        pos = q->q_head;
        abs_prio = pos->q_prio;
        while (pos->q_next != q->q_head
               && (abs_prio - pos->q_next->q_prio) >= prio) {
            pos = pos->q_next;
            abs_prio -= pos->q_prio;
        }
        /* link t right behind pos */
        t->q_prev = pos;
        t->q_next = pos->q_next;
        t->q_prev->q_next = t;
        t->q_next->q_prev = t;
        t->q_prio = abs_prio - prio;
        /* the successor's distance now refers to t (the head keeps
           its absolute value) */
        if (t->q_next != q->q_head)
            t->q_next->q_prio -= t->q_prio;
    }
    q->q_num++;
    return;
}

/*
 * remove thread with maximum priority from priority queue; O(1)
 *
 * Unlinks and returns the head of the queue. Returns NULL for a NULL or
 * empty queue.
 */
intern pth_t pth_pqueue_delmax(pth_pqueue_t *q)
{
    pth_t top;

    if (q == NULL || q->q_head == NULL)
        return NULL;

    top = q->q_head;
    if (top->q_next == top) {
        /* the head is the only element: the queue becomes empty */
        top->q_next = NULL;
        top->q_prev = NULL;
        top->q_prio = 0;
        q->q_head = NULL;
        q->q_num  = 0;
        return top;
    }

    /* unlink the head; its successor takes over and has its stored
       distance converted into an absolute priority */
    top->q_prev->q_next = top->q_next;
    top->q_next->q_prev = top->q_prev;
    top->q_next->q_prio = top->q_prio - top->q_next->q_prio;
    top->q_prio = 0;
    q->q_head = top->q_next;
    q->q_num--;
    return top;
}

/*
 * remove thread from priority queue; O(n)
 *
 * Unlinks `t' from the queue and keeps the stored priority distances of
 * its neighbours consistent. Nothing happens for a NULL or empty queue.
 * There is no check that `t' actually is a member of `q'.
 */
intern void pth_pqueue_delete(pth_pqueue_t *q, pth_t t)
{
    if (q == NULL || q->q_head == NULL)
        return;

    if (q->q_head != t) {
        /* t is not the head: unlink it and hand its distance on to the
           successor (unless the successor is the head) */
        t->q_prev->q_next = t->q_next;
        t->q_next->q_prev = t->q_prev;
        if (t->q_next != q->q_head)
            t->q_next->q_prio += t->q_prio;
        t->q_prio = 0;
        q->q_num--;
        return;
    }

    if (t->q_next == t) {
        /* t is head and only element: the queue becomes empty */
        t->q_next = NULL;
        t->q_prev = NULL;
        t->q_prio = 0;
        q->q_head = NULL;
        q->q_num  = 0;
        return;
    }

    /* t is the head of a longer queue: the successor becomes the head
       and gets an absolute priority */
    t->q_prev->q_next = t->q_next;
    t->q_next->q_prev = t->q_prev;
    t->q_next->q_prio = t->q_prio - t->q_next->q_prio;
    t->q_prio = 0;
    q->q_head = t->q_next;
    q->q_num--;
    return;
}

/*
 * determine priority required to favorite a thread; O(1)
 *
 * Yields the head's priority plus one, or PTH_PRIO_MAX for an empty
 * queue. `q' must not be NULL and is evaluated twice.
 */
#if cpp
#define pth_pqueue_favorite_prio(q) \
    ((q)->q_head != NULL ? (q)->q_head->q_prio + 1 : PTH_PRIO_MAX)
#endif

/*
 * Move thread t, which must already be a member of q, to the top of
 * the queue; O(n).
 *
 * Returns FALSE if q or t is NULL or if the queue is empty, TRUE
 * otherwise. With exactly one element in the queue nothing has to be
 * moved. Membership of t in q is not verified here.
 */
intern int pth_pqueue_favorite(pth_pqueue_t *q, pth_t t)
{
    /* reject a NULL thread instead of passing it on to delete/insert */
    if (q == NULL || t == NULL)
        return FALSE;
    if (q->q_head == NULL || q->q_num == 0)
        return FALSE;
    /* element is already at top */
    if (q->q_num == 1)
        return TRUE;
    /* move to top: take it out and re-insert it with a priority one
       above the (possibly new) head */
    pth_pqueue_delete(q, t);
    pth_pqueue_insert(q, pth_pqueue_favorite_prio(q), t);
    return TRUE;
}

/*
 * Raise the priority of every thread in the queue by one; O(1).
 * Only the head's q_prio has to be touched. A NULL or empty queue is
 * left alone.
 */
intern void pth_pqueue_increase(pth_pqueue_t *q)
{
    if (q != NULL && q->q_head != NULL) {
        /* bumping the head is sufficient */
        q->q_head->q_prio += 1;
    }
}

/* return the number of elements in the priority queue (its q_num
   counter), or -1 if the queue pointer is NULL; O(1). The argument is
   evaluated twice. */
#if cpp
#define pth_pqueue_elements(q) \
    ((q) == NULL ? (-1) : (q)->q_num)
#endif

/* walk to the first thread in the queue; O(1). Yields NULL if the
   queue pointer is NULL, otherwise q_head (which is NULL for an empty
   queue). The argument is evaluated twice. */
#if cpp
#define pth_pqueue_head(q) \
    ((q) == NULL ? NULL : (q)->q_head)
#endif

/*
 * Return the last thread in the queue, i.e. the ring predecessor of
 * the head; O(1). Returns NULL for a NULL or empty queue.
 */
intern pth_t pth_pqueue_tail(pth_pqueue_t *q)
{
    pth_t head;

    if (q == NULL)
        return NULL;
    head = q->q_head;
    if (head == NULL)
        return NULL;
    return head->q_prev;
}

/*
 * Step from thread t to its neighbour in queue q; O(1).
 *
 * direction is PTH_WALK_NEXT or PTH_WALK_PREV. NULL is returned when
 * the step would wrap around the ring (going back from the head or
 * forward from the tail), for any other direction value, and when q
 * or t is NULL.
 */
intern pth_t pth_pqueue_walk(pth_pqueue_t *q, pth_t t, int direction)
{
    if (q == NULL || t == NULL)
        return NULL;

    if (direction == PTH_WALK_PREV) {
        /* the head has no predecessor */
        return (t == q->q_head) ? NULL : t->q_prev;
    }
    if (direction == PTH_WALK_NEXT) {
        /* the tail has no successor */
        return (t->q_next == q->q_head) ? NULL : t->q_next;
    }
    return NULL;
}

/*
 * Tell whether thread t is linked into queue q; O(n).
 * Walks the queue from head to tail and returns TRUE on the first
 * match, FALSE otherwise (including for a NULL or empty queue).
 */
intern int pth_pqueue_contains(pth_pqueue_t *q, pth_t t)
{
    pth_t cur;

    cur = pth_pqueue_head(q);
    while (cur != NULL) {
        if (cur == t)
            return TRUE;
        cur = pth_pqueue_walk(q, cur, PTH_WALK_NEXT);
    }
    return FALSE;
}

/* ==== pth_event.c ==== */

#if cpp

/*
 * Event structure.
 *
 * Events are kept in doubly linked rings; a freshly created,
 * unchained event is a ring consisting only of itself. The member of
 * ev_args that is valid is selected by ev_type.
 */
struct pth_event_st {
    /* successor in the event ring */
    struct pth_event_st *ev_next;
    /* predecessor in the event ring */
    struct pth_event_st *ev_prev;
    /* current status; reset to PTH_STATUS_PENDING on creation and on
       entry to pth_wait() */
    pth_status_t ev_status;
    /* one of the PTH_EVENT_xxx type bits */
    int ev_type;
    /* PTH_UNTIL_xxx goal bits taken from the spec; for PTH_EVENT_TID
       this holds a PTH_STATE_xxx value instead */
    int ev_goal;
    /* type specific arguments as supplied to pth_event() */
    union {
        struct { int fd; }                                        FD;
        struct { int *n; int nfd; fd_set *rfds, *wfds, *efds; }   SELECT;
        struct { sigset_t *sigs; int *sig; }                      SIGS;
        struct { pth_time_t tv; }                                 TIME;
        struct { pth_msgport_t mp; }                              MSG;
        struct { pth_mutex_t *mutex; }                            MUTEX;
        struct { pth_cond_t *cond; }                              COND;
        struct { pth_t tid; }                                     TID;
        struct { int (*func)(void *); void *arg; pth_time_t tv; } FUNC;
    } ev_args;
};

#endif /* cpp */

/* event structure destructor */
static void pth_event_destructor(void *vp)
{
    /* free this single(!) event. That it is just a single event is a
       requirement for pth_event(PTH_MODE_STATIC, ...), or else we would
       get into horrible trouble on asychronous cleanups */
    pth_event_free((pth_event_t)vp, PTH_FREE_THIS);
    return;
}

/* event structure constructor */
pth_event_t pth_event(unsigned long spec, ...)
{
    pth_event_t ev;
    pth_key_t *ev_key;
    va_list ap;

    va_start(ap, spec);

    /* allocate new or reuse static or supplied event structure */
    if (spec & PTH_MODE_REUSE) {
        /* reuse supplied event structure */
        ev = va_arg(ap, pth_event_t);
    }
    else if (spec & PTH_MODE_STATIC) {
        /* reuse static event structure */
        ev_key = va_arg(ap, pth_key_t *);
        if (*ev_key == PTH_KEY_INIT)
            pth_key_create(ev_key, pth_event_destructor);
        ev = (pth_event_t)pth_key_getdata(*ev_key);
        if (ev == NULL) {
            ev = (pth_event_t)malloc(sizeof(struct pth_event_st));
            pth_key_setdata(*ev_key, ev);
        }
    }
    else {
        /* allocate new dynamic event structure */
        ev = (pth_event_t)malloc(sizeof(struct pth_event_st));
    }
    if (ev == NULL)
        return pth_error((pth_event_t)NULL, errno);

    /* create new event ring out of event or insert into existing ring */
    if (spec & PTH_MODE_CHAIN) {
        pth_event_t ch = va_arg(ap, pth_event_t);
        ev->ev_prev = ch->ev_prev;
        ev->ev_next = ch;
        ev->ev_prev->ev_next = ev;
        ev->ev_next->ev_prev = ev;
    }
    else {
        ev->ev_prev = ev;
        ev->ev_next = ev;
    }

    /* initialize common ingredients */
    ev->ev_status = PTH_STATUS_PENDING;

    /* initialize event specific ingredients */
    if (spec & PTH_EVENT_FD) {
        /* filedescriptor event */
        int fd = va_arg(ap, int);
        if (!pth_util_fd_valid(fd))
            return pth_error((pth_event_t)NULL, EBADF);
        ev->ev_type = PTH_EVENT_FD;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_FD_READABLE|\
                                    PTH_UNTIL_FD_WRITEABLE|\
                                    PTH_UNTIL_FD_EXCEPTION));
        ev->ev_args.FD.fd = fd;
    }
    else if (spec & PTH_EVENT_SELECT) {
        /* filedescriptor set select event */
        int *n = va_arg(ap, int *);
        int nfd = va_arg(ap, int);
        fd_set *rfds = va_arg(ap, fd_set *);
        fd_set *wfds = va_arg(ap, fd_set *);
        fd_set *efds = va_arg(ap, fd_set *);
        ev->ev_type = PTH_EVENT_SELECT;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_OCCURRED));
        ev->ev_args.SELECT.n    = n;
        ev->ev_args.SELECT.nfd  = nfd;
        ev->ev_args.SELECT.rfds = rfds;
        ev->ev_args.SELECT.wfds = wfds;
        ev->ev_args.SELECT.efds = efds;
    }
    else if (spec & PTH_EVENT_SIGS) {
        /* signal set event */
        sigset_t *sigs = va_arg(ap, sigset_t *);
        int *sig = va_arg(ap, int *);
        ev->ev_type = PTH_EVENT_SIGS;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_OCCURRED));
        ev->ev_args.SIGS.sigs = sigs;
        ev->ev_args.SIGS.sig = sig;
    }
    else if (spec & PTH_EVENT_TIME) {
        /* interrupt request event */
        pth_time_t tv = va_arg(ap, pth_time_t);
        ev->ev_type = PTH_EVENT_TIME;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_OCCURRED));
        ev->ev_args.TIME.tv = tv;
    }
    else if (spec & PTH_EVENT_MSG) {
        /* message port event */
        pth_msgport_t mp = va_arg(ap, pth_msgport_t);
        ev->ev_type = PTH_EVENT_MSG;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_OCCURRED));
        ev->ev_args.MSG.mp = mp;
    }
    else if (spec & PTH_EVENT_MUTEX) {
        /* mutual exclusion lock */
        pth_mutex_t *mutex = va_arg(ap, pth_mutex_t *);
        ev->ev_type = PTH_EVENT_MUTEX;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_OCCURRED));
        ev->ev_args.MUTEX.mutex = mutex;
    }
    else if (spec & PTH_EVENT_COND) {
        /* condition variable */
        pth_cond_t *cond = va_arg(ap, pth_cond_t *);
        ev->ev_type = PTH_EVENT_COND;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_OCCURRED));
        ev->ev_args.COND.cond = cond;
    }
    else if (spec & PTH_EVENT_TID) {
        /* thread id event */
        pth_t tid = va_arg(ap, pth_t);
        int goal;
        ev->ev_type = PTH_EVENT_TID;
        if (spec & PTH_UNTIL_TID_NEW)
            goal = PTH_STATE_NEW;
        else if (spec & PTH_UNTIL_TID_READY)
            goal = PTH_STATE_READY;
        else if (spec & PTH_UNTIL_TID_WAITING)
            goal = PTH_STATE_WAITING;
        else if (spec & PTH_UNTIL_TID_DEAD)
            goal = PTH_STATE_DEAD;
        else
            goal = PTH_STATE_READY;
        ev->ev_goal = goal;
        ev->ev_args.TID.tid = tid;
    }
    else if (spec & PTH_EVENT_FUNC) {
        /* custom function event */
        ev->ev_type = PTH_EVENT_FUNC;
        ev->ev_goal = (int)(spec & (PTH_UNTIL_OCCURRED));
        ev->ev_args.FUNC.func  = (int (*)(void *))va_arg(ap, void *);
        ev->ev_args.FUNC.arg   = va_arg(ap, void *);
        ev->ev_args.FUNC.tv    = va_arg(ap, pth_time_t);
    }
    else
        return pth_error((pth_event_t)NULL, EINVAL);

    va_end(ap);

    /* return event */
    return ev;
}

/*
 * Determine the type of an event: the event's type bit combined with
 * its goal value. Returns 0 with errno set to EINVAL for a NULL event.
 */
unsigned long pth_event_typeof(pth_event_t ev)
{
    if (ev != NULL)
        return (ev->ev_type | ev->ev_goal);
    return pth_error(0, EINVAL);
}

/*
 * Event extractor: copy the type specific arguments stored in ev back
 * into the locations supplied as variable arguments. The expected
 * pointers mirror the arguments given to pth_event() for the
 * respective type (see the individual branches).
 *
 * Returns TRUE on success, FALSE with errno set to EINVAL if ev is
 * NULL or its type is not handled here (note that there is no branch
 * for PTH_EVENT_SELECT). The argument list is closed with va_end() on
 * every path.
 */
int pth_event_extract(pth_event_t ev, ...)
{
    va_list ap;

    if (ev == NULL)
        return pth_error(FALSE, EINVAL);
    va_start(ap, ev);

    /* extract event specific ingredients */
    if (ev->ev_type & PTH_EVENT_FD) {
        /* filedescriptor event */
        int *fd = va_arg(ap, int *);
        *fd = ev->ev_args.FD.fd;
    }
    else if (ev->ev_type & PTH_EVENT_SIGS) {
        /* signal set event */
        sigset_t **sigs = va_arg(ap, sigset_t **);
        int **sig = va_arg(ap, int **);
        *sigs = ev->ev_args.SIGS.sigs;
        *sig = ev->ev_args.SIGS.sig;
    }
    else if (ev->ev_type & PTH_EVENT_TIME) {
        /* interrupt request event */
        pth_time_t *tv = va_arg(ap, pth_time_t *);
        *tv = ev->ev_args.TIME.tv;
    }
    else if (ev->ev_type & PTH_EVENT_MSG) {
        /* message port event */
        pth_msgport_t *mp = va_arg(ap, pth_msgport_t *);
        *mp = ev->ev_args.MSG.mp;
    }
    else if (ev->ev_type & PTH_EVENT_MUTEX) {
        /* mutual exclusion lock */
        pth_mutex_t **mutex = va_arg(ap, pth_mutex_t **);
        *mutex = ev->ev_args.MUTEX.mutex;
    }
    else if (ev->ev_type & PTH_EVENT_COND) {
        /* condition variable */
        pth_cond_t **cond = va_arg(ap, pth_cond_t **);
        *cond = ev->ev_args.COND.cond;
    }
    else if (ev->ev_type & PTH_EVENT_TID) {
        /* thread id event */
        pth_t *tid = va_arg(ap, pth_t *);
        *tid = ev->ev_args.TID.tid;
    }
    else if (ev->ev_type & PTH_EVENT_FUNC) {
        /* custom function event; the function pointer is handed back
           through a void * slot, matching how pth_event() reads it */
        void **func    = va_arg(ap, void **);
        void **arg     = va_arg(ap, void **);
        pth_time_t *tv = va_arg(ap, pth_time_t *);
        *func = (void *)ev->ev_args.FUNC.func;
        *arg  = ev->ev_args.FUNC.arg;
        *tv   = ev->ev_args.FUNC.tv;
    }
    else {
        /* unknown type: still close the argument list before leaving */
        va_end(ap);
        return pth_error(FALSE, EINVAL);
    }
    va_end(ap);
    return TRUE;
}

/*
 * Concatenate one or more events or event rings.
 *
 * The variable arguments are further events/rings, terminated by a
 * NULL pointer. Each of them is spliced in behind the previously
 * attached one, starting directly after evf; what used to follow evf
 * ends up behind the last attached ring. Returns evf, or NULL with
 * errno set to EINVAL if evf is NULL.
 */
pth_event_t pth_event_concat(pth_event_t evf, ...)
{
    pth_event_t attach_at;  /* event the next ring is hooked behind   */
    pth_event_t closing;    /* former successor of evf, closes ring   */
    pth_event_t ring;       /* ring currently being attached          */
    pth_event_t ring_last;  /* last event of that ring                */
    va_list ap;

    if (evf == NULL)
        return pth_error((pth_event_t)NULL, EINVAL);

    /* open the ring right behind evf */
    va_start(ap, evf);
    closing   = evf->ev_next;
    attach_at = evf;

    /* attach the additional rings one after the other */
    for (;;) {
        ring = va_arg(ap, pth_event_t);
        if (ring == NULL)
            break;
        ring_last = ring->ev_prev;
        attach_at->ev_next = ring;
        ring->ev_prev = attach_at;
        attach_at = ring_last;
    }
    va_end(ap);

    /* close the ring again */
    attach_at->ev_next = closing;
    closing->ev_prev = attach_at;

    return evf;
}

/*
 * Isolate one event from a possibly appended event ring.
 *
 * ev is taken out of its ring and turned into a ring of its own. The
 * return value is the former successor of ev (i.e. the remaining
 * ring), or NULL if ev already was alone. A NULL ev yields NULL with
 * errno set to EINVAL.
 */
pth_event_t pth_event_isolate(pth_event_t ev)
{
    pth_event_t rest;

    if (ev == NULL)
        return pth_error((pth_event_t)NULL, EINVAL);

    /* already isolated: nothing remains */
    if (ev->ev_next == ev && ev->ev_prev == ev)
        return NULL;

    /* bridge the neighbours over ev */
    rest = ev->ev_next;
    rest->ev_prev = ev->ev_prev;
    ev->ev_prev->ev_next = rest;

    /* make ev a ring of its own */
    ev->ev_next = ev;
    ev->ev_prev = ev;
    return rest;
}

/*
 * Determine the status of an event by returning its ev_status field.
 * For a NULL event errno is set to EINVAL and FALSE is returned.
 */
pth_status_t pth_event_status(pth_event_t ev)
{
    if (ev != NULL)
        return ev->ev_status;
    return pth_error(FALSE, EINVAL);
}

/*
 * Walk to the next or previous event in an event ring.
 *
 * direction must contain PTH_WALK_NEXT or PTH_WALK_PREV (NEXT wins if
 * both are given). If it additionally contains PTH_UNTIL_OCCURRED,
 * events are skipped until one with status PTH_STATUS_OCCURRED is
 * found; the starting event itself is considered last, after a full
 * lap around the ring.
 *
 * Returns the event reached. Returns NULL with errno set to EINVAL if
 * ev is NULL or no walking direction is given. Returns NULL (errno
 * untouched) if PTH_UNTIL_OCCURRED was requested but no event in the
 * ring has occurred; without this bound the walk would never
 * terminate in that situation.
 */
pth_event_t pth_event_walk(pth_event_t ev, unsigned int direction)
{
    pth_event_t start;

    if (ev == NULL)
        return pth_error((pth_event_t)NULL, EINVAL);
    if (!(direction & (PTH_WALK_NEXT|PTH_WALK_PREV)))
        return pth_error((pth_event_t)NULL, EINVAL);

    start = ev;
    do {
        if (direction & PTH_WALK_NEXT)
            ev = ev->ev_next;
        else
            ev = ev->ev_prev;
        /* a plain walk stops after one step, a filtered walk at the
           first occurred event */
        if (   !(direction & PTH_UNTIL_OCCURRED)
            || ev->ev_status == PTH_STATUS_OCCURRED)
            return ev;
    } while (ev != start);

    /* went once around the ring without finding an occurred event */
    return NULL;
}

/*
 * Deallocate an event structure.
 *
 * mode PTH_FREE_THIS unlinks just ev from its ring and frees it;
 * mode PTH_FREE_ALL frees every event of the ring ev belongs to.
 * Any other mode is silently ignored. Returns TRUE, or FALSE with
 * errno set to EINVAL if ev is NULL.
 */
int pth_event_free(pth_event_t ev, int mode)
{
    pth_event_t evc;
    pth_event_t evn;

    if (ev == NULL)
        return pth_error(FALSE, EINVAL);
    if (mode == PTH_FREE_THIS) {
        ev->ev_prev->ev_next = ev->ev_next;
        ev->ev_next->ev_prev = ev->ev_prev;
        free(ev);
    }
    else if (mode == PTH_FREE_ALL) {
        /* cut the ring open behind its last event, so the traversal
           ends at NULL and never has to compare against the already
           freed (and thus indeterminate) starting pointer */
        ev->ev_prev->ev_next = NULL;
        for (evc = ev; evc != NULL; evc = evn) {
            evn = evc->ev_next;
            free(evc);
        }
    }
    return TRUE;
}

/*
 * Wait for one or more events.
 *
 * All events of ev_ring are reset to pending, the ring is attached to
 * the current thread, the thread is put into waiting state and
 * control is given up via pth_yield(). After coming back (and passing
 * a cancellation point) the ring is detached again.
 *
 * Returns the number of events in the ring whose status is no longer
 * pending, or -1 with errno set to EINVAL if ev_ring is NULL.
 */
int pth_wait(pth_event_t ev_ring)
{
    pth_event_t cur;
    int count;

    /* at least a waiting ring is required */
    if (ev_ring == NULL)
        return pth_error(-1, EINVAL);
    pth_debug2("pth_wait: enter from thread \"%s\"", pth_current->name);

    /* mark all events in waiting ring as still pending */
    cur = ev_ring;
    for (;;) {
        cur->ev_status = PTH_STATUS_PENDING;
        pth_debug2("pth_wait: waiting on event 0x%lx", (unsigned long)cur);
        cur = cur->ev_next;
        if (cur == ev_ring)
            break;
    }

    /* link event ring to current thread */
    pth_current->events = ev_ring;

    /* move thread into waiting state
       and transfer control to scheduler */
    pth_current->state = PTH_STATE_WAITING;
    pth_yield(NULL);

    /* check for cancellation */
    pth_cancel_point();

    /* unlink event ring from current thread */
    pth_current->events = NULL;

    /* count number of actually occurred (or failed) events */
    count = 0;
    cur = ev_ring;
    for (;;) {
        if (cur->ev_status != PTH_STATUS_PENDING) {
            pth_debug2("pth_wait: non-pending event 0x%lx", (unsigned long)cur);
            count++;
        }
        cur = cur->ev_next;
        if (cur == ev_ring)
            break;
    }

    /* leave to current thread with number of occurred events */
    pth_debug2("pth_wait: leave to thread \"%s\"", pth_current->name);
    return count;
}

/* ==== pth_sched.c ==== */

intern pth_t        pth_main;       /* the main thread                       */
intern pth_t        pth_sched;      /* the permanent scheduler thread        */
intern pth_t        pth_current;    /* the currently running thread          */
intern pth_pqueue_t pth_NQ;         /* queue of new threads                  */
intern pth_pqueue_t pth_RQ;         /* queue of threads ready to run         */
intern pth_pqueue_t pth_WQ;         /* queue of threads waiting for an event */
intern pth_pqueue_t pth_SQ;         /* queue of suspended threads            */
intern pth_pqueue_t pth_DQ;         /* queue of terminated threads           */
intern float        pth_loadval;    /* average scheduler load value          */

static int          pth_sigpipe[2]; /* internal signal occurrence pipe       */
static sigset_t     pth_sigpending; /* mask of pending signals               */
static sigset_t     pth_sigblock;   /* mask of signals we block in scheduler */
static sigset_t     pth_sigcatch;   /* mask of signals we have to catch      */
static sigset_t     pth_sigraised;  /* mask of raised signals                */

/* point in time from which on pth_scheduler_load() recalculates
   pth_loadval again */
static pth_time_t   pth_loadticknext;
/* distance between two load recalculations (presumably one second,
   given PTH_TIME(1,0) -- confirm against the PTH_TIME definition) */
static pth_time_t   pth_loadtickgap = PTH_TIME(1,0);

/*
 * Initialize the scheduler ingredients: the internal signal pipe (both
 * ends switched to non-blocking mode), the scheduler/current thread
 * pointers, the five thread queues and the load average support.
 *
 * Returns TRUE on success. On failure FALSE is returned with errno
 * describing the problem; a pipe that was already created is closed
 * again so that no filedescriptors are leaked.
 */
intern int pth_scheduler_init(void)
{
    int i;
    int err;

    /* create the internal signal pipe */
    if (pipe(pth_sigpipe) == -1)
        return pth_error(FALSE, errno);
    for (i = 0; i < 2; i++) {
        if (pth_fdmode(pth_sigpipe[i], PTH_FDMODE_NONBLOCK) == PTH_FDMODE_ERROR) {
            /* remember the reason before close() can clobber errno */
            err = errno;
            close(pth_sigpipe[0]);
            close(pth_sigpipe[1]);
            return pth_error(FALSE, err);
        }
    }

    /* initialize the essential threads */
    pth_sched   = NULL;
    pth_current = NULL;

    /* initialize the thread queues */
    pth_pqueue_init(&pth_NQ);
    pth_pqueue_init(&pth_RQ);
    pth_pqueue_init(&pth_WQ);
    pth_pqueue_init(&pth_SQ);
    pth_pqueue_init(&pth_DQ);

    /* initialize load support */
    pth_loadval = 1.0;
    pth_time_set(&pth_loadticknext, PTH_TIME_NOW);

    return TRUE;
}

/* drop all threads (except for the currently active one) */
intern void pth_scheduler_drop(void)
{
    pth_t t;

    /* clear the new queue */
    while ((t = pth_pqueue_delmax(&pth_NQ)) != NULL)
        pth_tcb_free(t);
    pth_pqueue_init(&pth_NQ);

    /* clear the ready queue */
    while ((t = pth_pqueue_delmax(&pth_RQ)) != NULL)
        pth_tcb_free(t);
    pth_pqueue_init(&pth_RQ);

    /* clear the waiting queue */
    while ((t = pth_pqueue_delmax(&pth_WQ)) != NULL)
        pth_tcb_free(t);
    pth_pqueue_init(&pth_WQ);

    /* clear the suspend queue */
    while ((t = pth_pqueue_delmax(&pth_SQ)) != NULL)
        pth_tcb_free(t);
    pth_pqueue_init(&pth_SQ);

    /* clear the dead queue */
    while ((t = pth_pqueue_delmax(&pth_DQ)) != NULL)
        pth_tcb_free(t);
    pth_pqueue_init(&pth_DQ);
    return;
}

/*
 * Kill the scheduler ingredients: drop all queued threads and close
 * both ends of the internal signal pipe.
 */
intern void pth_scheduler_kill(void)
{
    int i;

    /* drop all threads */
    pth_scheduler_drop();

    /* remove the internal signal pipe (read end first) */
    for (i = 0; i < 2; i++)
        close(pth_sigpipe[i]);
}

/*
 * Update the average scheduler load.
 *
 * This is called on every context switch, but the average load value
 * has to be adjusted only once per second. If we are called more than
 * once per second, we calculate the value once and then do nothing
 * until the next tick is over. If the scheduler waited for more than
 * one second (or a thread's CPU burst lasted for more than one
 * second), we simulate the missing calculations. That is no problem,
 * because we can assume that the number of ready threads did not
 * change dramatically in the meantime (otherwise more context switches
 * would have occurred and we would have been given more chances to
 * operate). The actual average load is calculated through an
 * exponential average formula.
 */
/*
 * pth_scheduler_load(now): if the time `now' (a pth_time_t *) has
 * reached pth_loadticknext, fold the current number of ready threads
 * into pth_loadval (weight 0.25 for the new sample, 0.75 for the old
 * value), once for every pth_loadtickgap that fits between
 * pth_loadticknext and `now', and schedule the next update one gap
 * after `now'.
 *
 * The body is wrapped in do { ... } while (0) so that the macro acts
 * as a single statement and stays safe inside unbraced if/else
 * constructs; invoke it with a trailing semicolon. `now' is evaluated
 * more than once.
 */
#define pth_scheduler_load(now) \
    do { \
        if (pth_time_cmp((now), &pth_loadticknext) >= 0) { \
            pth_time_t ttmp; \
            int numready; \
            numready = pth_pqueue_elements(&pth_RQ); \
            pth_time_set(&ttmp, (now)); \
            do { \
                pth_loadval = (numready*0.25) + (pth_loadval*0.75); \
                pth_time_sub(&ttmp, &pth_loadtickgap); \
            } while (pth_time_cmp(&ttmp, &pth_loadticknext) >= 0); \
            pth_time_set(&pth_loadticknext, (now)); \
            pth_time_add(&pth_loadticknext, &pth_loadtickgap); \
        } \
    } while (0)

/*
 * The heart of this library: the thread scheduler.
 *
 * This is the body of the scheduler thread (pth_sched). After a short
 * bootstrap it loops forever and in each round
 *   - moves all new threads to the top of the ready queue,
 *   - updates the average load,
 *   - picks the ready thread with the highest priority and switches
 *     the machine context to it,
 *   - after that thread came back: accounts its running time, sorts
 *     out thread-specific signals, checks its stack guard and puts it
 *     into the dead, waiting or ready queue according to its state,
 *   - and finally lets the event manager look after the waiting
 *     threads.
 *
 * The dummy argument is not used. The function never returns in
 * practice; the final return only satisfies the signature.
 */
intern void *pth_scheduler(void *dummy)
{
    sigset_t sigs;
    pth_time_t running;
    pth_time_t snapshot;
    struct sigaction sa;
    sigset_t ss;
    int sig;
    pth_t t;

    /*
     * bootstrapping
     */
    pth_debug1("pth_scheduler: bootstrapping");

    /* mark this thread as the special scheduler thread */
    pth_sched->state = PTH_STATE_SCHEDULER;

    /* block all signals in the scheduler thread */
    sigfillset(&sigs);
    pth_sc(sigprocmask)(SIG_SETMASK, &sigs, NULL);

    /* initialize the snapshot time for bootstrapping the loop */
    pth_time_set(&snapshot, PTH_TIME_NOW);

    /*
     * endless scheduler loop
     */
    for (;;) {
        /*
         * Move threads from new queue to ready queue and give
         * them maximum priority so they start immediately
         */
        while ((t = pth_pqueue_tail(&pth_NQ)) != NULL) {
            pth_pqueue_delete(&pth_NQ, t);
            t->state = PTH_STATE_READY;
            pth_pqueue_insert(&pth_RQ, pth_pqueue_favorite_prio(&pth_RQ), t);
            pth_debug2("pth_scheduler: new thread \"%s\" moved to top of ready queue", t->name);
        }

        /*
         * Update average scheduler load
         */
        pth_scheduler_load(&snapshot);

        /*
         * Find next thread in ready queue
         * (an empty ready queue at this point is treated as a fatal
         * internal error)
         */
        pth_current = pth_pqueue_delmax(&pth_RQ);
        if (pth_current == NULL) {
            fprintf(stderr, "**Pth** SCHEDULER INTERNAL ERROR: "
                            "no more thread(s) available to schedule!?!?\n");
            abort();
        }
        pth_debug4("pth_scheduler: thread \"%s\" selected (prio=%d, qprio=%d)",
                   pth_current->name, pth_current->prio, pth_current->q_prio);

        /*
         * Raise additionally thread-specific signals
         * (they are delivered when we switch the context)
         *
         * Situation is ('#' = signal pending):
         *     process pending (pth_sigpending):         ----####
         *     thread pending (pth_current->sigpending): --##--##
         * Result has to be:
         *     process new pending:                      --######
         */
        if (pth_current->sigpendcnt > 0) {
            sigpending(&pth_sigpending);
            for (sig = 1; sig < PTH_NSIG; sig++)
                if (sigismember(&pth_current->sigpending, sig))
                    if (!sigismember(&pth_sigpending, sig))
                        kill(getpid(), sig);
        }

        /*
         * Set running start time for new thread
         * and perform a context switch to it
         */
        pth_debug3("pth_scheduler: switching to thread 0x%lx (\"%s\")",
                   (unsigned long)pth_current, pth_current->name);

        /* update thread times */
        pth_time_set(&pth_current->lastran, PTH_TIME_NOW);

        /* update scheduler times: the span between the last snapshot
           and now is accounted to the scheduler itself */
        pth_time_set(&running, &pth_current->lastran);
        pth_time_sub(&running, &snapshot);
        pth_time_add(&pth_sched->running, &running);

        /* ** ENTERING THREAD ** - by switching the machine context */
        pth_current->dispatches++;
        pth_mctx_switch(&pth_sched->mctx, &pth_current->mctx);

        /* update scheduler times */
        pth_time_set(&snapshot, PTH_TIME_NOW);
        pth_debug3("pth_scheduler: cameback from thread 0x%lx (\"%s\")",
                   (unsigned long)pth_current, pth_current->name);

        /*
         * Calculate and update the time the previous thread was running
         */
        pth_time_set(&running, &snapshot);
        pth_time_sub(&running, &pth_current->lastran);
        pth_time_add(&pth_current->running, &running);
        pth_debug3("pth_scheduler: thread \"%s\" ran %.6f",
                   pth_current->name, pth_time_t2d(&running));

        /*
         * Remove still pending thread-specific signals
         * (they are re-delivered next time)
         *
         * Situation is ('#' = signal pending):
         *     thread old pending (pth_current->sigpending): --##--##
         *     process old pending (pth_sigpending):         ----####
         *     process still pending (sigstillpending):      ---#-#-#
         * Result has to be:
         *     process new pending:                          -----#-#
         *     thread new pending (pth_current->sigpending): ---#---#
         */
        if (pth_current->sigpendcnt > 0) {
            sigset_t sigstillpending;
            sigpending(&sigstillpending);
            for (sig = 1; sig < PTH_NSIG; sig++) {
                if (sigismember(&pth_current->sigpending, sig)) {
                    if (!sigismember(&sigstillpending, sig)) {
                        /* thread (and perhaps also process) signal delivered */
                        sigdelset(&pth_current->sigpending, sig);
                        pth_current->sigpendcnt--;
                    }
                    else if (!sigismember(&pth_sigpending, sig)) {
                        /* thread signal not delivered */
                        pth_util_sigdelete(sig);
                    }
                }
            }
        }

        /*
         * Check for stack overflow
         * (the guard location is expected to still hold 0xDEAD)
         */
        if (pth_current->stackguard != NULL) {
            if (*pth_current->stackguard != 0xDEAD) {
                pth_debug3("pth_scheduler: stack overflow detected for thread 0x%lx (\"%s\")",
                           (unsigned long)pth_current, pth_current->name);
                /*
                 * if the application doesn't catch SIGSEGVs, we terminate
                 * manually with a SIGSEGV now, but output a reasonable message.
                 */
                if (sigaction(SIGSEGV, NULL, &sa) == 0) {
                    if (sa.sa_handler == SIG_DFL) {
                        fprintf(stderr, "**Pth** STACK OVERFLOW: thread pid_t=0x%lx, name=\"%s\"\n",
                                (unsigned long)pth_current, pth_current->name);
                        kill(getpid(), SIGSEGV);
                        /* wait with only SIGSEGV unblocked so that it
                           gets delivered; abort() is the last resort */
                        sigfillset(&ss);
                        sigdelset(&ss, SIGSEGV);
                        sigsuspend(&ss);
                        abort();
                    }
                }
                /*
                 * else we terminate the thread only and send us a SIGSEGV
                 * which allows the application to handle the situation...
                 */
                pth_current->join_arg = (void *)0xDEAD;
                pth_current->state = PTH_STATE_DEAD;
                kill(getpid(), SIGSEGV);
            }
        }

        /*
         * If previous thread is now marked as dead, kick it out:
         * non-joinable threads are freed immediately, joinable ones
         * are parked in the dead queue
         */
        if (pth_current->state == PTH_STATE_DEAD) {
            pth_debug2("pth_scheduler: marking thread \"%s\" as dead", pth_current->name);
            if (!pth_current->joinable)
                pth_tcb_free(pth_current);
            else
                pth_pqueue_insert(&pth_DQ, PTH_PRIO_STD, pth_current);
            pth_current = NULL;
        }

        /*
         * If thread wants to wait for an event
         * move it to waiting queue now
         */
        if (pth_current != NULL && pth_current->state == PTH_STATE_WAITING) {
            pth_debug2("pth_scheduler: moving thread \"%s\" to waiting queue",
                       pth_current->name);
            pth_pqueue_insert(&pth_WQ, pth_current->prio, pth_current);
            pth_current = NULL;
        }

        /*
         * migrate old threads in ready queue into higher
         * priorities to avoid starvation and insert last running
         * thread back into this queue, too.
         */
        pth_pqueue_increase(&pth_RQ);
        if (pth_current != NULL)
            pth_pqueue_insert(&pth_RQ, pth_current->prio, pth_current);

        /*
         * Manage the events in the waiting queue, i.e. decide whether their
         * events occurred and move them to the ready queue. But wait only if
         * we have already no new or ready threads.
         */
        if (   pth_pqueue_elements(&pth_RQ) == 0
            && pth_pqueue_elements(&pth_NQ) == 0)
            pth_sched_eventmanager(&snapshot, FALSE /* wait */);
        else
            pth_sched_eventmanager(&snapshot, TRUE  /* poll */);
    }

    /* NOTREACHED */
    return NULL;
}

/*
 * Look whether some events already occurred (or failed) and move
 * corresponding threads from waiting queue back to ready queue.
 */
intern void pth_sched_eventmanager(pth_time_t *now, int dopoll)
{
    pth_t nexttimer_thread;
    pth_event_t nexttimer_ev;
    pth_time_t nexttimer_value;
    pth_event_t evh;
    pth_event_t ev;
    pth_t t;
    pth_t tlast;
    int this_occurred;
    int any_occurred;
    fd_set rfds;
    fd_set wfds;
    fd_set efds;
    struct timeval delay;
    struct timeval *pdelay;
    sigset_t oss;
    struct sigaction sa;
    struct sigaction osa[1+PTH_NSIG];
    char minibuf[128];
    int loop_repeat;
    int fdmax;
    int rc;
    int sig;
    int n;

    pth_debug2("pth_sched_eventmanager: enter in %s mode",
               dopoll ? "polling" : "waiting");

    /* entry point for internal looping in event handling */
    loop_entry:
    loop_repeat = FALSE;

    /* initialize fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&efds);
    fdmax = -1;

    /* initialize signal status */
    sigpending(&pth_sigpending);
    sigfillset(&pth_sigblock);
    sigemptyset(&pth_sigcatch);
    sigemptyset(&pth_sigraised);

    /* initialize next timer */
    pth_time_set(&nexttimer_value, PTH_TIME_ZERO);
    nexttimer_thread = NULL;
    nexttimer_ev = NULL;

    /* for all threads in the waiting queue... */
    any_occurred = FALSE;
    for (t = pth_pqueue_head(&pth_WQ); t != NULL;
         t = pth_pqueue_walk(&pth_WQ, t, PTH_WALK_NEXT)) {

        /* determine signals we block */
        for (sig = 1; sig < PTH_NSIG; sig++)
            if (!sigismember(&(t->mctx.sigs), sig))
                sigdelset(&pth_sigblock, sig);

        /* cancellation support */
        if (t->cancelreq == TRUE)
            any_occurred = TRUE;

        /* ... and all their events... */
        if (t->events == NULL)
            continue;
        /* ...check whether events occurred */
        ev = evh = t->events;
        do {
            if (ev->ev_status == PTH_STATUS_PENDING) {
                this_occurred = FALSE;

                /* Filedescriptor I/O */
                if (ev->ev_type == PTH_EVENT_FD) {
                    /* filedescriptors are checked later all at once.
                       Here we only assemble them in the fd sets */
                    if (ev->ev_goal & PTH_UNTIL_FD_READABLE)
                        FD_SET(ev->ev_args.FD.fd, &rfds);
                    if (ev->ev_goal & PTH_UNTIL_FD_WRITEABLE)
                        FD_SET(ev->ev_args.FD.fd, &wfds);
                    if (ev->ev_goal & PTH_UNTIL_FD_EXCEPTION)
                        FD_SET(ev->ev_args.FD.fd, &efds);
                    if (fdmax < ev->ev_args.FD.fd)
                        fdmax = ev->ev_args.FD.fd;
                }
                /* Filedescriptor Set Select I/O */
                else if (ev->ev_type == PTH_EVENT_SELECT) {
                    /* filedescriptors are checked later all at once.
                       Here we only merge the fd sets. */
                    pth_util_fds_merge(ev->ev_args.SELECT.nfd,
                                       ev->ev_args.SELECT.rfds, &rfds,
                                       ev->ev_args.SELECT.wfds, &wfds,
                                       ev->ev_args.SELECT.efds, &efds);
                    if (fdmax < ev->ev_args.SELECT.nfd-1)
                        fdmax = ev->ev_args.SELECT.nfd-1;
                }
                /* Signal Set */
                else if (ev->ev_type == PTH_EVENT_SIGS) {
                    for (sig = 1; sig < PTH_NSIG; sig++) {
                        if (sigismember(ev->ev_args.SIGS.sigs, sig)) {
                            /* thread signal handling */
                            if (sigismember(&t->sigpending, sig)) {
                                *(ev->ev_args.SIGS.sig) = sig;
                                sigdelset(&t->sigpending, sig);
                                t->sigpendcnt--;
                                this_occurred = TRUE;
                            }
                            /* process signal handling */
                            if (sigismember(&pth_sigpending, sig)) {
                                if (ev->ev_args.SIGS.sig != NULL)
                                    *(ev->ev_args.SIGS.sig) = sig;
                                pth_util_sigdelete(sig);
                                sigdelset(&pth_sigpending, sig);
                                this_occurred = TRUE;
                            }
                            else {
                                sigdelset(&pth_sigblock, sig);
                                sigaddset(&pth_sigcatch, sig);
                            }
                        }
                    }
                }
                /* Timer */
                else if (ev->ev_type == PTH_EVENT_TIME) {
                    if (pth_time_cmp(&(ev->ev_args.TIME.tv), now) < 0)
                        this_occurred = TRUE;
                    else {
                        /* remember the timer which will be elapsed next */
                        if ((nexttimer_thread == NULL && nexttimer_ev == NULL) ||
                            pth_time_cmp(&(ev->ev_args.TIME.tv), &nexttimer_value) < 0) {
                            nexttimer_thread = t;
                            nexttimer_ev = ev;
                            pth_time_set(&nexttimer_value, &(ev->ev_args.TIME.tv));
                        }
                    }
                }
                /* Message Port Arrivals */
                else if (ev->ev_type == PTH_EVENT_MSG) {
                    if (pth_ring_elements(&(ev->ev_args.MSG.mp->mp_queue)) > 0)
                        this_occurred = TRUE;
                }
                /* Mutex Release */
                else if (ev->ev_type == PTH_EVENT_MUTEX) {
                    if (!(ev->ev_args.MUTEX.mutex->mx_state & PTH_MUTEX_LOCKED))
                        this_occurred = TRUE;
                }
                /* Condition Variable Signal */
                else if (ev->ev_type == PTH_EVENT_COND) {
                    if (ev->ev_args.COND.cond->cn_state & PTH_COND_SIGNALED) {
                        if (ev->ev_args.COND.cond->cn_state & PTH_COND_BROADCAST)
                            this_occurred = TRUE;
                        else {
                            if (!(ev->ev_args.COND.cond->cn_state & PTH_COND_HANDLED)) {
                                ev->ev_args.COND.cond->cn_state |= PTH_COND_HANDLED;
                                this_occurred = TRUE;
                            }
                        }
                    }
                }
                /* Thread Termination */
                else if (ev->ev_type == PTH_EVENT_TID) {
                    if (   (   ev->ev_args.TID.tid == NULL
                            && pth_pqueue_elements(&pth_DQ) > 0)
                        || (   ev->ev_args.TID.tid != NULL
                            && ev->ev_args.TID.tid->state == ev->ev_goal))
                        this_occurred = TRUE;
                }
                /* Custom Event Function */
                else if (ev->ev_type == PTH_EVENT_FUNC) {
                    if (ev->ev_args.FUNC.func(ev->ev_args.FUNC.arg))
                        this_occurred = TRUE;
                    else {
                        pth_time_t tv;
                        pth_time_set(&tv, now);
                        pth_time_add(&tv, &(ev->ev_args.FUNC.tv));
                        if ((nexttimer_thread == NULL && nexttimer_ev == NULL) ||
                            pth_time_cmp(&tv, &nexttimer_value) < 0) {
                            nexttimer_thread = t;
                            nexttimer_ev = ev;
                            pth_time_set(&nexttimer_value, &tv);
                        }
                    }
                }

                /* tag event if it has occurred */
                if (this_occurred) {
                    pth_debug2("pth_sched_eventmanager: [non-I/O] event occurred for thread \"%s\"", t->name);
                    ev->ev_status = PTH_STATUS_OCCURRED;
                    any_occurred = TRUE;
                }
            }
        } while ((ev = ev->ev_next) != evh);
    }
    if (any_occurred)
        dopoll = TRUE;

    /* now decide how to poll for fd I/O and timers */
    if (dopoll) {
        /* do a polling with immediate timeout,
           i.e. check the fd sets only without blocking */
        pth_time_set(&delay, PTH_TIME_ZERO);
        pdelay = &delay;
    }
    else if (nexttimer_ev != NULL) {
        /* do a polling with a timeout set to the next timer,
           i.e. wait for the fd sets or the next timer */
        pth_time_set(&delay, &nexttimer_value);
        pth_time_sub(&delay, now);
        pdelay = &delay;
    }
    else {
        /* do a polling without a timeout,
           i.e. wait for the fd sets only with blocking */
        pdelay = NULL;
    }

    /* clear pipe and let select() wait for the read-part of the pipe */
    while (pth_sc(read)(pth_sigpipe[0], minibuf, sizeof(minibuf)) > 0) ;
    FD_SET(pth_sigpipe[0], &rfds);
    if (fdmax < pth_sigpipe[0])
        fdmax = pth_sigpipe[0];

    /* replace signal actions for signals we've to catch for events */
    for (sig = 1; sig < PTH_NSIG; sig++) {
        if (sigismember(&pth_sigcatch, sig)) {
            sa.sa_handler = pth_sched_eventmanager_sighandler;
            sigfillset(&sa.sa_mask);
            sa.sa_flags = 0;
            sigaction(sig, &sa, &osa[sig]);
        }
    }

    /* allow some signals to be delivered: Either to our
       catching handler or directly to the configured
       handler for signals not catched by events */
    pth_sc(sigprocmask)(SIG_SETMASK, &pth_sigblock, &oss);

    /* now do the polling for filedescriptor I/O and timers
       WHEN THE SCHEDULER SLEEPS AT ALL, THEN HERE!! */
    rc = -1;
    if (!(dopoll && fdmax == -1))
        while ((rc = pth_sc(select)(fdmax+1, &rfds, &wfds, &efds, pdelay)) < 0
               && errno == EINTR) ;

    /* restore signal mask and actions and handle signals */
    pth_sc(sigprocmask)(SIG_SETMASK, &oss, NULL);
    for (sig = 1; sig < PTH_NSIG; sig++)
        if (sigismember(&pth_sigcatch, sig))
            sigaction(sig, &osa[sig], NULL);

    /* if the timer elapsed, handle it */
    if (!dopoll && rc == 0 && nexttimer_ev != NULL) {
        if (nexttimer_ev->ev_type == PTH_EVENT_FUNC) {
            /* it was an implicit timer event for a function event,
               so repeat the event handling for rechecking the function */
            loop_repeat = TRUE;
        }
        else {
            /* it was an explicit timer event, standing for its own */
            pth_debug2("pth_sched_eventmanager: [timeout] event occurred for thread \"%s\"",
                       nexttimer_thread->name);
            nexttimer_ev->ev_status = PTH_STATUS_OCCURRED;
        }
    }

    /* if the internal signal pipe was used, adjust the select() results */
    if (!dopoll && rc > 0 && FD_ISSET(pth_sigpipe[0], &rfds)) {
        FD_CLR(pth_sigpipe[0], &rfds);
        rc--;
    }

    /* if an error occurred, avoid confusion in the cleanup loop */
    if (rc <= 0) {
        FD_ZERO(&rfds);
        FD_ZERO(&wfds);
        FD_ZERO(&efds);
    }

    /* now comes the final cleanup loop where we've to
       do two jobs: first we've to do the late handling of the fd I/O events and
       additionally if a thread has one occurred event, we move it from the
       waiting queue to the ready queue */

    /* for all threads in the waiting queue... */
    t = pth_pqueue_head(&pth_WQ);
    while (t != NULL) {

        /* do the late handling of the fd I/O and signal
           events in the waiting event ring */
        any_occurred = FALSE;
        if (t->events != NULL) {
            ev = evh = t->events;
            do {
                /*
                 * Late handling for events which have not occurred yet
                 */
                if (ev->ev_status == PTH_STATUS_PENDING) {
                    /* Filedescriptor I/O */
                    if (ev->ev_type == PTH_EVENT_FD) {
                        if (   (   ev->ev_goal & PTH_UNTIL_FD_READABLE
                                && FD_ISSET(ev->ev_args.FD.fd, &rfds))
                            || (   ev->ev_goal & PTH_UNTIL_FD_WRITEABLE
                                && FD_ISSET(ev->ev_args.FD.fd, &wfds))
                            || (   ev->ev_goal & PTH_UNTIL_FD_EXCEPTION
                                && FD_ISSET(ev->ev_args.FD.fd, &efds)) ) {
                            pth_debug2("pth_sched_eventmanager: "
                                       "[I/O] event occurred for thread \"%s\"", t->name);
                            ev->ev_status = PTH_STATUS_OCCURRED;
                        }
                        else if (rc < 0) {
                            /* re-check particular filedescriptor */
                            int rc2;
                            if (ev->ev_goal & PTH_UNTIL_FD_READABLE)
                                FD_SET(ev->ev_args.FD.fd, &rfds);
                            if (ev->ev_goal & PTH_UNTIL_FD_WRITEABLE)
                                FD_SET(ev->ev_args.FD.fd, &wfds);
                            if (ev->ev_goal & PTH_UNTIL_FD_EXCEPTION)
                                FD_SET(ev->ev_args.FD.fd, &efds);
                            pth_time_set(&delay, PTH_TIME_ZERO);
                            while ((rc2 = pth_sc(select)(ev->ev_args.FD.fd+1, &rfds, &wfds, &efds, &delay)) < 0
                                   && errno == EINTR) ;
                            if (rc2 > 0) {
                                /* cleanup afterwards for next iteration */
                                FD_CLR(ev->ev_args.FD.fd, &rfds);
                                FD_CLR(ev->ev_args.FD.fd, &wfds);
                                FD_CLR(ev->ev_args.FD.fd, &efds);
                            } else if (rc2 < 0) {
                                /* cleanup afterwards for next iteration */
                                FD_ZERO(&rfds);
                                FD_ZERO(&wfds);
                                FD_ZERO(&efds);
                                ev->ev_status = PTH_STATUS_FAILED;
                                pth_debug2("pth_sched_eventmanager: "
                                           "[I/O] event failed for thread \"%s\"", t->name);
                            }
                        }
                    }
                    /* Filedescriptor Set I/O */
                    else if (ev->ev_type == PTH_EVENT_SELECT) {
                        if (pth_util_fds_test(ev->ev_args.SELECT.nfd,
                                              ev->ev_args.SELECT.rfds, &rfds,
                                              ev->ev_args.SELECT.wfds, &wfds,
                                              ev->ev_args.SELECT.efds, &efds)) {
                            n = pth_util_fds_select(ev->ev_args.SELECT.nfd,
                                                    ev->ev_args.SELECT.rfds, &rfds,
                                                    ev->ev_args.SELECT.wfds, &wfds,
                                                    ev->ev_args.SELECT.efds, &efds);
                            if (ev->ev_args.SELECT.n != NULL)
                                *(ev->ev_args.SELECT.n) = n;
                            ev->ev_status = PTH_STATUS_OCCURRED;
                            pth_debug2("pth_sched_eventmanager: "
                                       "[I/O] event occurred for thread \"%s\"", t->name);
                        }
                        else if (rc < 0) {
                            /* re-check particular filedescriptor set */
                            int rc2;
                            fd_set *prfds = NULL;
                            fd_set *pwfds = NULL;
                            fd_set *pefds = NULL;
                            fd_set trfds;
                            fd_set twfds;
                            fd_set tefds;
                            if (ev->ev_args.SELECT.rfds) {
                                memcpy(&trfds, ev->ev_args.SELECT.rfds, sizeof(rfds));
                                prfds = &trfds;
                            }
                            if (ev->ev_args.SELECT.wfds) {
                                memcpy(&twfds, ev->ev_args.SELECT.wfds, sizeof(wfds));
                                pwfds = &twfds;
                            }
                            if (ev->ev_args.SELECT.efds) {
                                memcpy(&tefds, ev->ev_args.SELECT.efds, sizeof(efds));
                                pefds = &tefds;
                            }
                            pth_time_set(&delay, PTH_TIME_ZERO);
                            while ((rc2 = pth_sc(select)(ev->ev_args.SELECT.nfd+1, prfds, pwfds, pefds, &delay)) < 0
                                   && errno == EINTR) ;
                            if (rc2 < 0) {
                                ev->ev_status = PTH_STATUS_FAILED;
                                pth_debug2("pth_sched_eventmanager: "
                                           "[I/O] event failed for thread \"%s\"", t->name);
                            }
                        }
                    }
                    /* Signal Set */
                    else if (ev->ev_type == PTH_EVENT_SIGS) {
                        for (sig = 1; sig < PTH_NSIG; sig++) {
                            if (sigismember(ev->ev_args.SIGS.sigs, sig)) {
                                if (sigismember(&pth_sigraised, sig)) {
                                    if (ev->ev_args.SIGS.sig != NULL)
                                        *(ev->ev_args.SIGS.sig) = sig;
                                    pth_debug2("pth_sched_eventmanager: "
                                               "[signal] event occurred for thread \"%s\"", t->name);
                                    sigdelset(&pth_sigraised, sig);
                                    ev->ev_status = PTH_STATUS_OCCURRED;
                                }
                            }
                        }
                    }
                }
                /*
                 * post-processing for events which have already occurred
                 */
                else {
                    /* Condition Variable Signal */
                    if (ev->ev_type == PTH_EVENT_COND) {
                        /* clean signal */
                        if (ev->ev_args.COND.cond->cn_state & PTH_COND_SIGNALED) {
                            ev->ev_args.COND.cond->cn_state &= ~(PTH_COND_SIGNALED);
                            ev->ev_args.COND.cond->cn_state &= ~(PTH_COND_BROADCAST);
                            ev->ev_args.COND.cond->cn_state &= ~(PTH_COND_HANDLED);
                        }
                    }
                }

                /* local to global mapping */
                if (ev->ev_status != PTH_STATUS_PENDING)
                    any_occurred = TRUE;
            } while ((ev = ev->ev_next) != evh);
        }

        /* cancellation support */
        if (t->cancelreq == TRUE) {
            pth_debug2("pth_sched_eventmanager: cancellation request pending for thread \"%s\"", t->name);
            any_occurred = TRUE;
        }

        /* walk to next thread in waiting queue */
        tlast = t;
        t = pth_pqueue_walk(&pth_WQ, t, PTH_WALK_NEXT);

        /*
         * move last thread to ready queue if any events occurred for it.
         * we insert it with a slightly increased queue priority to give it
         * a better chance to immediately get scheduled, else the last running
         * thread might immediately get the CPU again which is usually not
         * what we want, because we often use pth_yield() calls to give
         * others a chance.
         */
        if (any_occurred) {
            pth_pqueue_delete(&pth_WQ, tlast);
            tlast->state = PTH_STATE_READY;
            pth_pqueue_insert(&pth_RQ, tlast->prio+1, tlast);
            pth_debug2("pth_sched_eventmanager: thread \"%s\" moved from waiting "
                       "to ready queue", tlast->name);
        }
    }

    /* perhaps we have to internally loop... */
    if (loop_repeat) {
        pth_time_set(now, PTH_TIME_NOW);
        goto loop_entry;
    }

    pth_debug1("pth_sched_eventmanager: leaving");
    return;
}

/*
 * Signal handler which the event manager installs for the signals in
 * pth_sigcatch while it polls. It records the raised signal in
 * pth_sigraised and writes one byte into the internal signal pipe so
 * that the select() call of the event manager wakes up.
 */
intern void pth_sched_eventmanager_sighandler(int sig)
{
    int errno_saved;
    char c;

    /* the write(2) below may modify errno, so preserve the value
       seen by the interrupted code and restore it before returning */
    errno_saved = errno;

    /* remember raised signal */
    sigaddset(&pth_sigraised, sig);

    /* write signal to signal pipe in order to awake the select();
       the event manager only drains the pipe, so the byte value is
       informational and a failed write is deliberately ignored */
    c = (char)sig;
    pth_sc(write)(pth_sigpipe[1], &c, sizeof(char));

    errno = errno_saved;
    return;
}

/* ==== pth_data.c ==== */

/* one slot of the table of thread-specific data keys */
struct pth_keytab_st {
    int used;                   /* TRUE while the key is allocated */
    void (*destructor)(void *); /* destructor for values of this key (may be NULL) */
};

/* table of all keys; a pth_key_t value is an index into this table */
static struct pth_keytab_st pth_keytab[PTH_KEY_MAX];

/*
 * Allocate a thread-specific data key.
 *
 * The first unused slot of the key table is marked as used, "func" is
 * remembered as its destructor (may be NULL) and the slot index is
 * stored in "*key".
 *
 * Returns TRUE on success. Returns FALSE with errno set to EINVAL if
 * "key" is NULL, or to EAGAIN if all PTH_KEY_MAX slots are in use.
 */
int pth_key_create(pth_key_t *key, void (*func)(void *))
{
    /* the result is stored through "key", so it has to be valid */
    if (key == NULL)
        return pth_error(FALSE, EINVAL);

    /* search for the first free slot, iterating directly on "*key" */
    for ((*key) = 0; (*key) < PTH_KEY_MAX; (*key)++) {
        if (pth_keytab[(*key)].used == FALSE) {
            pth_keytab[(*key)].used = TRUE;
            pth_keytab[(*key)].destructor = func;
            return TRUE;
        }
    }
    return pth_error(FALSE, EAGAIN);
}

/*
 * Release a thread-specific data key.
 *
 * Only the slot in the key table is marked as unused; values which
 * threads still have stored under this key are not touched here.
 *
 * Returns TRUE on success, or FALSE with errno set to EINVAL if "key"
 * is out of range or not currently allocated.
 */
int pth_key_delete(pth_key_t key)
{
    /* reject keys outside the table; the lower bound catches
       negative values such as a never-created key */
    if (key < 0 || key >= PTH_KEY_MAX)
        return pth_error(FALSE, EINVAL);
    if (!pth_keytab[key].used)
        return pth_error(FALSE, EINVAL);
    pth_keytab[key].used = FALSE;
    return TRUE;
}

/*
 * Store "value" under "key" for the current thread.
 *
 * The per-thread value array is allocated lazily (zero-filled) on the
 * first call. The thread's data_count tracks how many keys currently
 * hold a non-NULL value.
 *
 * Returns TRUE on success, or FALSE with errno set to EINVAL (key out
 * of range or not allocated) or ENOMEM (value array allocation failed).
 */
int pth_key_setdata(pth_key_t key, const void *value)
{
    /* reject keys outside the table; the lower bound catches
       negative values such as a never-created key */
    if (key < 0 || key >= PTH_KEY_MAX)
        return pth_error(FALSE, EINVAL);
    if (!pth_keytab[key].used)
        return pth_error(FALSE, EINVAL);

    /* allocate the value array on first use */
    if (pth_current->data_value == NULL) {
        pth_current->data_value = (const void **)calloc(1, sizeof(void *)*PTH_KEY_MAX);
        if (pth_current->data_value == NULL)
            return pth_error(FALSE, ENOMEM);
    }

    /* keep the count of non-NULL values up to date */
    if (pth_current->data_value[key] == NULL) {
        if (value != NULL)
            pth_current->data_count++;
    }
    else {
        if (value == NULL)
            pth_current->data_count--;
    }
    pth_current->data_value[key] = value;
    return TRUE;
}

/*
 * Fetch the value the current thread has stored under "key".
 *
 * Returns the stored value, or NULL if the thread has not stored any
 * value yet. On an out-of-range or unallocated key NULL is returned
 * with errno set to EINVAL.
 */
void *pth_key_getdata(pth_key_t key)
{
    /* reject keys outside the table; the lower bound catches
       negative values such as a never-created key */
    if (key < 0 || key >= PTH_KEY_MAX)
        return pth_error((void *)NULL, EINVAL);
    if (!pth_keytab[key].used)
        return pth_error((void *)NULL, EINVAL);
    /* the value array is only allocated by the first setdata call */
    if (pth_current->data_value == NULL)
        return (void *)NULL;
    return (void *)pth_current->data_value[key];
}

/*
 * Run the destructors for all thread-specific values of thread "t"
 * and free its value array.
 *
 * Up to PTH_DESTRUCTOR_ITERATIONS passes over the key table are made,
 * stopping early once the thread's data_count drops to zero. A value
 * is cleared and counted down before its destructor is invoked.
 */
intern void pth_key_destroydata(pth_t t)
{
    void (*dtor)(void *);
    void *value;
    int pass;
    int k;

    if (t == NULL || t->data_value == NULL)
        return;

    /* POSIX thread iteration scheme */
    for (pass = 0; pass < PTH_DESTRUCTOR_ITERATIONS; pass++) {
        for (k = 0; k < PTH_KEY_MAX; k++) {
            if (t->data_count > 0) {
                if (pth_keytab[k].used && t->data_value[k] != NULL) {
                    /* detach the value first, then hand it to the destructor */
                    value = (void *)t->data_value[k];
                    t->data_value[k] = NULL;
                    t->data_count--;
                    dtor = pth_keytab[k].destructor;
                    if (dtor != NULL)
                        dtor(value);
                }
            }
            /* nothing left to destroy: leave the key loop */
            if (t->data_count == 0)
                break;
        }
        /* nothing left to destroy: leave the iteration loop */
        if (t->data_count == 0)
            break;
    }

    free(t->data_value);
    t->data_value = NULL;
    return;
}

/* ==== pth_msg.c ==== */

#if cpp

/* message port structure */
struct pth_msgport_st {
    pth_ringnode_t mp_node;  /* maintenance node handle; has to stay the first
                                member because ring nodes are cast to ports */
    const char    *mp_name;  /* optional name of message port (may be NULL) */
    pth_t          mp_tid;   /* corresponding thread (the creating thread) */
    pth_ring_t     mp_queue; /* queue of messages pending on port */
};

#endif /* cpp */

/* ring of all currently existing message ports */
static pth_ring_t pth_msgport = PTH_RING_INIT;

/* create a new message port */
pth_msgport_t pth_msgport_create(const char *name)
{
    pth_msgport_t mp;

    /* Notice: "name" is allowed to be NULL */

    /* allocate message port structure */
    if ((mp = (pth_msgport_t)malloc(sizeof(struct pth_msgport_st))) == NULL)
        return pth_error((pth_msgport_t)NULL, ENOMEM);

    /* initialize structure */
    mp->mp_name  = name;
    mp->mp_tid   = pth_current;
    pth_ring_init(&mp->mp_queue);

    /* insert into list of existing message ports */
    pth_ring_append(&pth_msgport, &mp->mp_node);

    return mp;
}

/*
 * Destroy a message port: every message still pending on it is replied
 * to, the port is unlinked from the ring of existing ports and its
 * memory is freed. A NULL port is silently ignored.
 */
void pth_msgport_destroy(pth_msgport_t mp)
{
    pth_message_t *msg;

    /* nothing to do without a port */
    if (mp == NULL)
        return;

    /* drain the queue, replying to each pending message */
    for (msg = pth_msgport_get(mp); msg != NULL; msg = pth_msgport_get(mp))
        pth_msgport_reply(msg);

    /* unlink from the ring of existing message ports */
    pth_ring_delete(&pth_msgport, &mp->mp_node);

    /* release the port structure itself */
    free(mp);

    return;
}

/*
 * Find a known message port through its name.
 *
 * The ring of existing ports is walked once and the first port whose
 * name equals "name" is returned. Ports created without a name can
 * never match. Returns NULL if no port matches; a NULL "name" yields
 * NULL with errno set to EINVAL.
 */
pth_msgport_t pth_msgport_find(const char *name)
{
    pth_msgport_t mp, mpf;

    /* check input */
    if (name == NULL)
        return pth_error((pth_msgport_t)NULL, EINVAL);

    /* iterate over message ports */
    mp = mpf = (pth_msgport_t)pth_ring_first(&pth_msgport);
    while (mp != NULL) {
        /* unnamed ports carry a NULL mp_name which must not reach strcmp() */
        if (mp->mp_name != NULL && strcmp(mp->mp_name, name) == 0)
            break;
        mp = (pth_msgport_t)pth_ring_next(&pth_msgport, (pth_ringnode_t *)mp);
        /* back at the first port: whole ring visited without a match */
        if (mp == mpf) {
            mp = NULL;
            break;
        }
    }
    return mp;
}

/* report the number of messages queued on a port
   (-1 with errno EINVAL for a NULL port) */
int pth_msgport_pending(pth_msgport_t mp)
{
    if (mp != NULL)
        return pth_ring_elements(&mp->mp_queue);
    return pth_error(-1, EINVAL);
}

/* enqueue a message at the tail of a port's queue
   (FALSE with errno EINVAL for a NULL port) */
int pth_msgport_put(pth_msgport_t mp, pth_message_t *m)
{
    if (mp != NULL) {
        pth_ring_append(&mp->mp_queue, (pth_ringnode_t *)m);
        return TRUE;
    }
    return pth_error(FALSE, EINVAL);
}

/* enqueue a message at the head of a port's queue (MDR)
   (FALSE with errno EINVAL for a NULL port) */
int pth_msgport_push(pth_msgport_t mp, pth_message_t *m)
{
    if (mp != NULL) {
        pth_ring_prepend(&mp->mp_queue, (pth_ringnode_t *)m);
        return TRUE;
    }
    return pth_error(FALSE, EINVAL);
}

/* pop the next message off a port's queue; the result of
   pth_ring_pop() is passed through unchanged
   (NULL with errno EINVAL for a NULL port) */
pth_message_t *pth_msgport_get(pth_msgport_t mp)
{
    if (mp == NULL)
        return pth_error((pth_message_t *)NULL, EINVAL);
    return (pth_message_t *)pth_ring_pop(&mp->mp_queue);
}

/* send a message back by putting it onto its own reply port
   (FALSE with errno EINVAL for a NULL message) */
int pth_msgport_reply(pth_message_t *m)
{
    if (m != NULL)
        return pth_msgport_put(m->m_replyport, m);
    return pth_error(FALSE, EINVAL);
}

/* ==== pth_cancel.c ==== */

/* query and/or set the cancellation state of the current thread:
   the old state is stored in "*oldstate" if that is non-NULL, and a
   "newstate" of 0 leaves the state untouched */
void pth_cancel_state(int newstate, int *oldstate)
{
    /* report the previous state first */
    if (oldstate != NULL)
        *oldstate = pth_current->cancelstate;

    /* zero means "query only" */
    if (newstate == 0)
        return;

    pth_current->cancelstate = newstate;
    return;
}

/* enter a cancellation point: terminate the current thread via
   pth_exit(PTH_CANCELED) if a cancellation request is pending and
   cancellation is enabled for it */
void pth_cancel_point(void)
{
    /* no request pending */
    if (pth_current->cancelreq != TRUE)
        return;
    /* request pending, but cancellation is currently not enabled */
    if (!(pth_current->cancelstate & PTH_CANCEL_ENABLE))
        return;

    /* avoid looping if cleanup handlers contain cancellation points */
    pth_current->cancelreq = FALSE;
    pth_debug2("pth_cancel_point: terminating cancelled thread \"%s\"", pth_current->name);
    pth_exit(PTH_CANCELED);
    return;
}

/*
 * Cancel a thread (the friendly way).
 *
 * The thread is always marked with a cancellation request. Only if its
 * cancellation state is both enabled and asynchronous is it cancelled
 * right here: it is removed from its queue, its cleanups are run and
 * it is either freed (not joinable) or moved to the dead queue.
 *
 * Returns TRUE on success. Returns FALSE with errno set to EINVAL
 * (NULL or current thread), EPERM (thread already dead) or ESRCH
 * (thread not found in the queue matching its state).
 */
int pth_cancel(pth_t thread)
{
    pth_pqueue_t *queue;

    /* neither "no thread" nor the current thread can be cancelled */
    if (thread == NULL || thread == pth_current)
        return pth_error(FALSE, EINVAL);

    /* the thread has to be at least still alive */
    if (thread->state == PTH_STATE_DEAD)
        return pth_error(FALSE, EPERM);

    /* now mark the thread as cancelled */
    thread->cancelreq = TRUE;

    /* without enabled asynchronous cancellation the request is just
       left pending for the thread itself to act upon */
    if (   !(thread->cancelstate & PTH_CANCEL_ENABLE)
        || !(thread->cancelstate & PTH_CANCEL_ASYNCHRONOUS))
        return TRUE;

    /* determine the queue the thread has to be in and remove it there */
    if (thread->state == PTH_STATE_NEW)
        queue = &pth_NQ;
    else if (thread->state == PTH_STATE_READY)
        queue = &pth_RQ;
    else if (thread->state == PTH_STATE_WAITING)
        queue = &pth_WQ;
    else
        queue = NULL;
    if (queue == NULL || !pth_pqueue_contains(queue, thread))
        return pth_error(FALSE, ESRCH);
    pth_pqueue_delete(queue, thread);

    /* execute cleanups */
    pth_thread_cleanup(thread);

    /* and now either move it to dead queue or kick it out */
    if (thread->joinable) {
        pth_debug2("pth_cancel: moving cancelled thread \"%s\" to dead queue", thread->name);
        thread->join_arg = PTH_CANCELED;
        thread->state = PTH_STATE_DEAD;
        pth_pqueue_insert(&pth_DQ, PTH_PRIO_STD, thread);
    }
    else {
        pth_debug2("pth_cancel: kicking out cancelled thread \"%s\" immediately", thread->name);
        pth_tcb_free(thread);
    }
    return TRUE;
}

/*
 * Abort a thread (the cruel way).
 *
 * An already dead but joinable thread is simply joined. Any other
 * thread is forced to be detached, switched to enabled asynchronous
 * cancellation and then cancelled.
 *
 * Returns TRUE on success, FALSE if the join or cancel failed, or
 * FALSE with errno set to EINVAL for a NULL or the current thread.
 */
int pth_abort(pth_t thread)
{
    if (thread == NULL)
        return pth_error(FALSE, EINVAL);

    /* the current thread cannot be aborted */
    if (thread == pth_current)
        return pth_error(FALSE, EINVAL);

    /* if thread is already terminated, just join it */
    if (thread->state == PTH_STATE_DEAD && thread->joinable)
        return (pth_join(thread, NULL) ? TRUE : FALSE);

    /* else force it to be detached and cancel it asynchronously */
    thread->joinable = FALSE;
    thread->cancelstate = (PTH_CANCEL_ENABLE|PTH_CANCEL_ASYNCHRONOUS);
    return (pth_cancel(thread) ? TRUE : FALSE);
}

/* ==== pth_sync.c ==== */

/*
**  Mutual Exclusion Locks
*/

/* put a mutex into the initialized, unlocked and unowned state
   (FALSE with errno EINVAL for a NULL mutex) */
int pth_mutex_init(pth_mutex_t *mutex)
{
    if (mutex != NULL) {
        mutex->mx_count = 0;
        mutex->mx_owner = NULL;
        mutex->mx_state = PTH_MUTEX_INITIALIZED;
        return TRUE;
    }
    return pth_error(FALSE, EINVAL);
}

/*
 * Acquire a (recursive) mutex for the current thread.
 *
 * An unlocked mutex is taken immediately; a mutex already owned by the
 * caller just gets its recursion counter incremented. Otherwise the
 * call fails with EBUSY if "tryonly" is set, or waits on a mutex event
 * (optionally combined with "ev_extra") until the mutex is unlocked.
 *
 * Returns TRUE on success. Returns FALSE with errno set to EINVAL
 * (NULL mutex), EDEADLK (mutex not initialized), EBUSY (tryonly and
 * locked by another thread) or EINTR (the wait ended while the mutex
 * event was still pending, i.e. because of "ev_extra").
 */
int pth_mutex_acquire(pth_mutex_t *mutex, int tryonly, pth_event_t ev_extra)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_event_t ev;

    pth_debug2("pth_mutex_acquire: called from thread \"%s\"", pth_current->name);

    /* consistency checks */
    if (mutex == NULL)
        return pth_error(FALSE, EINVAL);
    if (!(mutex->mx_state & PTH_MUTEX_INITIALIZED))
        return pth_error(FALSE, EDEADLK);

    /* fast path: nobody holds the mutex, so just take it */
    if (!(mutex->mx_state & PTH_MUTEX_LOCKED)) {
        mutex->mx_state |= PTH_MUTEX_LOCKED;
        mutex->mx_owner = pth_current;
        mutex->mx_count = 1;
        pth_ring_append(&(pth_current->mutexring), &(mutex->mx_node));
        pth_debug1("pth_mutex_acquire: immediately locking mutex");
        return TRUE;
    }

    /* recursive lock: the caller already is the owner */
    if (mutex->mx_count >= 1 && mutex->mx_owner == pth_current) {
        mutex->mx_count++;
        pth_debug1("pth_mutex_acquire: recursive locking");
        return TRUE;
    }

    /* locked by someone else and the caller does not want to wait */
    if (tryonly)
        return pth_error(FALSE, EBUSY);

    /* slow path: wait (possibly repeatedly) until the mutex is unlocked */
    pth_debug1("pth_mutex_acquire: wait until mutex is unlocked");
    do {
        ev = pth_event(PTH_EVENT_MUTEX|PTH_MODE_STATIC, &ev_key, mutex);
        if (ev_extra != NULL)
            pth_event_concat(ev, ev_extra, NULL);
        pth_wait(ev);
        if (ev_extra != NULL) {
            pth_event_isolate(ev);
            /* woken up although the mutex event did not occur */
            if (pth_event_status(ev) == PTH_STATUS_PENDING)
                return pth_error(FALSE, EINTR);
        }
    } while (mutex->mx_state & PTH_MUTEX_LOCKED);

    /* now it's again unlocked, so acquire mutex */
    pth_debug1("pth_mutex_acquire: locking mutex");
    mutex->mx_state |= PTH_MUTEX_LOCKED;
    mutex->mx_owner = pth_current;
    mutex->mx_count = 1;
    pth_ring_append(&(pth_current->mutexring), &(mutex->mx_node));
    return TRUE;
}

/*
 * Release a mutex held by the current thread.
 *
 * The recursion counter is decremented; only when it reaches zero is
 * the mutex really unlocked and removed from the current thread's
 * ring of held mutexes.
 *
 * Returns TRUE on success. Returns FALSE with errno set to EINVAL
 * (NULL mutex), EDEADLK (not initialized or not locked) or EACCES
 * (the current thread is not the owner).
 */
int pth_mutex_release(pth_mutex_t *mutex)
{
    /* consistency checks */
    if (mutex == NULL)
        return pth_error(FALSE, EINVAL);
    if (!(mutex->mx_state & PTH_MUTEX_INITIALIZED))
        return pth_error(FALSE, EDEADLK);
    if (!(mutex->mx_state & PTH_MUTEX_LOCKED))
        return pth_error(FALSE, EDEADLK);
    if (mutex->mx_owner != pth_current)
        return pth_error(FALSE, EACCES);

    /* an outer recursion level still holds the mutex */
    if (--mutex->mx_count > 0)
        return TRUE;

    /* last level released: really unlock the mutex */
    mutex->mx_state &= ~(PTH_MUTEX_LOCKED);
    mutex->mx_owner = NULL;
    mutex->mx_count = 0;
    pth_ring_delete(&(pth_current->mutexring), &(mutex->mx_node));
    return TRUE;
}

/*
 * Call pth_mutex_release() once for each mutex found while walking the
 * ring of mutexes held by "thread". A NULL thread is ignored.
 *
 * NOTE(review): pth_mutex_release() may unlink the node from the ring
 * which is being walked here, and it checks ownership against
 * pth_current rather than against "thread" -- confirm that this walk
 * really visits (and releases) every mutex in all calling contexts.
 */
intern void pth_mutex_releaseall(pth_t thread)
{
    pth_ringnode_t *node, *first;

    if (thread == NULL)
        return;

    /* the ring nodes are treated as the mutexes themselves */
    first = pth_ring_first(&(thread->mutexring));
    for (node = first; node != NULL; ) {
        pth_mutex_release((pth_mutex_t *)node);
        node = pth_ring_next(&(thread->mutexring), node);
        /* stop as soon as the walk is back at the start node */
        if (node == first)
            break;
    }
    return;
}

/*
**  Read-Write Locks
*/

/* initialize a read-write lock: no readers yet and both internal
   mutexes freshly initialized (FALSE with errno EINVAL for a NULL lock) */
int pth_rwlock_init(pth_rwlock_t *rwlock)
{
    if (rwlock != NULL) {
        rwlock->rw_state = PTH_RWLOCK_INITIALIZED;
        rwlock->rw_readers = 0;
        pth_mutex_init(&(rwlock->rw_mutex_rd));
        pth_mutex_init(&(rwlock->rw_mutex_rw));
        return TRUE;
    }
    return pth_error(FALSE, EINVAL);
}

/*
 * Acquire a read-write lock in mode "op".
 *
 * For PTH_RWLOCK_RW the rw mutex is acquired directly. For any other
 * "op" the reader count is incremented under the protection of the rd
 * mutex, and the first reader additionally acquires the rw mutex.
 * "tryonly" and "ev_extra" are passed on to pth_mutex_acquire().
 *
 * Returns TRUE on success, FALSE if a mutex could not be acquired, or
 * FALSE with errno set to EINVAL (NULL lock) or EDEADLK (lock not
 * initialized).
 */
int pth_rwlock_acquire(pth_rwlock_t *rwlock, int op, int tryonly, pth_event_t ev_extra)
{
    /* consistency checks */
    if (rwlock == NULL)
        return pth_error(FALSE, EINVAL);
    if (!(rwlock->rw_state & PTH_RWLOCK_INITIALIZED))
        return pth_error(FALSE, EDEADLK);

    /* read-write lock is simple: it is just the rw mutex */
    if (op == PTH_RWLOCK_RW) {
        if (!pth_mutex_acquire(&(rwlock->rw_mutex_rw), tryonly, ev_extra))
            return FALSE;
        rwlock->rw_mode = PTH_RWLOCK_RW;
        return TRUE;
    }

    /* read-only lock is more complicated to get right:
       the reader bookkeeping is serialized through the rd mutex */
    if (!pth_mutex_acquire(&(rwlock->rw_mutex_rd), tryonly, ev_extra))
        return FALSE;
    rwlock->rw_readers++;
    if (rwlock->rw_readers == 1) {
        /* the first reader takes the rw mutex */
        if (!pth_mutex_acquire(&(rwlock->rw_mutex_rw), tryonly, ev_extra)) {
            /* undo the bookkeeping inside pth_shield while dropping the rd mutex */
            rwlock->rw_readers--;
            pth_shield { pth_mutex_release(&(rwlock->rw_mutex_rd)); }
            return FALSE;
        }
    }
    rwlock->rw_mode = PTH_RWLOCK_RD;
    pth_mutex_release(&(rwlock->rw_mutex_rd));
    return TRUE;
}

/*
 * Release a read-write lock according to its current mode.
 *
 * In PTH_RWLOCK_RW mode the rw mutex is released directly. Otherwise
 * the reader count is decremented under the protection of the rd
 * mutex, and the last reader additionally releases the rw mutex.
 *
 * Returns TRUE on success, FALSE if a mutex operation failed, or FALSE
 * with errno set to EINVAL (NULL lock) or EDEADLK (lock not
 * initialized).
 */
int pth_rwlock_release(pth_rwlock_t *rwlock)
{
    /* consistency checks */
    if (rwlock == NULL)
        return pth_error(FALSE, EINVAL);
    if (!(rwlock->rw_state & PTH_RWLOCK_INITIALIZED))
        return pth_error(FALSE, EDEADLK);

    /* read-write unlock is simple: just drop the rw mutex */
    if (rwlock->rw_mode == PTH_RWLOCK_RW)
        return (pth_mutex_release(&(rwlock->rw_mutex_rw)) ? TRUE : FALSE);

    /* read-only unlock is more complicated to get right:
       the reader bookkeeping is serialized through the rd mutex */
    if (!pth_mutex_acquire(&(rwlock->rw_mutex_rd), FALSE, NULL))
        return FALSE;
    rwlock->rw_readers--;
    if (rwlock->rw_readers == 0) {
        /* the last reader drops the rw mutex */
        if (!pth_mutex_release(&(rwlock->rw_mutex_rw))) {
            /* undo the bookkeeping inside pth_shield while dropping the rd mutex */
            rwlock->rw_readers++;
            pth_shield { pth_mutex_release(&(rwlock->rw_mutex_rd)); }
            return FALSE;
        }
    }
    rwlock->rw_mode = PTH_RWLOCK_RD;
    pth_mutex_release(&(rwlock->rw_mutex_rd));
    return TRUE;
}

/*
**  Condition Variables
*/

/* put a condition variable into the initialized state without any
   waiters (FALSE with errno EINVAL for a NULL condition variable) */
int pth_cond_init(pth_cond_t *cond)
{
    if (cond != NULL) {
        cond->cn_waiters = 0;
        cond->cn_state = PTH_COND_INITIALIZED;
        return TRUE;
    }
    return pth_error(FALSE, EINVAL);
}

/*
 * Cleanup handler pushed by pth_cond_await() around its wait.
 * "_cleanvec" is a two-element vector: [0] the mutex, [1] the
 * condition variable.
 */
static void pth_cond_cleanup_handler(void *_cleanvec)
{
    void **vec = (void **)_cleanvec;
    pth_mutex_t *mutex = (pth_mutex_t *)vec[0];
    pth_cond_t *cond = (pth_cond_t *)vec[1];

    /* re-acquire mutex when pth_cond_await() is cancelled
       in order to restore the condition variable semantics */
    pth_mutex_acquire(mutex, FALSE, NULL);

    /* the cancelled thread no longer waits on the condition */
    cond->cn_waiters--;
    return;
}

/*
 * Wait on condition variable `cond'. The caller is expected to hold
 * `mutex'; it is released for the duration of the wait and acquired
 * again before returning. An optional extra event `ev_extra' is
 * concatenated to the internal condition event for the wait.
 *
 * Fails via pth_error(FALSE, EINVAL) if `cond' or `mutex' is NULL and
 * via pth_error(FALSE, EDEADLK) if `cond' is not initialized; returns
 * TRUE otherwise.
 */
int pth_cond_await(pth_cond_t *cond, pth_mutex_t *mutex, pth_event_t ev_extra)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    void *cleanvec[2];
    pth_event_t ev;

    /* consistency checks */
    if (cond == NULL || mutex == NULL)
        return pth_error(FALSE, EINVAL);
    if (!(cond->cn_state & PTH_COND_INITIALIZED))
        return pth_error(FALSE, EDEADLK);

    /* check whether we can do a short-circuit wait: a pending
       non-broadcast signal is consumed here without waiting at all */
    if (    (cond->cn_state & PTH_COND_SIGNALED)
        && !(cond->cn_state & PTH_COND_BROADCAST)) {
        cond->cn_state &= ~(PTH_COND_SIGNALED);
        cond->cn_state &= ~(PTH_COND_BROADCAST);
        cond->cn_state &= ~(PTH_COND_HANDLED);
        return TRUE;
    }

    /* add us to the number of waiters */
    cond->cn_waiters++;

    /* release mutex (caller had to acquire it first) */
    pth_mutex_release(mutex);

    /* wait until the condition is signaled; the cleanup handler is
       registered only around the wait and popped without being run
       (FALSE) once the wait has returned */
    ev = pth_event(PTH_EVENT_COND|PTH_MODE_STATIC, &ev_key, cond);
    if (ev_extra != NULL)
        pth_event_concat(ev, ev_extra, NULL);
    cleanvec[0] = mutex;
    cleanvec[1] = cond;
    pth_cleanup_push(pth_cond_cleanup_handler, cleanvec);
    pth_wait(ev);
    pth_cleanup_pop(FALSE);
    if (ev_extra != NULL)
        pth_event_isolate(ev);

    /* reacquire mutex */
    pth_mutex_acquire(mutex, FALSE, NULL);

    /* remove us from the number of waiters */
    cond->cn_waiters--;

    /* return with the mutex held again, as on entry */
    return TRUE;
}

/* signal a condition variable; with a non-zero `broadcast' the
   signal is flagged as a broadcast, otherwise as a plain signal */
int pth_cond_notify(pth_cond_t *cond, int broadcast)
{
    /* argument validation */
    if (cond == NULL)
        return pth_error(FALSE, EINVAL);
    if (!(cond->cn_state & PTH_COND_INITIALIZED))
        return pth_error(FALSE, EDEADLK);

    /* without at least one waiter there is nothing to do (POSIX semantics) */
    if (!(cond->cn_waiters > 0))
        return TRUE;

    /* flag the condition as signaled and not yet handled */
    cond->cn_state |= PTH_COND_SIGNALED;
    if (broadcast)
        cond->cn_state |= PTH_COND_BROADCAST;
    else
        cond->cn_state &= ~(PTH_COND_BROADCAST);
    cond->cn_state &= ~(PTH_COND_HANDLED);

    /* hand control to the scheduler so other threads can wake up */
    pth_yield(NULL);

    return TRUE;
}

/*
**  Barriers
*/

/* set up a barrier which is passed once `threshold' threads reached it */
int pth_barrier_init(pth_barrier_t *barrier, int threshold)
{
    if (barrier == NULL || threshold <= 0)
        return pth_error(FALSE, EINVAL);

    /* the embedded mutex and condition variable come first */
    if (   !pth_mutex_init(&(barrier->br_mutex))
        || !pth_cond_init(&(barrier->br_cond)))
        return FALSE;

    /* start a fresh cycle with the full count outstanding */
    barrier->br_threshold = threshold;
    barrier->br_count     = threshold;
    barrier->br_cycle     = FALSE;
    barrier->br_state     = PTH_BARRIER_INITIALIZED;
    return TRUE;
}

/*
 * Reach a barrier: decrement the outstanding count under the barrier
 * mutex. The thread which brings the count to zero flips the cycle
 * flag, re-arms the count and broadcasts on the barrier condition;
 * every other thread waits on the condition until the cycle flag
 * changes. Cancellation is disabled while waiting.
 *
 * Fails via pth_error(FALSE, EINVAL) for a NULL or uninitialized
 * barrier and returns FALSE if the barrier mutex cannot be acquired.
 */
int pth_barrier_reach(pth_barrier_t *barrier)
{
    int cancel, cycle;
    int rv;

    if (barrier == NULL)
        return pth_error(FALSE, EINVAL);
    if (!(barrier->br_state & PTH_BARRIER_INITIALIZED))
        return pth_error(FALSE, EINVAL);

    if (!pth_mutex_acquire(&(barrier->br_mutex), FALSE, NULL))
        return FALSE;
    /* remember the cycle we arrived in to detect its completion */
    cycle = barrier->br_cycle;
    if (--(barrier->br_count) == 0) {
        /* last thread reached the barrier */
        barrier->br_cycle   = !(barrier->br_cycle);
        barrier->br_count   = barrier->br_threshold;
        /* a successful broadcast is reported as PTH_BARRIER_TAILLIGHT,
           a failed one keeps the false value in rv */
        if ((rv = pth_cond_notify(&(barrier->br_cond), TRUE)))
            rv = PTH_BARRIER_TAILLIGHT;
    }
    else {
        /* wait until remaining threads have reached the barrier, too */
        pth_cancel_state(PTH_CANCEL_DISABLE, &cancel);
        /* NOTE(review): br_count was already decremented above, so this
           comparison against br_threshold looks like it cannot hold; and
           rv is overwritten by pth_cond_await() in the loop below, which
           runs at least once here -- confirm the intended HEADLIGHT
           semantics */
        if (barrier->br_threshold == barrier->br_count)
            rv = PTH_BARRIER_HEADLIGHT;
        else
            rv = TRUE;
        /* re-check the cycle flag after every wakeup */
        while (cycle == barrier->br_cycle) {
            if (!(rv = pth_cond_await(&(barrier->br_cond), &(barrier->br_mutex), NULL)))
                break;
        }
        /* restore the caller's cancellation state */
        pth_cancel_state(cancel, NULL);
    }
    pth_mutex_release(&(barrier->br_mutex));
    return rv;
}

/* ==== pth_attr.c ==== */

#if cpp

/* direction selector passed as `cmd' to pth_attr_ctrl() */
enum {
    PTH_ATTR_GET,   /* read an attribute into caller-supplied storage */
    PTH_ATTR_SET    /* write an attribute from a caller-supplied value */
};

/* thread attribute object; if a_tid is non-NULL the object is bound
   to that thread and pth_attr_ctrl() works on the thread's own fields
   instead of the a_* copies below */
struct pth_attr_st {
    pth_t        a_tid;                    /* bound thread, or NULL if unbound */
    int          a_prio;                   /* priority */
    int          a_dispatches;             /* dispatch counter */
    char         a_name[PTH_TCB_NAMELEN];  /* thread name */
    int          a_joinable;               /* joinable flag */
    unsigned int a_cancelstate;            /* cancellation state */
    unsigned int a_stacksize;              /* stack size */
    char        *a_stackaddr;              /* stack address */
};

#endif /* cpp */

/* allocate an attribute object which is bound to thread `t';
   only the a_tid member is set up here */
pth_attr_t pth_attr_of(pth_t t)
{
    pth_attr_t attr;

    if (t == NULL)
        return pth_error((pth_attr_t)NULL, EINVAL);
    attr = (pth_attr_t)malloc(sizeof(struct pth_attr_st));
    if (attr == NULL)
        return pth_error((pth_attr_t)NULL, ENOMEM);
    attr->a_tid = t;
    return attr;
}

pth_attr_t pth_attr_new(void)
{
    pth_attr_t a;

    if ((a = (pth_attr_t)malloc(sizeof(struct pth_attr_st))) == NULL)
        return pth_error((pth_attr_t)NULL, ENOMEM);
    a->a_tid = NULL;
    pth_attr_init(a);
    return a;
}

/* free an attribute object */
int pth_attr_destroy(pth_attr_t a)
{
    if (a != NULL) {
        free(a);
        return TRUE;
    }
    return pth_error(FALSE, EINVAL);
}

/* reset an unbound attribute object to the default values;
   objects bound to a thread are rejected with EPERM */
int pth_attr_init(pth_attr_t a)
{
    if (a == NULL)
        return pth_error(FALSE, EINVAL);
    if (a->a_tid != NULL)
        return pth_error(FALSE, EPERM);

    /* scheduling related defaults */
    a->a_prio        = PTH_PRIO_STD;
    a->a_dispatches  = 0;
    a->a_joinable    = TRUE;
    a->a_cancelstate = PTH_CANCEL_DEFAULT;

    /* stack defaults */
    a->a_stacksize   = 64*1024;
    a->a_stackaddr   = NULL;

    /* placeholder name */
    pth_util_cpystrn(a->a_name, "unknown", PTH_TCB_NAMELEN);

    return TRUE;
}

/* query attribute `op' of `a'; the variable argument is
   handed on to pth_attr_ctrl() in PTH_ATTR_GET direction */
int pth_attr_get(pth_attr_t a, int op, ...)
{
    va_list args;
    int result;

    va_start(args, op);
    result = pth_attr_ctrl(PTH_ATTR_GET, a, op, args);
    va_end(args);

    return result;
}

/* change attribute `op' of `a'; the variable argument is
   handed on to pth_attr_ctrl() in PTH_ATTR_SET direction */
int pth_attr_set(pth_attr_t a, int op, ...)
{
    va_list args;
    int result;

    va_start(args, op);
    result = pth_attr_ctrl(PTH_ATTR_SET, a, op, args);
    va_end(args);

    return result;
}

/*
 * Common worker behind pth_attr_get() and pth_attr_set().
 *
 *   cmd  PTH_ATTR_GET or PTH_ATTR_SET
 *   a    attribute object; if a->a_tid is non-NULL the fields of the
 *        bound thread are accessed instead of the a_* members
 *   op   attribute selector (PTH_ATTR_xxx)
 *   ap   exactly one pending argument: the new value for a set
 *        operation, or a pointer to result storage for a get operation
 *
 * Returns TRUE on success. Fails via pth_error() with EINVAL (NULL
 * object or unknown selector), EPERM (attribute is not writable in
 * this situation) or EACCES (attribute requires a bound thread).
 */
intern int pth_attr_ctrl(int cmd, pth_attr_t a, int op, va_list ap)
{
    if (a == NULL)
        return pth_error(FALSE, EINVAL);
    switch (op) {
        case PTH_ATTR_PRIO: {
            /* priority */
            int val, *src, *dst;
            if (cmd == PTH_ATTR_SET) {
                src = &val; val = va_arg(ap, int);
                dst = (a->a_tid != NULL ? &a->a_tid->prio : &a->a_prio);
            }
            else {
                src = (a->a_tid != NULL ? &a->a_tid->prio : &a->a_prio);
                dst = va_arg(ap, int *);
            }
            *dst = *src;
            break;
        }
        case PTH_ATTR_NAME: {
            /* name */
            if (cmd == PTH_ATTR_SET) {
                char *src, *dst;
                src = va_arg(ap, char *);
                dst = (a->a_tid != NULL ? a->a_tid->name : a->a_name);
                pth_util_cpystrn(dst, src, PTH_TCB_NAMELEN);
            }
            else {
                /* the caller receives a pointer to the internal
                   name buffer, not a copy of it */
                char *src, **dst;
                src = (a->a_tid != NULL ? a->a_tid->name : a->a_name);
                dst = va_arg(ap, char **);
                *dst = src;
            }
            break;
        }
        case PTH_ATTR_DISPATCHES: {
            /* incremented on every context switch */
            int val, *src, *dst;
            if (cmd == PTH_ATTR_SET) {
                src = &val; val = va_arg(ap, int);
                dst = (a->a_tid != NULL ? &a->a_tid->dispatches : &a->a_dispatches);
            }
            else {
                src = (a->a_tid != NULL ? &a->a_tid->dispatches : &a->a_dispatches);
                dst = va_arg(ap, int *);
            }
            *dst = *src;
            /* must not fall through into PTH_ATTR_JOINABLE: that case
               would fetch a second, non-existing argument from ap */
            break;
        }
        case PTH_ATTR_JOINABLE: {
            /* detachment type */
            int val, *src, *dst;
            if (cmd == PTH_ATTR_SET) {
                src = &val; val = va_arg(ap, int);
                dst = (a->a_tid != NULL ? &a->a_tid->joinable : &a->a_joinable);
            }
            else {
                src = (a->a_tid != NULL ? &a->a_tid->joinable : &a->a_joinable);
                dst = va_arg(ap, int *);
            }
            *dst = *src;
            break;
        }
        case PTH_ATTR_CANCEL_STATE: {
            /* cancellation state */
            unsigned int val, *src, *dst;
            if (cmd == PTH_ATTR_SET) {
                src = &val; val = va_arg(ap, unsigned int);
                dst = (a->a_tid != NULL ? &a->a_tid->cancelstate : &a->a_cancelstate);
            }
            else {
                src = (a->a_tid != NULL ? &a->a_tid->cancelstate : &a->a_cancelstate);
                dst = va_arg(ap, unsigned int *);
            }
            *dst = *src;
            break;
        }
        case PTH_ATTR_STACK_SIZE: {
            /* stack size (only settable on unbound attribute objects) */
            unsigned int val, *src, *dst;
            if (cmd == PTH_ATTR_SET) {
                if (a->a_tid != NULL)
                    return pth_error(FALSE, EPERM);
                src = &val; val = va_arg(ap, unsigned int);
                dst = &a->a_stacksize;
            }
            else {
                src = (a->a_tid != NULL ? &a->a_tid->stacksize : &a->a_stacksize);
                dst = va_arg(ap, unsigned int *);
            }
            *dst = *src;
            break;
        }
        case PTH_ATTR_STACK_ADDR: {
            /* stack address (only settable on unbound attribute objects) */
            char *val, **src, **dst;
            if (cmd == PTH_ATTR_SET) {
                if (a->a_tid != NULL)
                    return pth_error(FALSE, EPERM);
                src = &val; val = va_arg(ap, char *);
                dst = &a->a_stackaddr;
            }
            else {
                src = (a->a_tid != NULL ? &a->a_tid->stack : &a->a_stackaddr);
                dst = va_arg(ap, char **);
            }
            *dst = *src;
            break;
        }
        case PTH_ATTR_TIME_SPAWN: {
            /* time of spawning (read-only; zero for unbound objects) */
            pth_time_t *dst;
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            dst = va_arg(ap, pth_time_t *);
            if (a->a_tid != NULL)
                pth_time_set(dst, &a->a_tid->spawned);
            else
                pth_time_set(dst, PTH_TIME_ZERO);
            break;
        }
        case PTH_ATTR_TIME_LAST: {
            /* time of last run (read-only; zero for unbound objects) */
            pth_time_t *dst;
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            dst = va_arg(ap, pth_time_t *);
            if (a->a_tid != NULL)
                pth_time_set(dst, &a->a_tid->lastran);
            else
                pth_time_set(dst, PTH_TIME_ZERO);
            break;
        }
        case PTH_ATTR_TIME_RAN: {
            /* accumulated running time (read-only; zero for unbound objects) */
            pth_time_t *dst;
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            dst = va_arg(ap, pth_time_t *);
            if (a->a_tid != NULL)
                pth_time_set(dst, &a->a_tid->running);
            else
                pth_time_set(dst, PTH_TIME_ZERO);
            break;
        }
        case PTH_ATTR_START_FUNC: {
            /* start routine (read-only; bound objects only) */
            void *(**dst)(void *);
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            if (a->a_tid == NULL)
                return pth_error(FALSE, EACCES);
            dst = (void *(**)(void *))va_arg(ap, void *);
            *dst = a->a_tid->start_func;
            break;
        }
        case PTH_ATTR_START_ARG: {
            /* start routine argument (read-only; bound objects only) */
            void **dst;
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            if (a->a_tid == NULL)
                return pth_error(FALSE, EACCES);
            dst = va_arg(ap, void **);
            *dst = a->a_tid->start_arg;
            break;
        }
        case PTH_ATTR_STATE: {
            /* scheduling state (read-only; bound objects only) */
            pth_state_t *dst;
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            if (a->a_tid == NULL)
                return pth_error(FALSE, EACCES);
            dst = va_arg(ap, pth_state_t *);
            *dst = a->a_tid->state;
            break;
        }
        case PTH_ATTR_EVENTS: {
            /* events the thread is tied to (read-only; bound objects only) */
            pth_event_t *dst;
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            if (a->a_tid == NULL)
                return pth_error(FALSE, EACCES);
            dst = va_arg(ap, pth_event_t *);
            *dst = a->a_tid->events;
            break;
        }
        case PTH_ATTR_BOUND: {
            /* whether the object is bound to a thread (read-only) */
            int *dst;
            if (cmd == PTH_ATTR_SET)
                return pth_error(FALSE, EPERM);
            dst = va_arg(ap, int *);
            *dst = (a->a_tid != NULL ? TRUE : FALSE);
            break;
        }
        default:
            return pth_error(FALSE, EINVAL);
    }
    return TRUE;
}

/* ==== pth_lib.c ==== */

/* return the hexadecimal Pth library version number */
long pth_version(void)
{
    /* the version the library was compiled as */
    long version = PTH_VERSION;

    return version;
}

/* implicit initialization support */
/* TRUE between pth_init() and pth_kill(), FALSE otherwise */
intern int pth_initialized = FALSE;
#if cpp
/* call pth_init() unless the library is already initialized.
   NOTE(review): expands to a bare `if' statement with its own
   trailing semicolon, so using it as the body of an outer
   if/else needs care -- confirm all call sites */
#define pth_implicit_init() \
    if (!pth_initialized) \
        pth_init();
#endif

#ifdef PTH_EX
/* exception handling callback functions */
/* exception context callback: every thread carries its own
   context inside its thread control block */
static ex_ctx_t *pth_ex_ctx(void)
{
    ex_ctx_t *ctx = &(pth_current->ex_ctx);

    return ctx;
}
/* exception termination callback: end the current thread via
   pth_exit(), passing the exception's value as the exit value */
static void pth_ex_terminate(ex_t *ex)
{
    pth_exit(ex->ex_value);
}
#endif

/* initialize the package */
/*
 * Initialize the library: set up syscall wrapping and the scheduler,
 * spawn the scheduler thread and a thread representing the main
 * program, then switch once into the scheduler to start threading.
 *
 * Returns TRUE on success. Fails via pth_error(FALSE, EPERM) if the
 * library is already initialized and via pth_error(FALSE, EAGAIN) if
 * the scheduler cannot be initialized; returns FALSE if one of the
 * two threads cannot be spawned.
 *
 * NOTE(review): pth_initialized is set to TRUE up front and is not
 * reset on the failure paths below -- confirm whether a failed
 * pth_init() is meant to be retryable.
 */
int pth_init(void)
{
    pth_attr_t t_attr;

    /* support for implicit initialization calls
       and to prevent multiple explicit initialization, too */
    if (pth_initialized)
        return pth_error(FALSE, EPERM);
    else
        pth_initialized = TRUE;

    pth_debug1("pth_init: enter");

    /* initialize syscall wrapping */
    pth_syscall_init();

    /* initialize the scheduler */
    if (!pth_scheduler_init()) {
        pth_shield { pth_syscall_kill(); }
        return pth_error(FALSE, EAGAIN);
    }

#ifdef PTH_EX
    /* optional support for exceptional handling */
    __ex_ctx       = pth_ex_ctx;
    __ex_terminate = pth_ex_terminate;
#endif

    /* spawn the scheduler thread */
    /* NOTE(review): pth_attr_new() can return NULL on allocation
       failure and that is not checked here -- confirm whether this
       needs explicit handling */
    t_attr = pth_attr_new();
    pth_attr_set(t_attr, PTH_ATTR_PRIO,         PTH_PRIO_MAX);
    pth_attr_set(t_attr, PTH_ATTR_NAME,         "**SCHEDULER**");
    pth_attr_set(t_attr, PTH_ATTR_JOINABLE,     FALSE);
    pth_attr_set(t_attr, PTH_ATTR_CANCEL_STATE, PTH_CANCEL_DISABLE);
    pth_attr_set(t_attr, PTH_ATTR_STACK_SIZE,   64*1024);
    pth_attr_set(t_attr, PTH_ATTR_STACK_ADDR,   NULL);
    pth_sched = pth_spawn(t_attr, pth_scheduler, NULL);
    if (pth_sched == NULL) {
        /* undo everything set up so far, keeping errno intact */
        pth_shield {
            pth_attr_destroy(t_attr);
            pth_scheduler_kill();
            pth_syscall_kill();
        }
        return FALSE;
    }

    /* spawn a thread for the main program; the attribute object is
       reused, and the (-1) start function together with a stack size
       of 0 marks the special main thread for pth_spawn() */
    pth_attr_set(t_attr, PTH_ATTR_PRIO,         PTH_PRIO_STD);
    pth_attr_set(t_attr, PTH_ATTR_NAME,         "main");
    pth_attr_set(t_attr, PTH_ATTR_JOINABLE,     TRUE);
    pth_attr_set(t_attr, PTH_ATTR_CANCEL_STATE, PTH_CANCEL_ENABLE|PTH_CANCEL_DEFERRED);
    pth_attr_set(t_attr, PTH_ATTR_STACK_SIZE,   0 /* special */);
    pth_attr_set(t_attr, PTH_ATTR_STACK_ADDR,   NULL);
    pth_main = pth_spawn(t_attr, (void *(*)(void *))(-1), NULL);
    if (pth_main == NULL) {
        /* NOTE(review): the already spawned scheduler thread
           (pth_sched) is not freed on this path -- confirm whether
           pth_scheduler_kill() takes care of it */
        pth_shield {
            pth_attr_destroy(t_attr);
            pth_scheduler_kill();
            pth_syscall_kill();
        }
        return FALSE;
    }
    pth_attr_destroy(t_attr);

    /*
     * The first time we've to manually switch into the scheduler to start
     * threading. Because at this time the only non-scheduler thread is the
     * "main thread" we will come back immediately. We've to also initialize
     * the pth_current variable here to allow the pth_spawn_trampoline
     * function to find the scheduler.
     */
    pth_current = pth_sched;
    pth_mctx_switch(&pth_main->mctx, &pth_sched->mctx);

    /* came back, so let's go home... */
    pth_debug1("pth_init: leave");
    return TRUE;
}

/* kill the package internals */
int pth_kill(void)
{
    /* tearing down the library is reserved for the main thread */
    if (pth_current == pth_main) {
        pth_debug1("pth_kill: enter");

        /* clean up the main thread, then stop the scheduler */
        pth_thread_cleanup(pth_main);
        pth_scheduler_kill();
        pth_initialized = FALSE;

        /* give back both thread control blocks */
        pth_tcb_free(pth_sched);
        pth_tcb_free(pth_main);

        /* finally undo the syscall wrapping */
        pth_syscall_kill();
#ifdef PTH_EX
        /* switch back to the default exception callbacks */
        __ex_ctx       = __ex_ctx_default;
        __ex_terminate = __ex_terminate_default;
#endif
        pth_debug1("pth_kill: leave");
        return TRUE;
    }
    return pth_error(FALSE, EPERM);
}

/* scheduler control/query */
long pth_ctrl(unsigned long query, ...)
{
    va_list args;
    long result = 0;

    va_start(args, query);
    if (query & PTH_CTRL_GETTHREADS) {
        /* sum up the threads of every requested category */
        if (query & PTH_CTRL_GETTHREADS_NEW)
            result += pth_pqueue_elements(&pth_NQ);
        if (query & PTH_CTRL_GETTHREADS_READY)
            result += pth_pqueue_elements(&pth_RQ);
        if (query & PTH_CTRL_GETTHREADS_RUNNING)
            result += 1; /* pth_current only */
        if (query & PTH_CTRL_GETTHREADS_WAITING)
            result += pth_pqueue_elements(&pth_WQ);
        if (query & PTH_CTRL_GETTHREADS_SUSPENDED)
            result += pth_pqueue_elements(&pth_SQ);
        if (query & PTH_CTRL_GETTHREADS_DEAD)
            result += pth_pqueue_elements(&pth_DQ);
    }
    else if (query & PTH_CTRL_GETAVLOAD) {
        /* average load is stored through the given pointer */
        float *load_out = va_arg(args, float *);
        *load_out = pth_loadval;
    }
    else if (query & PTH_CTRL_GETPRIO) {
        /* priority of the given thread */
        pth_t thread = va_arg(args, pth_t);
        result = thread->prio;
    }
    else if (query & PTH_CTRL_GETNAME) {
        /* name of the given thread, returned as a pointer cast to long */
        pth_t thread = va_arg(args, pth_t);
        result = (long)thread->name;
    }
    else if (query & PTH_CTRL_DUMPSTATE) {
        /* dump the internal state to the given stream */
        FILE *stream = va_arg(args, FILE *);
        pth_dumpstate(stream);
    }
    else {
        /* none of the known queries was requested */
        result = -1;
    }
    va_end(args);

    if (result == -1)
        return pth_error(-1, EINVAL);
    return result;
}

/* create a new thread of execution by spawning a cooperative thread */
static void pth_spawn_trampoline(void)
{
    /* run the start routine of the current thread and use its
       result as the value of an implicit exit of the thread */
    pth_exit((*pth_current->start_func)(pth_current->start_arg));

    /* NOTREACHED */
    abort();
}
/*
 * Spawn a new thread which starts in `func' with argument `arg'.
 *
 *   attr  attribute object to take priority, joinable flag,
 *         cancellation state, dispatch counter, name and stack
 *         settings from, or PTH_ATTR_DEFAULT
 *   func  start routine; the value (-1) is reserved for the special
 *         main thread and is stored as a NULL start routine
 *   arg   argument handed to `func'
 *
 * Returns the new thread (the pointer to its control block). Fails
 * via pth_error((pth_t)NULL, ...) with EINVAL for a NULL `func' or
 * with the errno left by the failed control block allocation or
 * machine context setup.
 */
pth_t pth_spawn(pth_attr_t attr, void *(*func)(void *), void *arg)
{
    pth_t t;
    unsigned int stacksize;
    void *stackaddr;
    pth_time_t ts;

    pth_debug1("pth_spawn: enter");

    /* consistency */
    if (func == NULL)
        return pth_error((pth_t)NULL, EINVAL);

    /* support the special case of main() */
    if (func == (void *(*)(void *))(-1))
        func = NULL;

    /* allocate a new thread control block */
    stacksize = (attr == PTH_ATTR_DEFAULT ? 64*1024 : attr->a_stacksize);
    stackaddr = (attr == PTH_ATTR_DEFAULT ? NULL    : attr->a_stackaddr);
    if ((t = pth_tcb_alloc(stacksize, stackaddr)) == NULL)
        return pth_error((pth_t)NULL, errno);

    /* configure remaining attributes */
    if (attr != PTH_ATTR_DEFAULT) {
        /* overtake fields from the attribute structure */
        t->prio        = attr->a_prio;
        t->joinable    = attr->a_joinable;
        t->cancelstate = attr->a_cancelstate;
        t->dispatches  = attr->a_dispatches;
        pth_util_cpystrn(t->name, attr->a_name, PTH_TCB_NAMELEN);
    }
    else if (pth_current != NULL) {
        /* overtake some fields from the parent thread */
        t->prio        = pth_current->prio;
        t->joinable    = pth_current->joinable;
        t->cancelstate = pth_current->cancelstate;
        t->dispatches  = 0;
        /* the name is derived from the parent's name, the
           current time and the parent's address */
        pth_snprintf(t->name, PTH_TCB_NAMELEN, "%s.child@%d=0x%lx",
                     pth_current->name, (unsigned int)time(NULL),
                     (unsigned long)pth_current);
    }
    else {
        /* defaults */
        t->prio        = PTH_PRIO_STD;
        t->joinable    = TRUE;
        t->cancelstate = PTH_CANCEL_DEFAULT;
        t->dispatches  = 0;
        pth_snprintf(t->name, PTH_TCB_NAMELEN,
                     "user/%x", (unsigned int)time(NULL));
    }

    /* initialize the time points and ranges */
    pth_time_set(&ts, PTH_TIME_NOW);
    pth_time_set(&t->spawned, &ts);
    pth_time_set(&t->lastran, &ts);
    pth_time_set(&t->running, PTH_TIME_ZERO);

    /* initialize events */
    t->events = NULL;

    /* clear raised signals */
    sigemptyset(&t->sigpending);
    t->sigpendcnt = 0;

    /* remember the start routine and arguments for our trampoline */
    t->start_func = func;
    t->start_arg  = arg;

    /* initialize join argument */
    t->join_arg = NULL;

    /* initialize thread specific storage */
    t->data_value = NULL;
    t->data_count = 0;

    /* initialize cancellation stuff */
    t->cancelreq   = FALSE;
    t->cleanups    = NULL;

    /* initialize mutex stuff */
    pth_ring_init(&t->mutexring);

#ifdef PTH_EX
    /* initialize exception handling context */
    EX_CTX_INITIALIZE(&t->ex_ctx);
#endif

    /* initialize the machine context of this new thread */
    if (t->stacksize > 0) { /* the "main thread" (indicated by == 0) is special! */
        if (!pth_mctx_set(&t->mctx, pth_spawn_trampoline,
                          t->stack, ((char *)t->stack+t->stacksize))) {
            /* free the control block again without losing errno */
            pth_shield { pth_tcb_free(t); }
            return pth_error((pth_t)NULL, errno);
        }
    }

    /* finally insert it into the "new queue" where
       the scheduler will pick it up for dispatching
       (the scheduler thread itself is never queued) */
    if (func != pth_scheduler) {
        t->state = PTH_STATE_NEW;
        pth_pqueue_insert(&pth_NQ, t->prio, t);
    }

    pth_debug1("pth_spawn: leave");

    /* the returned thread id is just the pointer
       to the thread control block... */
    return t;
}

/* returns the current thread */
pth_t pth_self(void)
{
    /* the current thread is tracked in pth_current */
    pth_t self = pth_current;

    return self;
}

/* raise a signal for a thread */
int pth_raise(pth_t t, int sig)
{
    struct sigaction act;

    /* a thread cannot raise a signal for itself here */
    if (t == NULL || t == pth_current || sig < 0 || sig > PTH_NSIG)
        return pth_error(FALSE, EINVAL);

    /* signal 0 just tests whether the thread exists */
    if (sig == 0)
        return pth_thread_exists(t);

    /* look up the process-wide disposition of the signal */
    if (sigaction(sig, NULL, &act) != 0)
        return FALSE;

    /* a globally ignored signal needs no further action */
    if (act.sa_handler == SIG_IGN)
        return TRUE;

    /* mark the signal as pending for the thread (counted only once) */
    if (!sigismember(&t->sigpending, sig)) {
        sigaddset(&t->sigpending, sig);
        t->sigpendcnt++;
    }

    /* yield in favour of the target thread */
    pth_yield(t);
    return TRUE;
}

/* check whether a thread exists */
intern int pth_thread_exists(pth_t t)
{
    /* a thread exists if any of the five queues contains it */
    if (   pth_pqueue_contains(&pth_NQ, t)
        || pth_pqueue_contains(&pth_RQ, t)
        || pth_pqueue_contains(&pth_WQ, t)
        || pth_pqueue_contains(&pth_SQ, t)
        || pth_pqueue_contains(&pth_DQ, t))
        return TRUE;

    /* not found */
    return pth_error(FALSE, ESRCH);
}

/* cleanup a particular thread */
intern void pth_thread_cleanup(pth_t thread)
{
    /* step 1: execute all pending cleanup handlers */
    if (thread->cleanups != NULL) {
        pth_cleanup_popall(thread, TRUE);
    }

    /* step 2: destroy the thread specific data */
    if (thread->data_value != NULL) {
        pth_key_destroydata(thread);
    }

    /* step 3: give up all mutex variables the thread still holds */
    pth_mutex_releaseall(thread);
}

/* terminate the current thread */
static int pth_exit_cb(void *arg)
{
    int alive;

    /* CAUTION: this callback is executed from
       within the scheduler thread! */

    /* Count the threads still present in the system. The dead queue
       (pth_DQ) is intentionally left out: it only holds non-detached
       threads which have terminated already. When nothing but the
       main thread is left, the event this function serves as the test
       function for can fire and the whole process may terminate. */
    alive  = pth_pqueue_elements(&pth_NQ);
    alive += pth_pqueue_elements(&pth_RQ);
    alive += pth_pqueue_elements(&pth_WQ);
    alive += pth_pqueue_elements(&pth_SQ);

    /* exactly one means: just our main thread */
    return (alive == 1 ? TRUE : FALSE);
}
/*
 * Terminate the current thread with exit value `value'. This function
 * does not return: an ordinary thread is marked dead and control is
 * switched to the scheduler, while the main thread first waits until
 * pth_exit_cb() reports TRUE, then kills the library and exits the
 * process with `value' as exit code.
 */
void pth_exit(void *value)
{
    pth_event_t ev;

    pth_debug2("pth_exit: marking thread \"%s\" as dead", pth_current->name);

    /* the main thread is special, because its termination
       would terminate the whole process, so we have to delay
       its termination until it is really the last thread */
    if (pth_current == pth_main) {
        if (!pth_exit_cb(NULL)) {
            /* wait on an event which uses pth_exit_cb as its test function */
            ev = pth_event(PTH_EVENT_FUNC, pth_exit_cb);
            pth_wait(ev);
            pth_event_free(ev, PTH_FREE_THIS);
        }
    }

    /* execute cleanups */
    pth_thread_cleanup(pth_current);

    if (pth_current != pth_main) {
        /*
         * Now mark the current thread as dead, explicitly switch into the
         * scheduler and let it reap the current thread structure; we can't
         * free it here, or we'd be running on a stack which malloc() regards
         * as free memory, which would be a somewhat perilous situation.
         */
        pth_current->join_arg = value;
        pth_current->state = PTH_STATE_DEAD;
        pth_debug2("pth_exit: switching from thread \"%s\" to scheduler", pth_current->name);
        pth_mctx_switch(&pth_current->mctx, &pth_sched->mctx);
    }
    else {
        /*
         * main thread is special: exit the _process_
         * [double-casted to avoid warnings because of size]
         */
        pth_kill();
        exit((int)((long)value));
    }

    /* NOTREACHED */
    abort();
}

/* waits for the termination of the specified thread */
int pth_join(pth_t tid, void **value)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_event_t ev;

    pth_debug2("pth_join: joining thread \"%s\"", tid == NULL ? "-ANY-" : tid->name);

    /* joining ourself can never succeed */
    if (tid == pth_current)
        return pth_error(FALSE, EDEADLK);

    /* a specific thread has to be joinable */
    if (tid != NULL && !tid->joinable)
        return pth_error(FALSE, EINVAL);

    /* this query has to report something other than 1 */
    if (pth_ctrl(PTH_CTRL_GETTHREADS) == 1)
        return pth_error(FALSE, EDEADLK);

    /* "any thread": try an already terminated one first */
    if (tid == NULL)
        tid = pth_pqueue_head(&pth_DQ);

    /* wait unless a dead thread is at hand already */
    if (tid == NULL || tid->state != PTH_STATE_DEAD) {
        ev = pth_event(PTH_EVENT_TID|PTH_UNTIL_TID_DEAD|PTH_MODE_STATIC, &ev_key, tid);
        pth_wait(ev);
    }

    /* "any thread": pick up whatever terminated meanwhile */
    if (tid == NULL)
        tid = pth_pqueue_head(&pth_DQ);

    /* still no dead thread available */
    if (tid == NULL || tid->state != PTH_STATE_DEAD)
        return pth_error(FALSE, EIO);

    /* hand out the exit value and reap the thread */
    if (value != NULL)
        *value = tid->join_arg;
    pth_pqueue_delete(&pth_DQ, tid);
    pth_tcb_free(tid);
    return TRUE;
}

/* delegates control back to scheduler for context switches */
int pth_yield(pth_t to)
{
    pth_pqueue_t *queue = NULL;

    pth_debug2("pth_yield: enter from thread \"%s\"", pth_current->name);

    if (to != NULL) {
        /* only a new or ready thread can be favored */
        if (to->state == PTH_STATE_NEW)
            queue = &pth_NQ;
        else if (to->state == PTH_STATE_READY)
            queue = &pth_RQ;
        if (queue == NULL || !pth_pqueue_contains(queue, to))
            return pth_error(FALSE, EINVAL);

        /* give the favored thread maximum priority in its queue */
        pth_pqueue_favorite(queue, to);

        pth_debug2("pth_yield: give up control to scheduler "
                   "in favour of thread \"%s\"", to->name);
    }
    else {
        pth_debug1("pth_yield: give up control to scheduler");
    }

    /* switch to scheduler */
    pth_mctx_switch(&pth_current->mctx, &pth_sched->mctx);
    pth_debug1("pth_yield: got back control from scheduler");

    pth_debug2("pth_yield: leave to thread \"%s\"", pth_current->name);
    return TRUE;
}

/* suspend a thread until it is manually resumed again */
int pth_suspend(pth_t t)
{
    pth_pqueue_t *from;

    if (t == NULL)
        return pth_error(FALSE, EINVAL);

    /* neither the scheduler nor the caller itself can be suspended */
    if (t == pth_sched || t == pth_current)
        return pth_error(FALSE, EPERM);

    /* the state of the thread tells which queue it lives in */
    if (t->state == PTH_STATE_NEW)
        from = &pth_NQ;
    else if (t->state == PTH_STATE_READY)
        from = &pth_RQ;
    else if (t->state == PTH_STATE_WAITING)
        from = &pth_WQ;
    else
        return pth_error(FALSE, EPERM);

    if (!pth_pqueue_contains(from, t))
        return pth_error(FALSE, ESRCH);

    /* move the thread over to the suspend queue */
    pth_pqueue_delete(from, t);
    pth_pqueue_insert(&pth_SQ, PTH_PRIO_STD, t);
    pth_debug2("pth_suspend: suspend thread \"%s\"\n", t->name);
    return TRUE;
}

/* resume a previously suspended thread */
int pth_resume(pth_t t)
{
    pth_pqueue_t *target = NULL;

    if (t == NULL)
        return pth_error(FALSE, EINVAL);
    if (t == pth_sched || t == pth_current)
        return pth_error(FALSE, EPERM);

    /* only a thread on the suspend queue can be resumed */
    if (!pth_pqueue_contains(&pth_SQ, t))
        return pth_error(FALSE, EPERM);
    pth_pqueue_delete(&pth_SQ, t);

    /* the state of the thread selects the queue to put it back
       into (target stays NULL for any other state) */
    if (t->state == PTH_STATE_NEW)
        target = &pth_NQ;
    else if (t->state == PTH_STATE_READY)
        target = &pth_RQ;
    else if (t->state == PTH_STATE_WAITING)
        target = &pth_WQ;

    pth_pqueue_insert(target, PTH_PRIO_STD, t);
    pth_debug2("pth_resume: resume thread \"%s\"\n", t->name);
    return TRUE;
}

/* switch a filedescriptor's I/O mode */
int pth_fdmode(int fd, int newmode)
{
    int flags;
    int oldmode;

    /* determine the current mode from the file status flags */
    flags = fcntl(fd, F_GETFL, NULL);
    if (flags == -1)
        oldmode = PTH_FDMODE_ERROR;
    else
        oldmode = ((flags & O_NONBLOCKING) ? PTH_FDMODE_NONBLOCK
                                           : PTH_FDMODE_BLOCK);

    /* touch the flags only if the mode really has to change */
    if (oldmode == PTH_FDMODE_BLOCK && newmode == PTH_FDMODE_NONBLOCK)
        fcntl(fd, F_SETFL, (flags | O_NONBLOCKING));
    if (oldmode == PTH_FDMODE_NONBLOCK && newmode == PTH_FDMODE_BLOCK)
        fcntl(fd, F_SETFL, (flags & ~(O_NONBLOCKING)));

    /* the caller gets the mode found on entry */
    return oldmode;
}

/* wait for specific amount of time */
int pth_nap(pth_time_t naptime)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_time_t wakeup;
    pth_event_t ev;

    /* a nap of zero length is rejected */
    if (pth_time_cmp(&naptime, PTH_TIME_ZERO) == 0)
        return pth_error(FALSE, EINVAL);

    /* wake-up time = now + nap time */
    pth_time_set(&wakeup, PTH_TIME_NOW);
    pth_time_add(&wakeup, &naptime);

    /* wait on a time event for the wake-up time */
    ev = pth_event(PTH_EVENT_TIME|PTH_MODE_STATIC, &ev_key, wakeup);
    pth_wait(ev);

    return TRUE;
}

/* runs a constructor once */
int pth_once(pth_once_t *oncectrl, void (*constructor)(void *), void *arg)
{
    if (oncectrl == NULL || constructor == NULL)
        return pth_error(FALSE, EINVAL);

    /* the control variable records that the constructor was run */
    if (*oncectrl != TRUE) {
        (*constructor)(arg);
        *oncectrl = TRUE;
    }
    return TRUE;
}

/* ==== pth_fork.c ==== */

/* one registered set of fork handlers; a NULL handler is skipped */
struct pth_atfork_st {
    void (*prepare)(void *);   /* called before fork() */
    void (*parent)(void *);    /* called after fork() in the parent */
    void (*child)(void *);     /* called after fork() in the child */
    void *arg;                 /* argument passed to all three handlers */
};

/* fixed-size stack of handler sets; pth_atfork_idx is the number
   of used entries and the index of the next free slot */
static struct pth_atfork_st pth_atfork_list[PTH_ATFORK_MAX];
static int pth_atfork_idx = 0;

int pth_atfork_push(void (*prepare)(void *), void (*parent)(void *),
                    void (*child)(void *), void *arg)
{
    if (pth_atfork_idx > PTH_ATFORK_MAX-1)
        return pth_error(FALSE, ENOMEM);
    pth_atfork_list[pth_atfork_idx].prepare = prepare;
    pth_atfork_list[pth_atfork_idx].parent  = parent;
    pth_atfork_list[pth_atfork_idx].child   = child;
    pth_atfork_list[pth_atfork_idx].arg     = arg;
    pth_atfork_idx++;
    return TRUE;
}

/* drop the most recently registered set of fork handlers */
int pth_atfork_pop(void)
{
    if (pth_atfork_idx > 0) {
        pth_atfork_idx--;
        return TRUE;
    }
    /* nothing registered */
    return FALSE;
}

/*
 * Variant of fork(2) which runs the handlers registered through
 * pth_atfork_push(): the prepare handlers before forking (last
 * registered first), the parent respectively child handlers after
 * forking (first registered first). In the child all threads except
 * the current one and the scheduler are dropped.
 *
 * Returns the child's pid in the parent, 0 in the child and -1 if
 * the underlying fork() failed (errno is left as set by fork()).
 */
pid_t pth_fork(void)
{
    pid_t pid;
    int i;

    /* run preparation handlers in LIFO order */
    for (i = pth_atfork_idx-1; i >= 0; i--)
        if (pth_atfork_list[i].prepare != NULL)
            pth_atfork_list[i].prepare(pth_atfork_list[i].arg);

    /* fork the process; a failure has to be reported as -1, because
       a return value of 0 tells the caller that it is the child */
    if ((pid = pth_sc(fork)()) == -1)
        return (pid_t)(-1);

    /* handle parent and child contexts */
    if (pid != 0) {
        /* Parent: */

        /* run parent handlers in FIFO order */
        for (i = 0; i <= pth_atfork_idx-1; i++)
            if (pth_atfork_list[i].parent != NULL)
                pth_atfork_list[i].parent(pth_atfork_list[i].arg);
    }
    else {
        /* Child: */

        /* kick out all threads except for the current one and the scheduler */
        pth_scheduler_drop();

        /* run child handlers in FIFO order */
        for (i = 0; i <= pth_atfork_idx-1; i++)
            if (pth_atfork_list[i].child != NULL)
                pth_atfork_list[i].child(pth_atfork_list[i].arg);
    }
    return pid;
}

/* ==== pth_high.c ==== */

/*
 * Pth variant of nanosleep(2).
 *
 * Suspends the calling thread until the requested interval (truncated to
 * microsecond resolution) has elapsed.
 *
 *   rqtp  requested interval; NULL fails with EFAULT, a tv_nsec outside
 *         [0, 999999999] fails with EINVAL
 *   rmtp  if non-NULL, receives the time left until the computed wake-up
 *         time, or zero if that time has already passed
 *
 * Returns 0 on success and -1 (via pth_error) on invalid arguments.
 */
int pth_nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
{
    pth_time_t until;
    pth_time_t offset;
    pth_time_t now;
    pth_event_t ev;
    static pth_key_t ev_key = PTH_KEY_INIT;

    /* consistency checks for POSIX conformance
       (tv_nsec has to be less than 1000 million) */
    if (rqtp == NULL)
        return pth_error(-1, EFAULT);
    if (rqtp->tv_nsec < 0 || rqtp->tv_nsec >= 1000000000L)
        return pth_error(-1, EINVAL);

    /* short-circuit */
    if (rqtp->tv_sec == 0 && rqtp->tv_nsec == 0)
        return 0;

    /* calculate asleep time */
    offset = pth_time((long)(rqtp->tv_sec), (long)(rqtp->tv_nsec) / 1000);
    pth_time_set(&until, PTH_TIME_NOW);
    pth_time_add(&until, &offset);

    /* and let thread sleep until this time is elapsed */
    ev = pth_event(PTH_EVENT_TIME|PTH_MODE_STATIC, &ev_key, until);
    pth_wait(ev);

    /* optionally provide the amount of time which is still left */
    if (rmtp != NULL) {
        pth_time_set(&now, PTH_TIME_NOW);
        pth_time_sub(&until, &now);
        /* once the wake-up time has passed the difference is negative;
           a remaining time can never be less than zero */
        if (until.tv_sec < 0 || until.tv_usec < 0) {
            until.tv_sec  = 0;
            until.tv_usec = 0;
        }
        rmtp->tv_sec  = until.tv_sec;
        rmtp->tv_nsec = until.tv_usec * 1000;
    }

    return 0;
}

/*
 * Pth variant of usleep(3).
 *
 * Lets the calling thread wait on a time event which is due usec
 * microseconds from now.  A zero interval returns immediately.
 * Always returns 0.
 */
int pth_usleep(unsigned int usec)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_time_t wakeup;
    pth_time_t span;
    pth_event_t ev_time;

    if (usec != 0) {
        /* wake-up time = now + (seconds, microseconds) of the interval */
        span = pth_time((long)(usec / 1000000), (long)(usec % 1000000));
        pth_time_set(&wakeup, PTH_TIME_NOW);
        pth_time_add(&wakeup, &span);

        /* wait on the corresponding time event */
        ev_time = pth_event(PTH_EVENT_TIME|PTH_MODE_STATIC, &ev_key, wakeup);
        pth_wait(ev_time);
    }

    return 0;
}

/*
 * Pth variant of sleep(3).
 *
 * Lets the calling thread wait on a time event which is due sec seconds
 * from now.  A zero interval returns immediately.  Always returns 0.
 */
unsigned int pth_sleep(unsigned int sec)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_time_t wakeup;
    pth_time_t span;
    pth_event_t ev_time;

    if (sec != 0) {
        /* wake-up time = now + sec seconds */
        span = pth_time(sec, 0);
        pth_time_set(&wakeup, PTH_TIME_NOW);
        pth_time_add(&wakeup, &span);

        /* wait on the corresponding time event */
        ev_time = pth_event(PTH_EVENT_TIME|PTH_MODE_STATIC, &ev_key, wakeup);
        pth_wait(ev_time);
    }

    return 0;
}

/*
 * Pth variant of POSIX pthread_sigmask(3).
 *
 * When set is non-NULL, sigprocmask is first called with how and the
 * signal set stored in the current thread's machine context
 * (pth_current->mctx.sigs); the result of that call is ignored.
 * Afterwards sigprocmask(how, set, oset) is performed and its return
 * value is handed back to the caller.
 */
int pth_sigmask(int how, const sigset_t *set, sigset_t *oset)
{
    /* step for the explicitly remembered signal mask copy of the scheduler */
    if (set != NULL)
        pth_sc(sigprocmask)(how, &(pth_current->mctx.sigs), NULL);

    /* change the real (per-thread saved/restored) signal mask */
    return pth_sc(sigprocmask)(how, set, oset);
}

/* Pth variant of POSIX sigwait(3): forwards to pth_sigwait_ev() without
   an extra event (ev_extra == NULL) and returns its result unchanged */
int pth_sigwait(const sigset_t *set, int *sigp)
{
    return pth_sigwait_ev(set, sigp, NULL);
}

/*
 * Pth variant of POSIX sigwait(3) with extra events.
 *
 *   set       signals to wait for (must not be NULL)
 *   sigp      receives the number of the signal (must not be NULL)
 *   ev_extra  optional additional event(s) to wait on, or NULL
 *
 * Returns 0 once a signal out of set was found (the result is an error
 * number, not -1): EINVAL for NULL arguments, EINTR when extra events
 * were given and the signal event is not in "occurred" state after the
 * wait.
 */
int pth_sigwait_ev(const sigset_t *set, int *sigp, pth_event_t ev_extra)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_event_t ev_sigs;
    sigset_t pending;
    int signo;

    if (set == NULL || sigp == NULL)
        return pth_error(EINVAL, EINVAL);

    /* fast path: one of the signals is already pending
       (an unreadable pending set is treated as empty) */
    if (sigpending(&pending) < 0)
        sigemptyset(&pending);
    for (signo = 1; signo < PTH_NSIG; signo++) {
        if (!sigismember(set, signo) || !sigismember(&pending, signo))
            continue;
        pth_util_sigdelete(signo);
        *sigp = signo;
        return 0;
    }

    /* slow path: create the signal event and wait on it */
    ev_sigs = pth_event(PTH_EVENT_SIGS|PTH_MODE_STATIC, &ev_key, set, sigp);
    if (ev_extra == NULL) {
        pth_wait(ev_sigs);
        /* nothing to do, scheduler has already set *sigp for us */
        return 0;
    }

    /* wait together with the extra events and check afterwards
       whether it was really the signal event which occurred */
    pth_event_concat(ev_sigs, ev_extra, NULL);
    pth_wait(ev_sigs);
    pth_event_isolate(ev_sigs);
    if (pth_event_status(ev_sigs) != PTH_STATUS_OCCURRED)
        return pth_error(EINTR, EINTR);

    /* nothing to do, scheduler has already set *sigp for us */
    return 0;
}

/*
 * Pth variant of waitpid(2).
 *
 * Polls waitpid with WNOHANG added to options.  As long as no child
 * status is available and the caller did not request WNOHANG itself,
 * the calling thread waits on a quarter-second time event between two
 * polls.  Returns what the last waitpid call returned.
 */
pid_t pth_waitpid(pid_t wpid, int *status, int options)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_event_t ev_delay;
    pid_t pid;

    pth_debug2("pth_waitpid: called from thread \"%s\"", pth_current->name);

    for (;;) {
        /* non-blocking poll for the pid, restarted on EINTR */
        do {
            pid = pth_sc(waitpid)(wpid, status, options|WNOHANG);
        } while (pid < 0 && errno == EINTR);

        /* finished on error or when a child was found... */
        if (pid == -1 || pid > 0)
            break;
        /* ...or when the caller only wanted a poll */
        if (pid == 0 && (options & WNOHANG))
            break;

        /* else wait a little bit before polling again */
        ev_delay = pth_event(PTH_EVENT_TIME|PTH_MODE_STATIC, &ev_key, pth_timeout(0,250000));
        pth_wait(ev_delay);
    }

    pth_debug2("pth_waitpid: leave to thread \"%s\"", pth_current->name);
    return pid;
}

/*
 * Pth variant of system(3).
 *
 *   cmd  command line handed to "sh -c"; with NULL the function only
 *        reports whether PTH_PATH_BINSH can be stat(2)'ed (1) or not (0)
 *
 * SIGINT and SIGQUIT are ignored and SIGCHLD is blocked while the child
 * runs; dispositions and signal mask are restored afterwards.
 *
 * Returns -1 if forking or waiting failed, else the wait status of the
 * child.  A child which cannot execute the shell terminates with exit
 * status 127.
 */
int pth_system(const char *cmd)
{
    struct sigaction sa_ign, sa_int, sa_quit;
    sigset_t ss_block, ss_old;
    struct stat sb;
    pid_t pid;
    int pstat;

    /* POSIX calling convention: determine whether the
       Bourne Shell ("sh") is available on this platform */
    if (cmd == NULL) {
        if (stat(PTH_PATH_BINSH, &sb) == -1)
            return 0;
        return 1;
    }

    /* temporarily ignore SIGINT and SIGQUIT actions */
    sa_ign.sa_handler = SIG_IGN;
    sigemptyset(&sa_ign.sa_mask);
    sa_ign.sa_flags = 0;
    sigaction(SIGINT,  &sa_ign, &sa_int);
    sigaction(SIGQUIT, &sa_ign, &sa_quit);

    /* block SIGCHLD signal */
    sigemptyset(&ss_block);
    sigaddset(&ss_block, SIGCHLD);
    pth_sc(sigprocmask)(SIG_BLOCK, &ss_block, &ss_old);

    /* fork the current process */
    pstat = -1;
    switch (pid = pth_fork()) {
        case -1: /* error */
            break;

        case 0:  /* child */
            /* restore original signal dispositions and signal mask */
            sigaction(SIGINT,  &sa_int,  NULL);
            sigaction(SIGQUIT, &sa_quit, NULL);
            pth_sc(sigprocmask)(SIG_SETMASK, &ss_old, NULL);

            /* stop the Pth scheduling */
            pth_scheduler_kill();

            /* execute the command through Bourne Shell; the terminating
               null pointer of a variadic call has to carry pointer type */
            execl(PTH_PATH_BINSH, "sh", "-c", cmd, (char *)NULL);

            /* POSIX compliant return in case execution failed: leave via
               _exit(2) so that the atexit handlers and stdio buffers
               inherited from the parent are not processed a second time */
            _exit(127);

        default: /* parent */
            /* wait until child process terminates */
            pid = pth_waitpid(pid, &pstat, 0);
            break;
    }

    /* restore original signal dispositions and signal mask */
    sigaction(SIGINT,  &sa_int,  NULL);
    sigaction(SIGQUIT, &sa_quit, NULL);
    pth_sc(sigprocmask)(SIG_SETMASK, &ss_old, NULL);

    /* return error or child process result code */
    return (pid == -1 ? -1 : pstat);
}

/* Pth variant of select(2): forwards all arguments to pth_select_ev()
   without an extra event (ev_extra == NULL) and returns its result */
int pth_select(int nfds, fd_set *rfds, fd_set *wfds,
               fd_set *efds, struct timeval *timeout)
{
    return pth_select_ev(nfds, rfds, wfds, efds, timeout, NULL);
}

/* Pth variant of select(2) with extra events.

   nfd, rfds, wfds, efds and timeout are used as for select(2); ev_extra
   is an optional event (or NULL) which is waited on as well.

   The function proceeds in three stages:
   1. a pure delay request (no descriptors, only a timeout) is served
      directly by select(2) if it is at most 1/100 second, else by a
      time event;
   2. the descriptor sets are polled once with a zero timeout on private
      copies, and an immediate result is passed through;
   3. otherwise the thread waits on a select event, an optional timeout
      event and the optional extra event.

   Returns the number reported for the select operation, 0 on timeout or
   -1 with errno set (EINVAL for bad arguments, EBADF if the select event
   failed, EINTR if only the extra event ended the wait).  Note that a
   timeout->tv_sec larger than 31 days is reduced in the caller's
   structure. */
int pth_select_ev(int nfd, fd_set *rfds, fd_set *wfds,
                  fd_set *efds, struct timeval *timeout, pth_event_t ev_extra)
{
    struct timeval delay;
    pth_event_t ev;
    pth_event_t ev_select;
    pth_event_t ev_timeout;
    static pth_key_t ev_key_select  = PTH_KEY_INIT;
    static pth_key_t ev_key_timeout = PTH_KEY_INIT;
    fd_set rspare, wspare, espare;
    fd_set *rtmp, *wtmp, *etmp;
    int selected;
    int rc;

    pth_implicit_init();
    pth_debug2("pth_select_ev: called from thread \"%s\"", pth_current->name);

    /* POSIX.1-2001/SUSv3 compliance */
    if (nfd < 0 || nfd > FD_SETSIZE)
        return pth_error(-1, EINVAL);
    if (timeout != NULL) {
        if (   timeout->tv_sec  < 0
            || timeout->tv_usec < 0
            || timeout->tv_usec >= 1000000 /* a full second */)
            return pth_error(-1, EINVAL);
        /* limit the timeout to 31 days (this writes to the caller's struct) */
        if (timeout->tv_sec > 31*24*60*60)
            timeout->tv_sec = 31*24*60*60;
    }

    /* first deal with the special situation of a plain microsecond delay */
    if (nfd == 0 && rfds == NULL && wfds == NULL && efds == NULL && timeout != NULL) {
        if (timeout->tv_sec == 0 && timeout->tv_usec <= 10000 /* 1/100 second */) {
            /* very small delays are acceptable to be performed directly */
            while (   pth_sc(select)(0, NULL, NULL, NULL, timeout) < 0
                   && errno == EINTR) ;
        }
        else {
            /* larger delays have to go through the scheduler */
            ev = pth_event(PTH_EVENT_TIME|PTH_MODE_STATIC, &ev_key_timeout,
                           pth_timeout(timeout->tv_sec, timeout->tv_usec));
            if (ev_extra != NULL)
                pth_event_concat(ev, ev_extra, NULL);
            pth_wait(ev);
            if (ev_extra != NULL) {
                /* the time event not having occurred is reported as EINTR */
                pth_event_isolate(ev);
                if (pth_event_status(ev) != PTH_STATUS_OCCURRED)
                    return pth_error(-1, EINTR);
            }
        }
        /* POSIX.1-2001/SUSv3 compliance
           (all three pointers are NULL in this branch, so these are no-ops) */
        if (rfds != NULL) FD_ZERO(rfds);
        if (wfds != NULL) FD_ZERO(wfds);
        if (efds != NULL) FD_ZERO(efds);
        return 0;
    }

    /* now directly poll filedescriptor sets to avoid unnecessary
       (and resource consuming because of context switches, etc) event
       handling through the scheduler. We have to be careful here, because
       not all platforms guarantee us that the sets are unmodified if an
       error or timeout occurred. Hence the poll works on spare copies. */
    delay.tv_sec  = 0;
    delay.tv_usec = 0;
    rtmp = NULL;
    if (rfds != NULL) {
        memcpy(&rspare, rfds, sizeof(fd_set));
        rtmp = &rspare;
    }
    wtmp = NULL;
    if (wfds != NULL) {
        memcpy(&wspare, wfds, sizeof(fd_set));
        wtmp = &wspare;
    }
    etmp = NULL;
    if (efds != NULL) {
        memcpy(&espare, efds, sizeof(fd_set));
        etmp = &espare;
    }
    while ((rc = pth_sc(select)(nfd, rtmp, wtmp, etmp, &delay)) < 0
           && errno == EINTR)
        ;
    if (rc < 0)
        /* pass-through immediate error */
        return pth_error(-1, errno);
    else if (   rc > 0
             || (   rc == 0
                 && timeout != NULL
                 && pth_time_cmp(timeout, PTH_TIME_ZERO) == 0)) {
        /* pass-through immediate success (descriptors are ready, or the
           caller asked for a pure poll): publish the polled spare sets */
        if (rfds != NULL)
            memcpy(rfds, &rspare, sizeof(fd_set));
        if (wfds != NULL)
            memcpy(wfds, &wspare, sizeof(fd_set));
        if (efds != NULL)
            memcpy(efds, &espare, sizeof(fd_set));
        return rc;
    }

    /* suspend current thread until one filedescriptor
       is ready or the timeout occurred; the select event is handed the
       address of rc and the caller's original sets */
    rc = -1;
    ev = ev_select = pth_event(PTH_EVENT_SELECT|PTH_MODE_STATIC,
                               &ev_key_select, &rc, nfd, rfds, wfds, efds);
    ev_timeout = NULL;
    if (timeout != NULL) {
        ev_timeout = pth_event(PTH_EVENT_TIME|PTH_MODE_STATIC, &ev_key_timeout,
                               pth_timeout(timeout->tv_sec, timeout->tv_usec));
        pth_event_concat(ev, ev_timeout, NULL);
    }
    if (ev_extra != NULL)
        pth_event_concat(ev, ev_extra, NULL);
    pth_wait(ev);
    /* take the concatenated events apart again before looking at them */
    if (ev_extra != NULL)
        pth_event_isolate(ev_extra);
    if (timeout != NULL)
        pth_event_isolate(ev_timeout);

    /* select return code semantics */
    if (pth_event_status(ev_select) == PTH_STATUS_FAILED)
        return pth_error(-1, EBADF);
    selected = FALSE;
    if (pth_event_status(ev_select) == PTH_STATUS_OCCURRED)
        selected = TRUE;
    if (   timeout != NULL
        && pth_event_status(ev_timeout) == PTH_STATUS_OCCURRED) {
        selected = TRUE;
        /* POSIX.1-2001/SUSv3 compliance */
        if (rfds != NULL) FD_ZERO(rfds);
        if (wfds != NULL) FD_ZERO(wfds);
        if (efds != NULL) FD_ZERO(efds);
        rc = 0;
    }
    /* neither the select nor the timeout event occurred, so the wait can
       only have been ended by the extra event */
    if (ev_extra != NULL && !selected)
        return pth_error(-1, EINTR);

    return rc;
}

/*
 * Pth variant of pselect(2).
 *
 * Converts the timespec timeout (if any) into a timeval, optionally
 * installs mask as signal mask for the duration of the call, runs
 * pth_select() and afterwards puts the previous signal mask back.
 *
 * Returns the result of pth_select(), or -1 with errno set if the
 * signal mask could not be installed.
 */
int pth_pselect(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds,
                const struct timespec *ts, const sigset_t *mask)
{
    struct timeval tv_buf;
    struct timeval *tv_arg = NULL;
    sigset_t saved_mask;
    int result;

    /* convert timeout (nanoseconds are cut down to microseconds) */
    if (ts != NULL) {
        tv_buf.tv_sec  = ts->tv_sec;
        tv_buf.tv_usec = ts->tv_nsec / 1000;
        tv_arg = &tv_buf;
    }

    /* optionally switch to the caller supplied signal mask */
    if (mask != NULL && pth_sc(sigprocmask)(SIG_SETMASK, mask, &saved_mask) < 0)
        return pth_error(-1, errno);

    result = pth_select(nfds, rfds, wfds, efds, tv_arg);

    /* optionally switch back to the previous signal mask */
    if (mask != NULL) {
        pth_shield { pth_sc(sigprocmask)(SIG_SETMASK, &saved_mask, NULL); }
    }

    return result;
}

/* Pth variant of poll(2): forwards all arguments to pth_poll_ev()
   without an extra event (ev_extra == NULL) and returns its result */
int pth_poll(struct pollfd *pfd, nfds_t nfd, int timeout)
{
    return pth_poll_ev(pfd, nfd, timeout, NULL);
}

/* Pth variant of poll(2) with extra events:
   NOTICE: THIS HAS TO BE BASED ON pth_select(2) BECAUSE
           INTERNALLY THE SCHEDULER IS ONLY select(2) BASED!!

   pfd/nfd   array of descriptors to examine and its length
   timeout   milliseconds to wait, 0 for an immediate return or
             INFTIM to wait without limit
   ev_extra  optional additional event(s) handed to pth_select_ev()

   Returns the number of entries with a non-zero revents field, 0 on
   timeout (all revents fields are cleared then) or -1 with errno set
   (EFAULT for a NULL array, EINVAL for a bad nfd or timeout, else the
   errno left by pth_select_ev()).

   Descriptors which cannot be stored in an fd_set are never passed to
   FD_SET()/FD_ISSET(): negative ones are ignored (revents == 0), those
   not below FD_SETSIZE are reported as POLLNVAL. */
int pth_poll_ev(struct pollfd *pfd, nfds_t nfd, int timeout, pth_event_t ev_extra)
{
    fd_set rfds, wfds, efds, xfds;
    struct timeval tv, *ptv;
    int maxfd, rc, n;
    unsigned int i;
    char data[64];

    pth_implicit_init();
    pth_debug2("pth_poll_ev: called from thread \"%s\"", pth_current->name);

    /* argument sanity checks */
    if (pfd == NULL)
        return pth_error(-1, EFAULT);
    if (nfd < 0 || nfd > FD_SETSIZE)
        return pth_error(-1, EINVAL);

    /* convert timeout number into a timeval structure */
    ptv = &tv;
    if (timeout == 0) {
        /* return immediately */
        ptv->tv_sec  = 0;
        ptv->tv_usec = 0;
    }
    else if (timeout == INFTIM /* (-1) */) {
        /* wait forever */
        ptv = NULL;
    }
    else if (timeout > 0) {
        /* return after timeout */
        ptv->tv_sec  = (timeout / 1000);
        ptv->tv_usec = (timeout % 1000) * 1000;
    }
    else
        return pth_error(-1, EINVAL);

    /* create fd sets and determine max fd */
    maxfd = -1;
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&efds);
    FD_ZERO(&xfds);
    for (i = 0; i < nfd; i++) {
        /* a descriptor outside of [0, FD_SETSIZE) has no bit in an fd_set;
           touching the sets with it would access memory out of bounds,
           so such entries are dealt with when the results are built */
        if (pfd[i].fd < 0 || pfd[i].fd >= FD_SETSIZE)
            continue;
        /* convert into fd_sets but remember that BSD select(2) says
           "the only exceptional condition detectable is out-of-band
           data received on a socket", hence we push POLLWRBAND events
           onto wfds instead of efds. Additionally, remember invalid
           filedescriptors in an extra fd_set xfds. */
        if (!pth_util_fd_valid(pfd[i].fd)) {
            FD_SET(pfd[i].fd, &xfds);
            continue;
        }
        if (pfd[i].events & (POLLIN|POLLRDNORM))
            FD_SET(pfd[i].fd, &rfds);
        if (pfd[i].events & (POLLOUT|POLLWRNORM|POLLWRBAND))
            FD_SET(pfd[i].fd, &wfds);
        if (pfd[i].events & (POLLPRI|POLLRDBAND))
            FD_SET(pfd[i].fd, &efds);
        if (   pfd[i].fd >= maxfd
            && (pfd[i].events & (POLLIN|POLLOUT|POLLPRI|
                                 POLLRDNORM|POLLRDBAND|
                                 POLLWRNORM|POLLWRBAND)))
            maxfd = pfd[i].fd;
    }

    /* examine fd sets with pth_select(3) */
    rc = -1;
    if (maxfd != -1) {
        rc = pth_select_ev(maxfd+1, &rfds, &wfds, &efds, ptv, ev_extra);
        if (rc < 0)
            return pth_error(-1, errno);
        else if (rc == 0) {
            /* timeout: do not leave stale revents values behind */
            for (i = 0; i < nfd; i++)
                pfd[i].revents = 0;
            return 0;
        }
    }

    /* POSIX.1-2001/SUSv3 compliant result establishment */
    n = 0;
    for (i = 0; i < nfd; i++) {
        pfd[i].revents = 0;
        /* negative descriptors are ignored */
        if (pfd[i].fd < 0)
            continue;
        /* descriptors which do not fit into an fd_set or which were
           found to be invalid above are reported as POLLNVAL */
        if (pfd[i].fd >= FD_SETSIZE || FD_ISSET(pfd[i].fd, &xfds)) {
            pfd[i].revents |= POLLNVAL;
            n++;
            continue;
        }
        /* no select was performed, so there is nothing more to report */
        if (maxfd == -1)
            continue;
        if (FD_ISSET(pfd[i].fd, &rfds)) {
            if (pfd[i].events & POLLIN)
                pfd[i].revents |= POLLIN;
            if (pfd[i].events & POLLRDNORM)
                pfd[i].revents |= POLLRDNORM;
            n++;
            /* support for POLLHUP: a peek which fails with one of the
               connection teardown errors replaces the read indication */
            if (   recv(pfd[i].fd, data, sizeof(data), MSG_PEEK) == -1
                && (   errno == ESHUTDOWN    || errno == ECONNRESET
                    || errno == ECONNABORTED || errno == ENETRESET    )) {
                pfd[i].revents &= ~(POLLIN);
                pfd[i].revents &= ~(POLLRDNORM);
                pfd[i].revents |= POLLHUP;
            }
        }
        else if (FD_ISSET(pfd[i].fd, &wfds)) {
            if (pfd[i].events & POLLOUT)
                pfd[i].revents |= POLLOUT;
            if (pfd[i].events & POLLWRNORM)
                pfd[i].revents |= POLLWRNORM;
            if (pfd[i].events & POLLWRBAND)
                pfd[i].revents |= POLLWRBAND;
            n++;
        }
        else if (FD_ISSET(pfd[i].fd, &efds)) {
            if (pfd[i].events & POLLPRI)
                pfd[i].revents |= POLLPRI;
            if (pfd[i].events & POLLRDBAND)
                pfd[i].revents |= POLLRDBAND;
            n++;
        }
    }

    return n;
}

/* Pth variant of connect(2): forwards all arguments to pth_connect_ev()
   without an extra event (ev_extra == NULL) and returns its result */
int pth_connect(int s, const struct sockaddr *addr, socklen_t addrlen)
{
    return pth_connect_ev(s, addr, addrlen, NULL);
}

/*
 * Pth variant of connect(2) with extra events.
 *
 * The connect is issued with the socket forced into non-blocking mode;
 * the previous mode is put back right afterwards.  If the connect is
 * still in progress and the socket was not already non-blocking, the
 * thread waits until the socket is writeable (or, with ev_extra, until
 * any of the events occurs) and then fetches the outcome via SO_ERROR.
 *
 * Returns 0 on success and -1 on error: EBADF for an unusable
 * descriptor, EINTR if the writeable event did not occur, the SO_ERROR
 * value for a failed connect, else the errno of the failing call.
 */
int pth_connect_ev(int s, const struct sockaddr *addr, socklen_t addrlen, pth_event_t ev_extra)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    pth_event_t ev_writeable;
    socklen_t so_len;
    int so_err;
    int old_mode;
    int rv;

    pth_implicit_init();
    pth_debug2("pth_connect_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (!pth_util_fd_valid(s))
        return pth_error(-1, EBADF);

    /* force filedescriptor into non-blocking mode */
    old_mode = pth_fdmode(s, PTH_FDMODE_NONBLOCK);
    if (old_mode == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* try to connect, restarting on EINTR */
    do {
        rv = pth_sc(connect)(s, (struct sockaddr *)addr, addrlen);
    } while (rv == -1 && errno == EINTR);

    /* restore filedescriptor mode */
    pth_shield { pth_fdmode(s, old_mode); }

    /* everything except a pending connect on a socket which the caller
       uses in blocking mode is finished at this point */
    if (!(rv == -1 && errno == EINPROGRESS && old_mode != PTH_FDMODE_NONBLOCK)) {
        pth_debug2("pth_connect_ev: leave to thread \"%s\"", pth_current->name);
        return rv;
    }

    /* still in progress: wait until socket is really writeable */
    ev_writeable = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_WRITEABLE|PTH_MODE_STATIC, &ev_key, s);
    if (ev_extra != NULL)
        pth_event_concat(ev_writeable, ev_extra, NULL);
    pth_wait(ev_writeable);
    if (ev_extra != NULL) {
        pth_event_isolate(ev_writeable);
        if (pth_event_status(ev_writeable) != PTH_STATUS_OCCURRED)
            return pth_error(-1, EINTR);
    }

    /* ask the socket how the connect ended */
    so_len = sizeof(so_err);
    if (getsockopt(s, SOL_SOCKET, SO_ERROR, (void *)&so_err, &so_len) == -1)
        return -1;
    if (so_err == 0)
        return 0;
    return pth_error(rv, so_err);
}

/* Pth variant of accept(2): forwards all arguments to pth_accept_ev()
   without an extra event (ev_extra == NULL) and returns its result */
int pth_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    return pth_accept_ev(s, addr, addrlen, NULL);
}

/*
 * Pth variant of accept(2) with extra events.
 *
 * The listening socket is forced into non-blocking mode and accept is
 * retried, with the thread waiting for readability in between, as long
 * as it reports EAGAIN/EWOULDBLOCK and the socket was not non-blocking
 * on entry.  Afterwards the mode found on entry is applied to the
 * listening socket and to the accepted socket.
 *
 * Returns the accepted socket, or -1 with errno set (EBADF for an
 * unusable descriptor, EINTR if the wait ended without the readable
 * event having occurred, else the errno of accept).
 */
int pth_accept_ev(int s, struct sockaddr *addr, socklen_t *addrlen, pth_event_t ev_extra)
{
    pth_event_t ev;
    static pth_key_t ev_key = PTH_KEY_INIT;
    int fdmode;
    int rv;

    pth_implicit_init();
    pth_debug2("pth_accept_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (!pth_util_fd_valid(s))
        return pth_error(-1, EBADF);

    /* force filedescriptor into non-blocking mode */
    if ((fdmode = pth_fdmode(s, PTH_FDMODE_NONBLOCK)) == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* poll socket via accept */
    ev = NULL;
    while ((rv = pth_sc(accept)(s, addr, addrlen)) == -1
           && (errno == EAGAIN || errno == EWOULDBLOCK)
           && fdmode != PTH_FDMODE_NONBLOCK) {
        /* do lazy event allocation */
        if (ev == NULL)
            ev = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_READABLE|PTH_MODE_STATIC, &ev_key, s);
        /* attach the extra events for every single wait: they get
           separated from ev again by the pth_event_isolate() call below,
           so attaching them only once would make every further round
           wait on the socket alone */
        if (ev_extra != NULL)
            pth_event_concat(ev, ev_extra, NULL);
        /* wait until accept has a chance */
        pth_wait(ev);
        /* check for the extra events */
        if (ev_extra != NULL) {
            pth_event_isolate(ev);
            if (pth_event_status(ev) != PTH_STATUS_OCCURRED) {
                pth_fdmode(s, fdmode);
                return pth_error(-1, EINTR);
            }
        }
    }

    /* restore filedescriptor mode */
    pth_shield {
        pth_fdmode(s, fdmode);
        if (rv != -1)
            pth_fdmode(rv, fdmode);
    }

    pth_debug2("pth_accept_ev: leave to thread \"%s\"", pth_current->name);
    return rv;
}

/* Pth variant of read(2): forwards all arguments to pth_read_ev()
   without an extra event (ev_extra == NULL) and returns its result */
ssize_t pth_read(int fd, void *buf, size_t nbytes)
{
    return pth_read_ev(fd, buf, nbytes, NULL);
}

/*
 * Pth variant of read(2) with extra event(s).
 *
 * For a descriptor in blocking mode the thread is suspended until the
 * descriptor is readable (or, with ev_extra, until any of the events
 * occurs); then a single read(2) is performed.
 *
 * Returns the number of bytes read (0 for nbytes == 0), or -1 with
 * errno set: EBADF for an unusable descriptor, EINVAL/EBADF from the
 * initial poll, EINTR if the readable event did not occur, else the
 * errno of read(2).
 */
ssize_t pth_read_ev(int fd, void *buf, size_t nbytes, pth_event_t ev_extra)
{
    struct timeval delay;
    pth_event_t ev;
    static pth_key_t ev_key = PTH_KEY_INIT;
    fd_set fds;
    int fdmode;
    int n;       /* result of the select(2) poll */
    ssize_t rv;  /* result of read(2); kept as ssize_t to avoid truncation */

    pth_implicit_init();
    pth_debug2("pth_read_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (nbytes == 0)
        return 0;
    if (!pth_util_fd_valid(fd))
        return pth_error(-1, EBADF);

    /* check mode of filedescriptor */
    if ((fdmode = pth_fdmode(fd, PTH_FDMODE_POLL)) == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* poll filedescriptor if not already in non-blocking operation */
    if (fdmode == PTH_FDMODE_BLOCK) {

        /* now directly poll filedescriptor for readability
           to avoid unnecessary (and resource consuming because of context
           switches, etc) event handling through the scheduler */
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        delay.tv_sec  = 0;
        delay.tv_usec = 0;
        while ((n = pth_sc(select)(fd+1, &fds, NULL, NULL, &delay)) < 0
               && errno == EINTR) ;
        if (n < 0 && (errno == EINVAL || errno == EBADF))
            return pth_error(-1, errno);

        /* if filedescriptor is still not readable,
           let thread sleep until it is or the extra event occurs */
        if (n == 0) {
            ev = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_READABLE|PTH_MODE_STATIC, &ev_key, fd);
            if (ev_extra != NULL)
                pth_event_concat(ev, ev_extra, NULL);
            pth_wait(ev);
            if (ev_extra != NULL) {
                pth_event_isolate(ev);
                if (pth_event_status(ev) != PTH_STATUS_OCCURRED)
                    return pth_error(-1, EINTR);
            }
        }
    }

    /* Now perform the actual read. We're now guaranteed to not block,
       either because we were already in non-blocking mode or we determined
       above by polling that the next read(2) call will not block.  But keep
       in mind, that only 1 next read(2) call is guaranteed to not block
       (except for the EINTR situation). */
    while ((rv = pth_sc(read)(fd, buf, nbytes)) < 0
           && errno == EINTR) ;

    pth_debug2("pth_read_ev: leave to thread \"%s\"", pth_current->name);
    return rv;
}

/* Pth variant of write(2): forwards all arguments to pth_write_ev()
   without an extra event (ev_extra == NULL) and returns its result */
ssize_t pth_write(int fd, const void *buf, size_t nbytes)
{
    return pth_write_ev(fd, buf, nbytes, NULL);
}

/*
 * Pth variant of write(2) with extra event(s).
 *
 * The descriptor is forced into non-blocking mode for the duration of
 * the call.  If it was not non-blocking on entry, the function keeps
 * writing (waiting for writeability in between) until all nbytes are
 * written or an error occurs; else a single write(2) is performed.
 * The mode found on entry is restored on every path which got past the
 * mode switch.
 *
 * Returns the number of bytes written (0 for nbytes == 0), or -1 with
 * errno set: EBADF for an unusable descriptor, EINVAL/EBADF from the
 * initial poll, EINTR if the writeable event did not occur, else the
 * errno of write(2).  An error after a partial write is not reported;
 * the partial count is returned instead.
 */
ssize_t pth_write_ev(int fd, const void *buf, size_t nbytes, pth_event_t ev_extra)
{
    struct timeval delay;
    pth_event_t ev;
    static pth_key_t ev_key = PTH_KEY_INIT;
    fd_set fds;
    int fdmode;
    ssize_t rv;
    ssize_t s;
    int n;

    pth_implicit_init();
    pth_debug2("pth_write_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (nbytes == 0)
        return 0;
    if (!pth_util_fd_valid(fd))
        return pth_error(-1, EBADF);

    /* force filedescriptor into non-blocking mode */
    if ((fdmode = pth_fdmode(fd, PTH_FDMODE_NONBLOCK)) == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* poll filedescriptor if not already in non-blocking operation */
    if (fdmode != PTH_FDMODE_NONBLOCK) {

        /* now directly poll filedescriptor for writeability
           to avoid unnecessary (and resource consuming because of context
           switches, etc) event handling through the scheduler */
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        delay.tv_sec  = 0;
        delay.tv_usec = 0;
        while ((n = pth_sc(select)(fd+1, NULL, &fds, NULL, &delay)) < 0
               && errno == EINTR) ;
        if (n < 0 && (errno == EINVAL || errno == EBADF)) {
            /* do not leave the descriptor in the forced non-blocking
               mode; report the errno of the failed select(2) */
            int select_errno = errno;
            pth_fdmode(fd, fdmode);
            return pth_error(-1, select_errno);
        }

        rv = 0;
        for (;;) {
            /* if filedescriptor is still not writeable,
               let thread sleep until it is or event occurs */
            if (n < 1) {
                ev = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_WRITEABLE|PTH_MODE_STATIC, &ev_key, fd);
                if (ev_extra != NULL)
                    pth_event_concat(ev, ev_extra, NULL);
                pth_wait(ev);
                if (ev_extra != NULL) {
                    pth_event_isolate(ev);
                    if (pth_event_status(ev) != PTH_STATUS_OCCURRED) {
                        pth_fdmode(fd, fdmode);
                        return pth_error(-1, EINTR);
                    }
                }
            }

            /* now perform the actual write operation */
            while ((s = pth_sc(write)(fd, buf, nbytes)) < 0
                   && errno == EINTR) ;
            if (s > 0)
                rv += s;

            /* although we're physically now in non-blocking mode,
               iterate unless all data is written or an error occurs, because
               we've to mimic the usual blocking I/O behaviour of write(2). */
            if (s > 0 && s < (ssize_t)nbytes) {
                nbytes -= s;
                buf = (const char *)buf + s;
                /* force a wait for writeability before the next attempt */
                n = 0;
                continue;
            }

            /* pass error to caller, but not for partial writes (rv > 0) */
            if (s < 0 && rv == 0)
                rv = -1;

            /* stop looping */
            break;
        }
    }
    else {
        /* just perform the actual write operation */
        while ((rv = pth_sc(write)(fd, buf, nbytes)) < 0
               && errno == EINTR) ;
    }

    /* restore filedescriptor mode */
    pth_shield { pth_fdmode(fd, fdmode); }

    pth_debug2("pth_write_ev: leave to thread \"%s\"", pth_current->name);
    return rv;
}

/* Pth variant of readv(2): forwards all arguments to pth_readv_ev()
   without an extra event (ev_extra == NULL) and returns its result */
ssize_t pth_readv(int fd, const struct iovec *iov, int iovcnt)
{
    return pth_readv_ev(fd, iov, iovcnt, NULL);
}

/*
 * Pth variant of readv(2) with extra event(s).
 *
 * For a descriptor in blocking mode the thread is suspended until the
 * descriptor is readable (or, with ev_extra, until any of the events
 * occurs); then a single readv(2) -- or its emulation when PTH_FAKE_RWV
 * is set -- is performed.
 *
 * Returns the number of bytes read, or -1 with errno set: EINVAL for an
 * iovcnt outside of [1, UIO_MAXIOV], EBADF for an unusable descriptor,
 * EINTR if the readable event did not occur, else the errno of the
 * read operation.
 */
ssize_t pth_readv_ev(int fd, const struct iovec *iov, int iovcnt, pth_event_t ev_extra)
{
    struct timeval delay;
    pth_event_t ev;
    static pth_key_t ev_key = PTH_KEY_INIT;
    fd_set fds;
    int fdmode;
    int n;       /* result of the select(2) poll */
    ssize_t rv;  /* result of readv(2); kept as ssize_t to avoid truncation */

    pth_implicit_init();
    pth_debug2("pth_readv_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (iovcnt <= 0 || iovcnt > UIO_MAXIOV)
        return pth_error(-1, EINVAL);
    if (!pth_util_fd_valid(fd))
        return pth_error(-1, EBADF);

    /* check mode of filedescriptor */
    if ((fdmode = pth_fdmode(fd, PTH_FDMODE_POLL)) == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* poll filedescriptor if not already in non-blocking operation */
    if (fdmode == PTH_FDMODE_BLOCK) {

        /* first directly poll filedescriptor for readability
           to avoid unnecessary (and resource consuming because of context
           switches, etc) event handling through the scheduler */
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        delay.tv_sec  = 0;
        delay.tv_usec = 0;
        while ((n = pth_sc(select)(fd+1, &fds, NULL, NULL, &delay)) < 0
               && errno == EINTR) ;

        /* if filedescriptor is still not readable,
           let thread sleep until it is or event occurs */
        if (n < 1) {
            ev = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_READABLE|PTH_MODE_STATIC, &ev_key, fd);
            if (ev_extra != NULL)
                pth_event_concat(ev, ev_extra, NULL);
            pth_wait(ev);
            if (ev_extra != NULL) {
                pth_event_isolate(ev);
                if (pth_event_status(ev) != PTH_STATUS_OCCURRED)
                    return pth_error(-1, EINTR);
            }
        }
    }

    /* Now perform the actual read. We're now guaranteed to not block,
       either because we were already in non-blocking mode or we determined
       above by polling that the next read(2) call will not block.  But keep
       in mind, that only 1 next read(2) call is guaranteed to not block
       (except for the EINTR situation). */
#if PTH_FAKE_RWV
    while ((rv = pth_readv_faked(fd, iov, iovcnt)) < 0
           && errno == EINTR) ;
#else
    while ((rv = pth_sc(readv)(fd, iov, iovcnt)) < 0
           && errno == EINTR) ;
#endif

    pth_debug2("pth_readv_ev: leave to thread \"%s\"", pth_current->name);
    return rv;
}

/*
 * A faked version of readv(2), built on a single read(2).
 *
 * Reads up to the summed length of all vector elements into a
 * temporary buffer and distributes the received bytes over the
 * elements in order.
 *
 * Returns the number of bytes read, 0 on end-of-file or -1 on error:
 * EINVAL if an element has zero length or there is nothing to read
 * into, the errno of malloc(3) if no buffer could be allocated, else
 * the errno of read(2).
 */
intern ssize_t pth_readv_faked(int fd, const struct iovec *iov, int iovcnt)
{
    char *buffer;   /* start of the temporary buffer (the pointer to free) */
    char *cursor;   /* read position inside of buffer while scattering */
    size_t bytes, copy;
    ssize_t rv;     /* signed, in order to recognize a failed read(2) */
    int i;

    /* determine total number of bytes to read */
    bytes = 0;
    for (i = 0; i < iovcnt; i++) {
        if (iov[i].iov_len == 0)
            return pth_error((ssize_t)(-1), EINVAL);
        bytes += iov[i].iov_len;
    }
    if (bytes == 0)
        return pth_error((ssize_t)(-1), EINVAL);

    /* allocate a temporary buffer */
    if ((buffer = (char *)malloc(bytes)) == NULL)
        return (ssize_t)(-1);

    /* read data into temporary buffer (caller guaranteed us to not block) */
    rv = pth_sc(read)(fd, buffer, bytes);

    /* scatter read data into callers vector; the buffer pointer itself
       has to stay untouched because it is handed to free(3) below */
    if (rv > 0) {
        bytes  = (size_t)rv;
        cursor = buffer;
        for (i = 0; i < iovcnt; i++) {
            copy = pth_util_min(iov[i].iov_len, bytes);
            memcpy(iov[i].iov_base, cursor, copy);
            cursor += copy;
            bytes  -= copy;
            if (bytes == 0)
                break;
        }
    }

    /* remove the temporary buffer */
    pth_shield { free(buffer); }

    /* return number of read bytes */
    return(rv);
}

/* Pth variant of writev(2): forwards all arguments to pth_writev_ev()
   without an extra event (ev_extra == NULL) and returns its result */
ssize_t pth_writev(int fd, const struct iovec *iov, int iovcnt)
{
    return pth_writev_ev(fd, iov, iovcnt, NULL);
}

/*
 * Pth variant of writev(2) with extra event(s).
 *
 * The descriptor is forced into non-blocking mode for the duration of
 * the call.  If it was not non-blocking on entry, the function keeps
 * writing (waiting for writeability in between) until the whole vector
 * is written or an error occurs; else a single writev(2) -- or its
 * emulation when PTH_FAKE_RWV is set -- is performed.  The mode found
 * on entry is restored on every path which got past the mode switch.
 *
 * Returns the number of bytes written, or -1 with errno set: EINVAL for
 * an iovcnt outside of [1, UIO_MAXIOV], EBADF for an unusable
 * descriptor, the errno of malloc(3) if no scratch vector could be
 * allocated, EINTR if the writeable event did not occur, else the errno
 * of the write operation.  An error after a partial write is not
 * reported; the partial count is returned instead.
 */
ssize_t pth_writev_ev(int fd, const struct iovec *iov, int iovcnt, pth_event_t ev_extra)
{
    struct timeval delay;
    pth_event_t ev;
    static pth_key_t ev_key = PTH_KEY_INIT;
    fd_set fds;
    int fdmode;
    struct iovec *liov;
    int liovcnt;
    size_t nbytes;
    ssize_t rv;
    ssize_t s;
    int n;
    struct iovec tiov_stack[32];
    struct iovec *tiov;
    int tiovcnt;
    int tiov_heap;  /* non-zero if tiov was allocated with malloc(3) */

    pth_implicit_init();
    pth_debug2("pth_writev_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (iovcnt <= 0 || iovcnt > UIO_MAXIOV)
        return pth_error(-1, EINVAL);
    if (!pth_util_fd_valid(fd))
        return pth_error(-1, EBADF);

    /* force filedescriptor into non-blocking mode */
    if ((fdmode = pth_fdmode(fd, PTH_FDMODE_NONBLOCK)) == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* poll filedescriptor if not already in non-blocking operation */
    if (fdmode != PTH_FDMODE_NONBLOCK) {
        /* provide temporary iovec structure: the stack array is only
           large enough if the number of vector elements (not bytes!)
           does not exceed its number of elements.
           NOTE(review): tiovcnt is still passed on as a size in bytes,
           as before -- confirm against pth_writev_iov_advance(). */
        tiov_heap = ((size_t)iovcnt > sizeof(tiov_stack)/sizeof(tiov_stack[0]));
        if (tiov_heap) {
            tiovcnt = (sizeof(struct iovec) * UIO_MAXIOV);
            if ((tiov = (struct iovec *)malloc(tiovcnt)) == NULL) {
                /* do not leave the descriptor in the forced non-blocking
                   mode; report the errno of the failed malloc(3) */
                int malloc_errno = errno;
                pth_fdmode(fd, fdmode);
                return pth_error(-1, malloc_errno);
            }
        }
        else {
            tiovcnt = sizeof(tiov_stack);
            tiov    = tiov_stack;
        }

        /* init return value and number of bytes to write */
        rv      = 0;
        nbytes  = pth_writev_iov_bytes(iov, iovcnt);

        /* init local iovec structure */
        liov    = NULL;
        liovcnt = 0;
        pth_writev_iov_advance(iov, iovcnt, 0, &liov, &liovcnt, tiov, tiovcnt);

        /* first directly poll filedescriptor for writeability
           to avoid unnecessary (and resource consuming because of context
           switches, etc) event handling through the scheduler */
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        delay.tv_sec  = 0;
        delay.tv_usec = 0;
        while ((n = pth_sc(select)(fd+1, NULL, &fds, NULL, &delay)) < 0
               && errno == EINTR) ;

        for (;;) {
            /* if filedescriptor is still not writeable,
               let thread sleep until it is or event occurs */
            if (n < 1) {
                ev = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_WRITEABLE|PTH_MODE_STATIC, &ev_key, fd);
                if (ev_extra != NULL)
                    pth_event_concat(ev, ev_extra, NULL);
                pth_wait(ev);
                if (ev_extra != NULL) {
                    pth_event_isolate(ev);
                    if (pth_event_status(ev) != PTH_STATUS_OCCURRED) {
                        pth_fdmode(fd, fdmode);
                        if (tiov_heap)
                            free(tiov);
                        return pth_error(-1, EINTR);
                    }
                }
            }

            /* now perform the actual write operation */
#if PTH_FAKE_RWV
            while ((s = pth_writev_faked(fd, liov, liovcnt)) < 0
                   && errno == EINTR) ;
#else
            while ((s = pth_sc(writev)(fd, liov, liovcnt)) < 0
                   && errno == EINTR) ;
#endif
            if (s > 0)
                rv += s;

            /* although we're physically now in non-blocking mode,
               iterate unless all data is written or an error occurs, because
               we've to mimic the usual blocking I/O behaviour of writev(2) */
            if (s > 0 && s < (ssize_t)nbytes) {
                nbytes -= s;
                pth_writev_iov_advance(iov, iovcnt, s, &liov, &liovcnt, tiov, tiovcnt);
                /* force a wait for writeability before the next attempt */
                n = 0;
                continue;
            }

            /* pass error to caller, but not for partial writes (rv > 0) */
            if (s < 0 && rv == 0)
                rv = -1;

            /* stop looping */
            break;
        }

        /* cleanup */
        if (tiov_heap)
            free(tiov);
    }
    else {
        /* just perform the actual write operation */
#if PTH_FAKE_RWV
        while ((rv = pth_writev_faked(fd, iov, iovcnt)) < 0
               && errno == EINTR) ;
#else
        while ((rv = pth_sc(writev)(fd, iov, iovcnt)) < 0
               && errno == EINTR) ;
#endif
    }

    /* restore filedescriptor mode */
    pth_shield { pth_fdmode(fd, fdmode); }

    pth_debug2("pth_writev_ev: leave to thread \"%s\"", pth_current->name);
    return rv;
}

/* sum up the lengths of all entries of an iovec array */
intern ssize_t pth_writev_iov_bytes(const struct iovec *iov, int iovcnt)
{
    ssize_t total = 0;
    int idx = 0;

    while (idx < iovcnt) {
        /* empty entries contribute nothing */
        if (iov[idx].iov_len > 0)
            total += iov[idx].iov_len;
        idx++;
    }
    return total;
}

/*
 * Move the "virtual" start of an iovec array forward by `advance' bytes.
 *
 * riov/riovcnt describe the caller's (read-only) vector, liov/liovcnt the
 * current view onto it. As long as nothing was consumed the view aliases
 * riov; on the first real advance the entries are copied into tiov so that
 * they can be adjusted. tiovcnt is not consulted by this routine.
 */
intern void pth_writev_iov_advance(const struct iovec *riov, int riovcnt, size_t advance,
                                   struct iovec **liov, int *liovcnt,
                                   struct iovec *tiov, int tiovcnt)
{
    struct iovec *cur;
    int left;
    int k;

    /* very first step: view the real (const) structure directly */
    if (*liov == NULL && *liovcnt == 0) {
        *liov = (struct iovec *)riov;
        *liovcnt = riovcnt;
    }

    /* nothing consumed, nothing to adjust */
    if (advance == 0)
        return;

    /* still looking at the original: switch to a private, writable copy */
    if (*liov == riov && *liovcnt == riovcnt) {
        for (k = 0; k < riovcnt; k++) {
            tiov[k].iov_base = riov[k].iov_base;
            tiov[k].iov_len  = riov[k].iov_len;
        }
        *liov = &tiov[0];
    }

    /* drop fully consumed entries, then trim the partially consumed one */
    cur  = *liov;
    left = *liovcnt;
    while (left > 0 && advance > 0) {
        if (cur->iov_len > advance) {
            cur->iov_base = (char *)(cur->iov_base) + advance;
            cur->iov_len -= advance;
            break;
        }
        advance -= cur->iov_len;
        left--;
        cur++;
    }
    *liov    = cur;
    *liovcnt = left;
    return;
}

/*
 * A faked version of writev(2): gathers all iovec entries into one
 * temporary heap buffer and hands it to a single write(2).
 *
 * Returns the result of the write call, or -1 with errno EINVAL if an
 * entry has a zero length or the vector is empty, or -1 if the temporary
 * buffer cannot be allocated.
 */
intern ssize_t pth_writev_faked(int fd, const struct iovec *iov, int iovcnt)
{
    char *buffer, *cp;
    size_t bytes, to_copy, copy;
    ssize_t rv; /* signed, so that a -1 from write(2) is passed on unchanged */
    int i;

    /* determine total number of bytes to write */
    bytes = 0;
    for (i = 0; i < iovcnt; i++) {
        if (iov[i].iov_len <= 0)
            return pth_error((ssize_t)(-1), EINVAL);
        bytes += iov[i].iov_len;
    }
    if (bytes <= 0)
        return pth_error((ssize_t)(-1), EINVAL);

    /* allocate a temporary buffer to hold the data */
    if ((buffer = (char *)malloc(bytes)) == NULL)
        return (ssize_t)(-1);

    /* concatenate the data from callers vector into buffer */
    to_copy = bytes;
    cp = buffer;
    for (i = 0; i < iovcnt; i++) {
         copy = pth_util_min(iov[i].iov_len, to_copy);
         memcpy(cp, iov[i].iov_base, copy);
         /* step behind the chunk just copied; without this every entry
            would overwrite the beginning of the buffer */
         cp += copy;
         to_copy -= copy;
         if (to_copy <= 0)
             break;
    }

    /* write the contiguous chunk of data (caller guaranteed us to not block) */
    rv = pth_sc(write)(fd, buffer, bytes);

    /* remove the temporary buffer */
    pth_shield { free(buffer); }

    return(rv);
}

/*
 * Pth variant of POSIX pread(3): read at an explicit file offset by
 * seeking there, reading via pth_read() and seeking back afterwards.
 * The whole sequence runs under a function-local mutex.
 */
ssize_t pth_pread(int fd, void *buf, size_t nbytes, off_t offset)
{
    static pth_mutex_t mutex = PTH_MUTEX_INIT;
    off_t saved_pos;
    ssize_t result = -1;

    /* serialize the seek/read/seek sequence: pth_read can yield! */
    if (!pth_mutex_acquire(&mutex, FALSE, NULL))
        return (-1);

    /* remember where we are, then move to the requested offset;
       only if both seeks worked do the read and the seek back */
    saved_pos = lseek(fd, 0, SEEK_CUR);
    if (saved_pos != (off_t)(-1) && lseek(fd, offset, SEEK_SET) != (off_t)(-1)) {
        result = pth_read(fd, buf, nbytes);
        pth_shield { lseek(fd, saved_pos, SEEK_SET); }
    }

    /* unprotect and hand back the read result (or -1 on seek failure) */
    pth_mutex_release(&mutex);
    return result;
}

/*
 * Pth variant of POSIX pwrite(3): write at an explicit file offset by
 * seeking there, writing via pth_write() and seeking back afterwards.
 * The whole sequence runs under a function-local mutex.
 */
ssize_t pth_pwrite(int fd, const void *buf, size_t nbytes, off_t offset)
{
    static pth_mutex_t mutex = PTH_MUTEX_INIT;
    off_t saved_pos;
    ssize_t result = -1;

    /* serialize the seek/write/seek sequence: pth_write can yield! */
    if (!pth_mutex_acquire(&mutex, FALSE, NULL))
        return (-1);

    /* remember where we are, then move to the requested offset;
       only if both seeks worked do the write and the seek back */
    saved_pos = lseek(fd, 0, SEEK_CUR);
    if (saved_pos != (off_t)(-1) && lseek(fd, offset, SEEK_SET) != (off_t)(-1)) {
        result = pth_write(fd, buf, nbytes);
        pth_shield { lseek(fd, saved_pos, SEEK_SET); }
    }

    /* unprotect and hand back the write result (or -1 on seek failure) */
    pth_mutex_release(&mutex);
    return result;
}

/* Pth variant of SUSv2 recv(2): pth_recv_ev() without an extra event */
ssize_t pth_recv(int s, void *buf, size_t len, int flags)
{
    ssize_t received;

    received = pth_recv_ev(s, buf, len, flags, NULL);
    return received;
}

/* Pth variant of SUSv2 recv(2) with extra event(s):
   pth_recvfrom_ev() without a source address */
ssize_t pth_recv_ev(int s, void *buf, size_t len, int flags, pth_event_t ev)
{
    ssize_t received;

    received = pth_recvfrom_ev(s, buf, len, flags, NULL, NULL, ev);
    return received;
}

/* Pth variant of SUSv2 recvfrom(2): pth_recvfrom_ev() without an extra event */
ssize_t pth_recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen)
{
    ssize_t received;

    received = pth_recvfrom_ev(s, buf, len, flags, from, fromlen, NULL);
    return received;
}

/*
 * Pth variant of SUSv2 recvfrom(2) with extra event(s).
 *
 * For a descriptor in blocking mode the calling thread is suspended via
 * the scheduler until the descriptor is readable or ev_extra occurs
 * (the latter yields -1/EINTR); then a single recvfrom(2) is issued.
 */
ssize_t pth_recvfrom_ev(int fd, void *buf, size_t nbytes, int flags, struct sockaddr *from, socklen_t *fromlen, pth_event_t ev_extra)
{
    static pth_key_t ev_key = PTH_KEY_INIT;
    struct timeval zero_timeout;
    pth_event_t fd_event;
    fd_set readable;
    int mode;
    int rc;

    pth_implicit_init();
    pth_debug2("pth_recvfrom_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (nbytes == 0)
        return 0;
    if (!pth_util_fd_valid(fd))
        return pth_error(-1, EBADF);

    /* query the mode of the filedescriptor */
    mode = pth_fdmode(fd, PTH_FDMODE_POLL);
    if (mode == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* only a blocking descriptor needs the readability check */
    if (mode == PTH_FDMODE_BLOCK) {

        /* poll once with a zero timeout first; this avoids the cost of
           going through the scheduler when data is already there */
        if (!pth_util_fd_valid(fd))
            return pth_error(-1, EBADF);
        FD_ZERO(&readable);
        FD_SET(fd, &readable);
        zero_timeout.tv_sec  = 0;
        zero_timeout.tv_usec = 0;
        do {
            rc = pth_sc(select)(fd+1, &readable, NULL, NULL, &zero_timeout);
        } while (rc < 0 && errno == EINTR);
        if (rc < 0 && (errno == EINVAL || errno == EBADF))
            return pth_error(-1, errno);

        /* not readable yet: sleep until it is or the extra event occurs */
        if (rc == 0) {
            fd_event = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_READABLE|PTH_MODE_STATIC, &ev_key, fd);
            if (ev_extra != NULL)
                pth_event_concat(fd_event, ev_extra, NULL);
            rc = pth_wait(fd_event);
            if (ev_extra != NULL) {
                pth_event_isolate(fd_event);
                /* woken up by the extra event, not by the descriptor */
                if (pth_event_status(fd_event) != PTH_STATUS_OCCURRED)
                    return pth_error(-1, EINTR);
            }
        }
    }

    /* perform the actual read; only this one recvfrom(2) call is known
       not to block (interrupted calls are simply retried) */
    do {
        rc = pth_sc(recvfrom)(fd, buf, nbytes, flags, from, fromlen);
    } while (rc < 0 && errno == EINTR);

    pth_debug2("pth_recvfrom_ev: leave to thread \"%s\"", pth_current->name);
    return rc;
}

/* Pth variant of SUSv2 send(2): pth_send_ev() without an extra event */
ssize_t pth_send(int s, const void *buf, size_t len, int flags)
{
    ssize_t sent;

    sent = pth_send_ev(s, buf, len, flags, NULL);
    return sent;
}

/* Pth variant of SUSv2 send(2) with extra event(s):
   pth_sendto_ev() without a destination address */
ssize_t pth_send_ev(int s, const void *buf, size_t len, int flags, pth_event_t ev)
{
    ssize_t sent;

    sent = pth_sendto_ev(s, buf, len, flags, NULL, 0, ev);
    return sent;
}

/* Pth variant of SUSv2 sendto(2): pth_sendto_ev() without an extra event */
ssize_t pth_sendto(int s, const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen)
{
    ssize_t sent;

    sent = pth_sendto_ev(s, buf, len, flags, to, tolen, NULL);
    return sent;
}

/*
 * Pth variant of SUSv2 sendto(2) with extra event(s).
 *
 * The descriptor is temporarily forced into non-blocking mode. If it was
 * not non-blocking before, the call keeps sending until all nbytes are
 * out or an error occurs, suspending the thread via the scheduler
 * between attempts. If ev_extra occurs while waiting, -1/EINTR is
 * returned. The previous descriptor mode is restored on every exit path
 * taken after it was changed.
 *
 * Returns the number of bytes sent (possibly a partial count if an error
 * happened after some data went out), 0 for nbytes == 0, or -1 on error.
 */
ssize_t pth_sendto_ev(int fd, const void *buf, size_t nbytes, int flags, const struct sockaddr *to, socklen_t tolen, pth_event_t ev_extra)
{
    struct timeval delay;
    pth_event_t ev;
    static pth_key_t ev_key = PTH_KEY_INIT;
    fd_set fds;
    int fdmode;
    ssize_t rv;
    ssize_t s;
    int n;

    pth_implicit_init();
    pth_debug2("pth_sendto_ev: enter from thread \"%s\"", pth_current->name);

    /* POSIX compliance */
    if (nbytes == 0)
        return 0;
    if (!pth_util_fd_valid(fd))
        return pth_error(-1, EBADF);

    /* force filedescriptor into non-blocking mode */
    if ((fdmode = pth_fdmode(fd, PTH_FDMODE_NONBLOCK)) == PTH_FDMODE_ERROR)
        return pth_error(-1, EBADF);

    /* poll filedescriptor if not already in non-blocking operation */
    if (fdmode != PTH_FDMODE_NONBLOCK) {

        /* now directly poll filedescriptor for writeability
           to avoid unnecessary (and resource consuming because of context
           switches, etc) event handling through the scheduler */
        if (!pth_util_fd_valid(fd)) {
            pth_fdmode(fd, fdmode);
            return pth_error(-1, EBADF);
        }
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        delay.tv_sec  = 0;
        delay.tv_usec = 0;
        while ((n = pth_sc(select)(fd+1, NULL, &fds, NULL, &delay)) < 0
               && errno == EINTR) ;
        if (n < 0 && (errno == EINVAL || errno == EBADF)) {
            /* put the descriptor back into its original mode before
               bailing out; done under pth_shield (as at the regular
               exit below) so that the errno reported by select is the
               one the caller gets to see */
            pth_shield { pth_fdmode(fd, fdmode); }
            return pth_error(-1, errno);
        }

        rv = 0;
        for (;;) {
            /* if filedescriptor is still not writeable,
               let thread sleep until it is or event occurs */
            if (n == 0) {
                ev = pth_event(PTH_EVENT_FD|PTH_UNTIL_FD_WRITEABLE|PTH_MODE_STATIC, &ev_key, fd);
                if (ev_extra != NULL)
                    pth_event_concat(ev, ev_extra, NULL);
                pth_wait(ev);
                if (ev_extra != NULL) {
                    pth_event_isolate(ev);
                    if (pth_event_status(ev) != PTH_STATUS_OCCURRED) {
                        pth_fdmode(fd, fdmode);
                        return pth_error(-1, EINTR);
                    }
                }
            }

            /* now perform the actual send operation */
            while ((s = pth_sc(sendto)(fd, buf, nbytes, flags, to, tolen)) < 0
                   && errno == EINTR) ;
            if (s > 0)
                rv += s;

            /* although we're physically now in non-blocking mode,
               iterate unless all data is written or an error occurs, because
               we've to mimic the usual blocking I/O behaviour of write(2). */
            if (s > 0 && s < (ssize_t)nbytes) {
                nbytes -= s;
                buf = (void *)((char *)buf + s);
                /* force a wait through the scheduler before the next try */
                n = 0;
                continue;
            }

            /* pass error to caller, but not for partial writes (rv > 0) */
            if (s < 0 && rv == 0)
                rv = -1;

            /* stop looping */
            break;
        }
    }
    else {
        /* just perform the actual send operation */
        while ((rv = pth_sc(sendto)(fd, buf, nbytes, flags, to, tolen)) < 0
               && errno == EINTR) ;
    }

    /* restore filedescriptor mode */
    pth_shield { pth_fdmode(fd, fdmode); }

    pth_debug2("pth_sendto_ev: leave to thread \"%s\"", pth_current->name);
    return rv;
}

/* ==== pth_ext.c ==== */

/*
 * Sfio Extension:
 *
 * We provide an Sfio discipline which can be pushed on an Sfio_t* stream
 * to use the Pth thread-aware I/O routines (pth_read/pth_write).
 */

#if PTH_EXT_SFIO

/* Sfio read discipline: read from the stream's descriptor via pth_read() */
static ssize_t pth_sfio_read(Sfio_t *f, Void_t *buf, size_t n, Sfdisc_t *disc)
{
    return pth_read(sffileno(f), buf, n);
}

/* Sfio write discipline: write to the stream's descriptor via pth_write() */
static ssize_t pth_sfio_write(Sfio_t *f, const Void_t *buf, size_t n, Sfdisc_t *disc)
{
    return pth_write(sffileno(f), buf, n);
}

/* Sfio seek discipline: simply forwards to sfsk() */
static Sfoff_t pth_sfio_seek(Sfio_t *f, Sfoff_t addr, int type, Sfdisc_t *disc)
{
    Sfoff_t pos;

    pos = sfsk(f, addr, type, disc);
    return pos;
}

/* Sfio exception discipline: every exception type, listed or not,
   yields 0 (perform default action) */
static int pth_sfio_except(Sfio_t *f, int type, Void_t* data, Sfdisc_t *disc)
{
    switch (type) {
        case SF_LOCKED:
        case SF_READ:
        case SF_WRITE:
        case SF_SEEK:
        case SF_NEW:
        case SF_CLOSE:
        case SF_FINAL:
        case SF_DPUSH:
        case SF_DPOP:
        case SF_DBUFFER:
        case SF_DPOLL:
        case SF_READY:
        case SF_SYNC:
        case SF_PURGE:
        default:
            /* no special treatment for any of them */
            break;
    }
    return 0; /* perform default action */
}

#endif /* PTH_EXT_SFIO */

/*
 * Create an Sfio discipline whose read/write/seek/except callbacks are
 * the pth_sfio_* functions above.
 *
 * Returns a malloc(3)'ed discipline structure, or NULL with errno set:
 * to the malloc error if allocation fails, or to ENOSYS if Pth was built
 * without PTH_EXT_SFIO.
 */
Sfdisc_t *pth_sfiodisc(void)
{
#if PTH_EXT_SFIO
    Sfdisc_t *disc;

    if ((disc = (Sfdisc_t *)malloc(sizeof(Sfdisc_t))) == NULL)
        return pth_error((Sfdisc_t *)NULL, errno);
    disc->readf   = pth_sfio_read;
    disc->writef  = pth_sfio_write;
    disc->seekf   = pth_sfio_seek;
    disc->exceptf = pth_sfio_except;
    return disc;
#else
    return pth_error((Sfdisc_t *)NULL, ENOSYS);
#endif /* PTH_EXT_SFIO */
}

/* ==== pth_string.c ==== */

/* widest integer type used by the formatter:
   long long if HAVE_LONGLONG is set, plain long otherwise */
#if HAVE_LONGLONG
#define LLONG long long
#else
#define LLONG long
#endif

/* widest floating point type used by the formatter:
   long double if HAVE_LONGDOUBLE is set, plain double otherwise */
#if HAVE_LONGDOUBLE
#define LDOUBLE long double
#else
#define LDOUBLE double
#endif

/* forward declarations of the per-conversion output helpers used by dopr() */
static void fmtstr     (char *, size_t *, size_t, char *, int, int, int);
static void fmtint     (char *, size_t *, size_t, LLONG, int, int, int, int);
static void fmtfp      (char *, size_t *, size_t, LDOUBLE, int, int, int);
static void dopr_outch (char *, size_t *, size_t, int);

/* format read states (the states of the parser loop in dopr()) */
#define DP_S_DEFAULT    0
#define DP_S_FLAGS      1
#define DP_S_MIN        2
#define DP_S_DOT        3
#define DP_S_MAX        4
#define DP_S_MOD        5
#define DP_S_CONV       6
#define DP_S_DONE       7

/* format flags - Bits (may be OR'ed together) */
#define DP_F_MINUS      (1 << 0)
#define DP_F_PLUS       (1 << 1)
#define DP_F_SPACE      (1 << 2)
#define DP_F_NUM        (1 << 3)
#define DP_F_ZERO       (1 << 4)
#define DP_F_UP         (1 << 5)
#define DP_F_UNSIGNED   (1 << 6)

/* conversion flags (set by dopr() from the h, l, L and ll/q modifiers) */
#define DP_C_SHORT      1
#define DP_C_LONG       2
#define DP_C_LDOUBLE    3
#define DP_C_LLONG      4

/* some handy macros; arguments are fully parenthesized so that compound
   expressions (e.g. `a & b' or `c ? x : y') expand as intended. Note that
   MAX still evaluates its arguments more than once. */
#define char_to_int(p) ((p) - '0')
#define MAX(p,q) (((p) >= (q)) ? (p) : (q))
#define NUL '\0'

/*
 * dopr -- the printf-style formatting engine behind pth_vsnprintf().
 *
 * Walks `format' with a small state machine (DP_S_*) and emits the result
 * through dopr_outch(), which never stores more than `maxlen' characters.
 *
 *   buffer  destination, or NULL to only count the output
 *   maxlen  capacity of buffer including the terminating NUL;
 *           (size_t)-1 means "practically unlimited"; with 0 nothing
 *           is stored at all, not even the terminator
 *   retlen  receives the number of characters produced (without NUL,
 *           at most maxlen-1 for a bounded buffer)
 *   args    the arguments consumed by the conversions
 *
 * The e/E/g/G conversions consume their argument but produce no output.
 */
static void
dopr(
    char *buffer,
    size_t maxlen,
    size_t *retlen,
    const char *format,
    va_list args)
{
    char ch;
    LLONG value;
    LDOUBLE fvalue;
    char *strvalue;
    int min;
    int max;
    int state;
    int flags;
    int cflags;
    size_t currlen;

    state = DP_S_DEFAULT;
    flags = currlen = cflags = min = 0;
    max = -1;
    ch = *format++;

    if (maxlen == (size_t)-1)
        /* possible maximum size in a size_t; the shift is done in size_t
           arithmetic because shifting a plain int that far is undefined */
        maxlen = ~((size_t)1 << ((sizeof(size_t)*8)-2));

    while (state != DP_S_DONE) {
        if ((ch == NUL) || (currlen >= maxlen))
            state = DP_S_DONE;

        switch (state) {
        case DP_S_DEFAULT:
            if (ch == '%')
                state = DP_S_FLAGS;
            else
                dopr_outch(buffer, &currlen, maxlen, ch);
            ch = *format++;
            break;
        case DP_S_FLAGS:
            switch (ch) {
                case '-':
                    flags |= DP_F_MINUS;
                    ch = *format++;
                    break;
                case '+':
                    flags |= DP_F_PLUS;
                    ch = *format++;
                    break;
                case ' ':
                    flags |= DP_F_SPACE;
                    ch = *format++;
                    break;
                case '#':
                    flags |= DP_F_NUM;
                    ch = *format++;
                    break;
                case '0':
                    flags |= DP_F_ZERO;
                    ch = *format++;
                    break;
                default:
                    state = DP_S_MIN;
                    break;
            }
            break;
        case DP_S_MIN:
            /* minimum field width: digits or '*' (taken from args) */
            if (isdigit((unsigned char)ch)) {
                min = 10 * min + char_to_int(ch);
                ch = *format++;
            } else if (ch == '*') {
                min = va_arg(args, int);
                ch = *format++;
                state = DP_S_DOT;
            } else
                state = DP_S_DOT;
            break;
        case DP_S_DOT:
            if (ch == '.') {
                state = DP_S_MAX;
                ch = *format++;
            } else
                state = DP_S_MOD;
            break;
        case DP_S_MAX:
            /* precision: digits or '*' (taken from args) */
            if (isdigit((unsigned char)ch)) {
                if (max < 0)
                    max = 0;
                max = 10 * max + char_to_int(ch);
                ch = *format++;
            } else if (ch == '*') {
                max = va_arg(args, int);
                ch = *format++;
                state = DP_S_MOD;
            } else
                state = DP_S_MOD;
            break;
        case DP_S_MOD:
            /* length modifier */
            switch (ch) {
                case 'h':
                    cflags = DP_C_SHORT;
                    ch = *format++;
                    break;
                case 'l':
                    if (*format == 'l') {
                        cflags = DP_C_LLONG;
                        format++;
                    } else
                        cflags = DP_C_LONG;
                    ch = *format++;
                    break;
                case 'q':
                    cflags = DP_C_LLONG;
                    ch = *format++;
                    break;
                case 'L':
                    cflags = DP_C_LDOUBLE;
                    ch = *format++;
                    break;
                default:
                    break;
            }
            state = DP_S_CONV;
            break;
        case DP_S_CONV:
            switch (ch) {
            case 'd':
            case 'i':
                switch (cflags) {
                case DP_C_SHORT:
                    value = (short int)va_arg(args, int);
                    break;
                case DP_C_LONG:
                    value = va_arg(args, long int);
                    break;
                case DP_C_LLONG:
                    value = va_arg(args, LLONG);
                    break;
                default:
                    value = va_arg(args, int);
                    break;
                }
                fmtint(buffer, &currlen, maxlen, value, 10, min, max, flags);
                break;
            case 'X':
                flags |= DP_F_UP;
                /* FALLTHROUGH */
            case 'x':
            case 'o':
            case 'u':
                flags |= DP_F_UNSIGNED;
                switch (cflags) {
                    case DP_C_SHORT:
                        value = (unsigned short int)va_arg(args, unsigned int);
                        break;
                    case DP_C_LONG:
                        value = (LLONG)va_arg(args, unsigned long int);
                        break;
                    case DP_C_LLONG:
                        value = va_arg(args, unsigned LLONG);
                        break;
                    default:
                        value = (LLONG)va_arg(args, unsigned int);
                        break;
                }
                fmtint(buffer, &currlen, maxlen, value,
                       ch == 'o' ? 8 : (ch == 'u' ? 10 : 16),
                       min, max, flags);
                break;
            case 'f':
                if (cflags == DP_C_LDOUBLE)
                    fvalue = va_arg(args, LDOUBLE);
                else
                    fvalue = va_arg(args, double);
                fmtfp(buffer, &currlen, maxlen, fvalue, min, max, flags);
                break;
            case 'E':
                flags |= DP_F_UP;
                /* FALLTHROUGH */
            case 'e':
                /* argument is consumed, but nothing is printed */
                if (cflags == DP_C_LDOUBLE)
                    fvalue = va_arg(args, LDOUBLE);
                else
                    fvalue = va_arg(args, double);
                break;
            case 'G':
                flags |= DP_F_UP;
                /* FALLTHROUGH */
            case 'g':
                /* argument is consumed, but nothing is printed */
                if (cflags == DP_C_LDOUBLE)
                    fvalue = va_arg(args, LDOUBLE);
                else
                    fvalue = va_arg(args, double);
                break;
            case 'c':
                dopr_outch(buffer, &currlen, maxlen, va_arg(args, int));
                break;
            case 's':
                strvalue = va_arg(args, char *);
                if (max < 0) {
                    /* no precision given: allow up to the buffer limit,
                       clamped to the largest int so that a huge maxlen
                       (counting mode) cannot turn into a negative limit
                       which would suppress the string */
                    const size_t int_max = (size_t)(((unsigned int)~0U) >> 1);
                    max = (maxlen > int_max) ? (int)int_max : (int)maxlen;
                }
                fmtstr(buffer, &currlen, maxlen, strvalue, flags, min, max);
                break;
            case 'p':
                value = (long)va_arg(args, void *);
                fmtint(buffer, &currlen, maxlen, value, 16, min, max, flags);
                break;
            case 'n': /* XXX */
                if (cflags == DP_C_SHORT) {
                    short int *num;
                    num = va_arg(args, short int *);
                    *num = currlen;
                } else if (cflags == DP_C_LONG) { /* XXX */
                    long int *num;
                    num = va_arg(args, long int *);
                    *num = (long int) currlen;
                } else if (cflags == DP_C_LLONG) { /* XXX */
                    LLONG *num;
                    num = va_arg(args, LLONG *);
                    *num = (LLONG) currlen;
                } else {
                    int    *num;
                    num = va_arg(args, int *);
                    *num = currlen;
                }
                break;
            case '%':
                dopr_outch(buffer, &currlen, maxlen, ch);
                break;
            case 'w':
                /* not supported yet, treat as next char */
                ch = *format++;
                break;
            default:
                /* unknown, skip */
                break;
            }
            /* conversion done: reset per-conversion state */
            ch = *format++;
            state = DP_S_DEFAULT;
            flags = cflags = min = 0;
            max = -1;
            break;
        case DP_S_DONE:
            break;
        default:
            break;
        }
    }
    /* terminate the result; a zero-sized buffer has no room for the NUL
       (and `maxlen - 1' would wrap around), so leave it untouched */
    if (maxlen > 0) {
        if (currlen >= maxlen - 1)
            currlen = maxlen - 1;
        if (buffer != NULL)
            buffer[currlen] = NUL;
    }
    *retlen = currlen;
    return;
}

/*
 * fmtstr -- emit a string conversion (%s).
 *
 *   value  the string; a null pointer is printed as "<NULL>"
 *   flags  DP_F_MINUS selects left justification
 *   min    minimum field width (padding with spaces)
 *   max    precision, i.e. the maximum number of characters taken from
 *          value; a negative value means "no limit"
 *
 * As with printf(3), the precision limits only the characters taken from
 * the string, not the padding, and the string is never inspected beyond
 * the precision (so it need not be NUL-terminated when one is given).
 */
static void
fmtstr(
    char *buffer,
    size_t *currlen,
    size_t maxlen,
    char *value,
    int flags,
    int min,
    int max)
{
    int padlen, strln;

    if (value == 0)
        value = "<NULL>";

    /* number of characters to print: stop at NUL or at the precision */
    for (strln = 0; (max < 0 || strln < max) && value[strln] != '\0'; strln++)
        ;

    /* padding is derived from what is actually printed */
    padlen = min - strln;
    if (padlen < 0)
        padlen = 0;
    if (flags & DP_F_MINUS)
        padlen = -padlen; /* negative: pad on the right */

    while (padlen > 0) {
        dopr_outch(buffer, currlen, maxlen, ' ');
        --padlen;
    }
    while (strln > 0) {
        dopr_outch(buffer, currlen, maxlen, *value++);
        --strln;
    }
    while (padlen < 0) {
        dopr_outch(buffer, currlen, maxlen, ' ');
        ++padlen;
    }
}

/*
 * fmtint -- emit an integer conversion.
 *
 *   value  the number; interpreted as unsigned if DP_F_UNSIGNED is set
 *   base   radix used for the digits (dopr() passes 8, 10 or 16)
 *   min    minimum field width
 *   max    precision (minimum number of digits); negative means none
 *   flags  DP_F_* bits: sign handling, zero padding, justification,
 *          upper case digits
 */
static void
fmtint(
    char *buffer,
    size_t *currlen,
    size_t maxlen,
    LLONG value,
    int base,
    int min,
    int max,
    int flags)
{
    int signvalue = 0;
    unsigned LLONG uvalue;
    /* large enough for the longest rendering (octal needs one digit
       per three bits) plus the terminating NUL */
    char convert[(sizeof(unsigned LLONG) * 8) / 3 + 2];
    int place = 0;
    int spadlen = 0;
    int zpadlen = 0;
    int caps = 0;

    if (max < 0)
        max = 0;
    uvalue = value;
    if (!(flags & DP_F_UNSIGNED)) {
        if (value < 0) {
            signvalue = '-';
            /* negate in unsigned arithmetic: `-value' would overflow
               for the most negative value */
            uvalue = (unsigned LLONG)0 - (unsigned LLONG)value;
        } else if (flags & DP_F_PLUS)
            signvalue = '+';
        else if (flags & DP_F_SPACE)
            signvalue = ' ';
    }
    if (flags & DP_F_UP)
        caps = 1;

    /* generate the digits, least significant first */
    do {
        convert[place++] =
            (caps ? "0123456789ABCDEF" : "0123456789abcdef")
            [uvalue % (unsigned) base];
        uvalue = (uvalue / (unsigned) base);
    } while (uvalue && (place < (int)sizeof(convert) - 1));
    convert[place] = 0;

    zpadlen = max - place;
    spadlen = min - MAX(max, place) - (signvalue ? 1 : 0);
    if (zpadlen < 0)
        zpadlen = 0;
    if (spadlen < 0)
        spadlen = 0;
    if (flags & DP_F_ZERO) {
        zpadlen = MAX(zpadlen, spadlen);
        spadlen = 0;
    }
    if (flags & DP_F_MINUS)
        spadlen = -spadlen; /* negative: pad on the right */

    /* spaces */
    while (spadlen > 0) {
        dopr_outch(buffer, currlen, maxlen, ' ');
        --spadlen;
    }

    /* sign */
    if (signvalue)
        dopr_outch(buffer, currlen, maxlen, signvalue);

    /* zeros */
    if (zpadlen > 0) {
        while (zpadlen > 0) {
            dopr_outch(buffer, currlen, maxlen, '0');
            --zpadlen;
        }
    }
    /* digits */
    while (place > 0)
        dopr_outch(buffer, currlen, maxlen, convert[--place]);

    /* left justified spaces */
    while (spadlen < 0) {
        dopr_outch(buffer, currlen, maxlen, ' ');
        ++spadlen;
    }
    return;
}

/* absolute value of a floating point number */
static LDOUBLE
abs_val(LDOUBLE value)
{
    return (value < 0) ? -value : value;
}

/* 10 raised to the power of exp; exp <= 0 yields 1 */
static LDOUBLE
pow10(int exp)
{
    LDOUBLE result = 1;
    /* test for > 0 instead of != 0: a negative exponent would otherwise
       be decremented away from zero and the loop would not stop */
    while (exp > 0) {
        result *= 10;
        exp--;
    }
    return result;
}

/* convert to long, going up by one if the cut-off fraction is >= 0.5 */
static long
round(LDOUBLE value)
{
    long whole = (long) value;
    LDOUBLE rest = value - whole;

    return (rest >= 0.5) ? whole + 1 : whole;
}

/*
 * fmtfp -- emit a fixed-point floating point conversion (%f).
 *
 *   fvalue  the number to print
 *   min     minimum field width
 *   max     precision (digits after the decimal point); negative
 *           selects the default of 6, values above 9 are reduced to 9
 *   flags   DP_F_* bits: sign handling, zero padding, justification
 *
 * The integer part is converted through a long, the fraction is scaled
 * to an integer by a power of ten and converted the same way.
 */
static void
fmtfp(
    char *buffer,
    size_t *currlen,
    size_t maxlen,
    LDOUBLE fvalue,
    int min,
    int max,
    int flags)
{
    int signvalue = 0;
    LDOUBLE ufvalue;
    char iconvert[20];
    char fconvert[20];
    int iplace = 0;
    int fplace = 0;
    int padlen = 0;
    int zpadlen = 0;
    long intpart;
    long fracpart;

    if (max < 0)
        max = 6;
    ufvalue = abs_val(fvalue);
    if (fvalue < 0)
        signvalue = '-';
    else if (flags & DP_F_PLUS)
        signvalue = '+';
    else if (flags & DP_F_SPACE)
        signvalue = ' ';

    intpart = (long)ufvalue;

    /* sorry, we only support 9 digits past the decimal because of our
       conversion method */
    if (max > 9)
        max = 9;

    /* we "cheat" by converting the fractional part to integer by
       multiplying by a factor of 10 */
    fracpart = round((pow10(max)) * (ufvalue - intpart));

    /* rounding may carry over into the integer part */
    if (fracpart >= pow10(max)) {
        intpart++;
        fracpart -= pow10(max);
    }

    /* convert integer part (digits are stored least significant first) */
    do {
        iconvert[iplace++] = "0123456789"[intpart % 10];
        intpart = (intpart / 10);
    } while (intpart && (iplace < 20));
    if (iplace == 20)
        iplace--;
    iconvert[iplace] = 0;

    /* convert fractional part */
    do {
        fconvert[fplace++] = "0123456789"[fracpart % 10];
        fracpart = (fracpart / 10);
    } while (fracpart && (fplace < 20));
    if (fplace == 20)
        fplace--;
    fconvert[fplace] = 0;

    /* -1 for the decimal point (which is only printed for a non-zero
       precision), another -1 if we are printing a sign */
    padlen = min - iplace - max - ((max > 0) ? 1 : 0) - ((signvalue) ? 1 : 0);
    zpadlen = max - fplace;
    if (zpadlen < 0)
        zpadlen = 0;
    if (padlen < 0)
        padlen = 0;
    if (flags & DP_F_MINUS)
        padlen = -padlen; /* negative: pad on the right */

    if ((flags & DP_F_ZERO) && (padlen > 0)) {
        /* with zero padding the sign goes in front of the zeros */
        if (signvalue) {
            dopr_outch(buffer, currlen, maxlen, signvalue);
            --padlen;
            signvalue = 0;
        }
        while (padlen > 0) {
            dopr_outch(buffer, currlen, maxlen, '0');
            --padlen;
        }
    }
    while (padlen > 0) {
        dopr_outch(buffer, currlen, maxlen, ' ');
        --padlen;
    }
    if (signvalue)
        dopr_outch(buffer, currlen, maxlen, signvalue);

    while (iplace > 0)
        dopr_outch(buffer, currlen, maxlen, iconvert[--iplace]);

    /*
     * Decimal point. This should probably use locale to find the correct
     * char to print out.
     */
    if (max > 0) {
        dopr_outch(buffer, currlen, maxlen, '.');

        /* the scaled fraction lost its leading zeros during conversion
           (e.g. .05 with two digits became 5), so they have to be
           emitted in front of the digits, not behind them */
        while (zpadlen > 0) {
            dopr_outch(buffer, currlen, maxlen, '0');
            --zpadlen;
        }

        while (fplace > 0)
            dopr_outch(buffer, currlen, maxlen, fconvert[--fplace]);
    }

    /* left justified spaces */
    while (padlen < 0) {
        dopr_outch(buffer, currlen, maxlen, ' ');
        ++padlen;
    }
    return;
}

/*
 * dopr_outch -- append one character to the output.
 *
 * Does nothing once *currlen has reached maxlen. Otherwise the character
 * is stored at buffer[*currlen] (skipped if buffer is NULL, which gives
 * a pure counting mode) and *currlen is incremented.
 */
static void
dopr_outch(
    char *buffer,
    size_t *currlen,
    size_t maxlen,
    int c)
{
    size_t pos = *currlen;

    if (pos >= maxlen)
        return;
    if (buffer != NULL)
        buffer[pos] = (char)c;
    *currlen = pos + 1;
    return;
}

/*
 * pth_vsnprintf -- format into str (capacity count, including the NUL)
 * using dopr(). With str == NULL the output is only counted. Returns the
 * length reported by dopr().
 */
intern int
pth_vsnprintf(
    char *str,
    size_t count,
    const char *fmt,
    va_list args)
{
    size_t retlen;

    /* pre-terminate the result, but only if there is room for it:
       a zero-sized buffer must not be written to at all */
    if (str != NULL && count > 0)
        str[0] = NUL;
    dopr(str, count, &retlen, fmt, args);
    return retlen;
}

/* pth_snprintf -- variadic front-end to pth_vsnprintf() */
intern int
pth_snprintf(
    char *str,
    size_t count,
    const char *fmt,
    ...)
{
    va_list args;
    int written;

    va_start(args, fmt);
    written = pth_vsnprintf(str, count, fmt, args);
    va_end(args);
    return written;
}

/* portable way to duplicate a va_list: C99 va_copy if available, the
   older __va_copy otherwise, a plain memory copy as last resort */
#if defined(va_copy)
#define PTH_VA_COPY(d, s) va_copy((d), (s))
#elif defined(__va_copy)
#define PTH_VA_COPY(d, s) __va_copy((d), (s))
#else
#define PTH_VA_COPY(d, s) memcpy(&(d), &(s), sizeof(va_list))
#endif

/*
 * pth_vasprintf -- format into a freshly malloc(3)'ed string.
 *
 * A first pth_vsnprintf() pass without buffer determines the length, a
 * second one fills the buffer. Returns the string (to be released with
 * free(3) by the caller) or NULL if the allocation fails.
 */
intern char *
pth_vasprintf(
    const char *fmt,
    va_list ap)
{
    char *rv;
    int n;
    va_list ap2;

    /* the arguments are walked twice; a va_list must not be reused after
       it was consumed, so the second pass gets its own copy */
    PTH_VA_COPY(ap2, ap);
    n = pth_vsnprintf(NULL, -1, fmt, ap);
    if ((rv = (char *)malloc(n+1)) != NULL)
        pth_vsnprintf(rv, n+1, fmt, ap2);
#if defined(va_copy) || defined(__va_copy)
    va_end(ap2);
#endif
    return rv;
}

/* pth_asprintf -- variadic front-end to pth_vasprintf() */
intern char *
pth_asprintf(
    const char *fmt,
    ...)
{
    va_list args;
    char *text;

    va_start(args, fmt);
    text = pth_vasprintf(fmt, args);
    va_end(args);
    return text;
}

