openssl/crypto/threads_win.c
Georgi Valkov 16beec98d2 threads_win: fix build error with VS2010 x86
InterlockedAnd64 and InterlockedAdd64 are not available on VS2010 x86.
We already have implemented replacements for other functions, such as
InterlockedOr64. Apply the same approach to fix the errors.
A CRYPTO_RWLOCK rw_lock is added to rcu_lock_st.

Replace InterlockedOr64 and InterlockedOr with CRYPTO_atomic_load and
CRYPTO_atomic_load_int, using the existing design pattern.

Add documentation and tests for the new atomic functions
CRYPTO_atomic_add64 and CRYPTO_atomic_and.
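
Callers hand the rcu lock's new rw_lock to these helpers so that the
fallback path can serialize. A minimal usage sketch (error handling is
elided in the real reader path; this mirrors what get_hold_current_qp()
does in the file below):

    uint64_t out;

    /* atomically bump the reader count; out receives the new value */
    if (!CRYPTO_atomic_add64(&qp->users, VAL_READER, &out, lock->rw_lock))
        return NULL; /* the fallback lock could not be taken */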

Fixes:
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedAdd64 referenced in function _get_hold_current_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedOr referenced in function _get_hold_current_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedAnd64 referenced in function _update_qp
libcrypto.lib(libcrypto-lib-threads_win.obj) : error LNK2019: unresolved external symbol _InterlockedOr64 referenced in function _ossl_synchronize_rcu

Signed-off-by: Georgi Valkov <gvalkov@gmail.com>

Reviewed-by: Neil Horman <nhorman@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/24405)
2024-07-01 10:02:02 +02:00

/*
* Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#if defined(_WIN32)
# include <windows.h>
# if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x600
# define USE_RWLOCK
# endif
#endif
#include <assert.h>
/*
 * VS2010 and earlier x86 compilers do not have an inline implementation
 * of the 64-bit Interlocked* functions (InterlockedOr64, InterlockedAnd64,
 * InterlockedAdd64) for 32-bit targets: code that uses them fails to link,
 * and the corresponding OS exports do not exist on Windows XP 32-bit.
 * https://docs.microsoft.com/en-us/cpp/intrinsics/interlockedor-intrinsic-functions#requirements
 * To work around this problem, we implement a manual locking mechanism
 * for VS2010 and earlier x86 compilers only.
 */
#if (defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER <= 1600)
# define NO_INTERLOCKEDOR64
#endif
#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/common.h"
#include "internal/thread_arch.h"
#include "internal/rcu.h"
#include "rcu_internal.h"
#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && defined(OPENSSL_SYS_WINDOWS)
# ifdef USE_RWLOCK
typedef struct {
SRWLOCK lock;
int exclusive;
} CRYPTO_win_rwlock;
# endif
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 32
# define ID_SIZE 32
# define READER_MASK (((LONG64)1 << READER_SIZE)-1)
# define ID_MASK (((LONG64)1 << ID_SIZE)-1)
# define READER_COUNT(x) (((LONG64)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x) (((LONG64)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER ((LONG64)1 << READER_SHIFT)
# define VAL_ID(x) ((LONG64)(x) << ID_SHIFT)
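/*
 * The 64-bit "users" word packs two fields: the low 32 bits (READER_*)
 * count the readers currently holding this qp, and the high 32 bits
 * (ID_*) hold the id of the writer generation that installed it.  For
 * example, a users value of 0x0000000500000002 decodes via ID_VAL() to
 * id 5 and via READER_COUNT() to 2 readers.
 */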
/*
 * This defines a quiescent point (qp): the barrier beyond which a writer
 * must wait before freeing data that was atomically updated.
 */
struct rcu_qp {
volatile LONG64 users;
};
struct thread_qp {
struct rcu_qp *qp;
unsigned int depth;
CRYPTO_RCU_LOCK *lock;
};
#define MAX_QPS 10
/*
 * This is the per-thread tracking data assigned to each thread
 * participating in an rcu qp.
 *
 * qp points to the qp that the thread last acquired.
 */
struct rcu_thr_data {
struct thread_qp thread_qps[MAX_QPS];
};
/*
 * This is the internal version of a CRYPTO_RCU_LOCK;
 * it is cast from CRYPTO_RCU_LOCK.
 */
struct rcu_lock_st {
struct rcu_cb_item *cb_items;
OSSL_LIB_CTX *ctx;
uint32_t id_ctr;
struct rcu_qp *qp_group;
size_t group_count;
uint32_t next_to_retire;
volatile long int reader_idx;
uint32_t current_alloc_idx;
uint32_t writers_alloced;
CRYPTO_MUTEX *write_lock;
CRYPTO_MUTEX *alloc_lock;
CRYPTO_CONDVAR *alloc_signal;
CRYPTO_MUTEX *prior_lock;
CRYPTO_CONDVAR *prior_signal;
CRYPTO_RWLOCK *rw_lock;
};
static struct rcu_qp *allocate_new_qp_group(struct rcu_lock_st *lock,
int count)
{
struct rcu_qp *new =
OPENSSL_zalloc(sizeof(*new) * count);
lock->group_count = count;
return new;
}
CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
{
struct rcu_lock_st *new;
if (num_writers < 1)
num_writers = 1;
    ctx = ossl_lib_ctx_get_concrete(ctx);
    if (ctx == NULL)
        return NULL;
new = OPENSSL_zalloc(sizeof(*new));
if (new == NULL)
return NULL;
new->ctx = ctx;
new->rw_lock = CRYPTO_THREAD_lock_new();
new->write_lock = ossl_crypto_mutex_new();
new->alloc_signal = ossl_crypto_condvar_new();
new->prior_signal = ossl_crypto_condvar_new();
new->alloc_lock = ossl_crypto_mutex_new();
new->prior_lock = ossl_crypto_mutex_new();
new->qp_group = allocate_new_qp_group(new, num_writers + 1);
if (new->qp_group == NULL
|| new->alloc_signal == NULL
|| new->prior_signal == NULL
|| new->write_lock == NULL
|| new->alloc_lock == NULL
|| new->prior_lock == NULL
|| new->rw_lock == NULL) {
CRYPTO_THREAD_lock_free(new->rw_lock);
OPENSSL_free(new->qp_group);
ossl_crypto_condvar_free(&new->alloc_signal);
ossl_crypto_condvar_free(&new->prior_signal);
ossl_crypto_mutex_free(&new->alloc_lock);
ossl_crypto_mutex_free(&new->prior_lock);
ossl_crypto_mutex_free(&new->write_lock);
OPENSSL_free(new);
new = NULL;
}
return new;
}
void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
CRYPTO_THREAD_lock_free(lock->rw_lock);
OPENSSL_free(lock->qp_group);
ossl_crypto_condvar_free(&lock->alloc_signal);
ossl_crypto_condvar_free(&lock->prior_signal);
ossl_crypto_mutex_free(&lock->alloc_lock);
ossl_crypto_mutex_free(&lock->prior_lock);
ossl_crypto_mutex_free(&lock->write_lock);
OPENSSL_free(lock);
}
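/*
 * Read-side helper: pin the currently active qp.  The loop is the usual
 * acquire/recheck dance: load the reader index, add VAL_READER to that
 * qp's users word, then re-load the index.  If a writer swapped the
 * index in between, drop the reference and retry, so a reader never
 * stays counted on a qp that a writer has already begun draining.
 */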
static ossl_inline struct rcu_qp *get_hold_current_qp(CRYPTO_RCU_LOCK *lock)
{
uint32_t qp_idx;
uint32_t tmp;
uint64_t tmp64;
/* get the current qp index */
for (;;) {
        CRYPTO_atomic_load_int((int *)&lock->reader_idx, (int *)&qp_idx,
                               lock->rw_lock);
        CRYPTO_atomic_add64((uint64_t *)&lock->qp_group[qp_idx].users,
                            VAL_READER, &tmp64, lock->rw_lock);
        CRYPTO_atomic_load_int((int *)&lock->reader_idx, (int *)&tmp,
                               lock->rw_lock);
        if (qp_idx == tmp)
            break;
        CRYPTO_atomic_add64((uint64_t *)&lock->qp_group[qp_idx].users,
                            -VAL_READER, &tmp64, lock->rw_lock);
}
return &lock->qp_group[qp_idx];
}
static void ossl_rcu_free_local_data(void *arg)
{
OSSL_LIB_CTX *ctx = arg;
CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
OPENSSL_free(data);
CRYPTO_THREAD_set_local(lkey, NULL);
}
void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
struct rcu_thr_data *data;
int i;
int available_qp = -1;
CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    /*
     * fetch this thread's rcu tracking data, allocating it on first use
     */
data = CRYPTO_THREAD_get_local(lkey);
if (data == NULL) {
data = OPENSSL_zalloc(sizeof(*data));
OPENSSL_assert(data != NULL);
CRYPTO_THREAD_set_local(lkey, data);
ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
}
for (i = 0; i < MAX_QPS; i++) {
if (data->thread_qps[i].qp == NULL && available_qp == -1)
available_qp = i;
/* If we have a hold on this lock already, we're good */
if (data->thread_qps[i].lock == lock)
return;
}
/*
* if we get here, then we don't have a hold on this lock yet
*/
assert(available_qp != -1);
data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
data->thread_qps[available_qp].depth = 1;
data->thread_qps[available_qp].lock = lock;
}
void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
ossl_crypto_mutex_lock(lock->write_lock);
}
void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
ossl_crypto_mutex_unlock(lock->write_lock);
}
void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
int i;
LONG64 ret;
assert(data != NULL);
for (i = 0; i < MAX_QPS; i++) {
if (data->thread_qps[i].lock == lock) {
data->thread_qps[i].depth--;
if (data->thread_qps[i].depth == 0) {
                CRYPTO_atomic_add64((uint64_t *)&data->thread_qps[i].qp->users,
                                    -VAL_READER, (uint64_t *)&ret,
                                    lock->rw_lock);
OPENSSL_assert(ret >= 0);
data->thread_qps[i].qp = NULL;
data->thread_qps[i].lock = NULL;
}
return;
}
}
}
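/*
 * Writer-side helper: stamp the current qp with a fresh writer id and
 * rotate the reader index to the next qp, so new readers land there
 * while ossl_synchronize_rcu() drains this one.  The alloc_lock and
 * alloc_signal pair throttles writers so that at least one spare qp
 * always remains available to readers.
 */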
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
uint64_t new_id;
uint32_t current_idx;
uint32_t tmp;
uint64_t tmp64;
ossl_crypto_mutex_lock(lock->alloc_lock);
/*
* we need at least one qp to be available with one
* left over, so that readers can start working on
* one that isn't yet being waited on
*/
while (lock->group_count - lock->writers_alloced < 2)
ossl_crypto_condvar_wait(lock->alloc_signal, lock->alloc_lock);
current_idx = lock->current_alloc_idx;
/* Allocate the qp */
lock->writers_alloced++;
/* increment the allocation index */
lock->current_alloc_idx =
(lock->current_alloc_idx + 1) % lock->group_count;
/* get and insert a new id */
new_id = lock->id_ctr;
lock->id_ctr++;
new_id = VAL_ID(new_id);
    CRYPTO_atomic_and((uint64_t *)&lock->qp_group[current_idx].users, ID_MASK,
                      &tmp64, lock->rw_lock);
    CRYPTO_atomic_add64((uint64_t *)&lock->qp_group[current_idx].users, new_id,
                        &tmp64, lock->rw_lock);
    /*
     * update the reader index so that new readers move to the next qp,
     * leaving this one free to drain
     */
    tmp = lock->current_alloc_idx;
    InterlockedExchange(&lock->reader_idx, tmp);
/* wake up any waiters */
ossl_crypto_condvar_broadcast(lock->alloc_signal);
ossl_crypto_mutex_unlock(lock->alloc_lock);
return &lock->qp_group[current_idx];
}
static void retire_qp(CRYPTO_RCU_LOCK *lock,
struct rcu_qp *qp)
{
ossl_crypto_mutex_lock(lock->alloc_lock);
lock->writers_alloced--;
ossl_crypto_condvar_broadcast(lock->alloc_signal);
ossl_crypto_mutex_unlock(lock->alloc_lock);
}
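/*
 * Wait for a grace period: after update_qp() has redirected new readers,
 * spin until the old qp's reader count drains to zero, then retire qps
 * strictly in id order.  Only after that are the callbacks detached at
 * entry (registered via ossl_rcu_call()) run, so frees cannot race with
 * in-flight readers.
 */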
void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
struct rcu_qp *qp;
uint64_t count;
struct rcu_cb_item *cb_items, *tmpcb;
    /* before we do anything else, let's grab the cb list */
cb_items = InterlockedExchangePointer((void * volatile *)&lock->cb_items, NULL);
qp = update_qp(lock);
/* wait for the reader count to reach zero */
do {
        CRYPTO_atomic_load((uint64_t *)&qp->users, &count, lock->rw_lock);
} while (READER_COUNT(count) != 0);
/* retire in order */
ossl_crypto_mutex_lock(lock->prior_lock);
while (lock->next_to_retire != ID_VAL(count))
ossl_crypto_condvar_wait(lock->prior_signal, lock->prior_lock);
lock->next_to_retire++;
ossl_crypto_condvar_broadcast(lock->prior_signal);
ossl_crypto_mutex_unlock(lock->prior_lock);
retire_qp(lock, qp);
/* handle any callbacks that we have */
while (cb_items != NULL) {
tmpcb = cb_items;
cb_items = cb_items->next;
tmpcb->fn(tmpcb->data);
OPENSSL_free(tmpcb);
}
/* and we're done */
return;
}
int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
struct rcu_cb_item *new;
new = OPENSSL_zalloc(sizeof(struct rcu_cb_item));
if (new == NULL)
return 0;
new->data = data;
new->fn = cb;
new->next = InterlockedExchangePointer((void * volatile *)&lock->cb_items, new);
return 1;
}
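/*
 * Pointer publication primitives: writers install a new pointer with
 * ossl_rcu_assign_uptr() (InterlockedExchangePointer acts as a full
 * memory barrier, so the pointed-to data is globally visible before the
 * pointer itself), and readers fetch it with ossl_rcu_uptr_deref().
 */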
void *ossl_rcu_uptr_deref(void **p)
{
return (void *)*p;
}
void ossl_rcu_assign_uptr(void **p, void **v)
{
InterlockedExchangePointer((void * volatile *)p, (void *)*v);
}
CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
CRYPTO_RWLOCK *lock;
# ifdef USE_RWLOCK
CRYPTO_win_rwlock *rwlock;
if ((lock = OPENSSL_zalloc(sizeof(CRYPTO_win_rwlock))) == NULL)
/* Don't set error, to avoid recursion blowup. */
return NULL;
rwlock = lock;
InitializeSRWLock(&rwlock->lock);
# else
if ((lock = OPENSSL_zalloc(sizeof(CRITICAL_SECTION))) == NULL)
/* Don't set error, to avoid recursion blowup. */
return NULL;
# if !defined(_WIN32_WCE)
/* 0x400 is the spin count value suggested in the documentation */
if (!InitializeCriticalSectionAndSpinCount(lock, 0x400)) {
OPENSSL_free(lock);
return NULL;
}
# else
InitializeCriticalSection(lock);
# endif
# endif
return lock;
}
__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
CRYPTO_win_rwlock *rwlock = lock;
AcquireSRWLockShared(&rwlock->lock);
# else
EnterCriticalSection(lock);
# endif
return 1;
}
__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
CRYPTO_win_rwlock *rwlock = lock;
AcquireSRWLockExclusive(&rwlock->lock);
rwlock->exclusive = 1;
# else
EnterCriticalSection(lock);
# endif
return 1;
}
int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
CRYPTO_win_rwlock *rwlock = lock;
if (rwlock->exclusive) {
rwlock->exclusive = 0;
ReleaseSRWLockExclusive(&rwlock->lock);
} else {
ReleaseSRWLockShared(&rwlock->lock);
}
# else
LeaveCriticalSection(lock);
# endif
return 1;
}
void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
if (lock == NULL)
return;
# ifndef USE_RWLOCK
DeleteCriticalSection(lock);
# endif
OPENSSL_free(lock);
return;
}
# define ONCE_UNINITED 0
# define ONCE_ININIT 1
# define ONCE_DONE 2
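/*
 * A CRYPTO_ONCE is driven through a small state machine: UNINITED ->
 * ININIT (won via InterlockedCompareExchange by exactly one thread,
 * which then runs init()) -> DONE.  Losing threads spin while they
 * observe ININIT and return once the winner has stored DONE.
 */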
/*
 * We don't use InitOnceExecuteOnce because that isn't available in WinXP,
 * which we still have to support.
 */
int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
LONG volatile *lock = (LONG *)once;
LONG result;
if (*lock == ONCE_DONE)
return 1;
do {
result = InterlockedCompareExchange(lock, ONCE_ININIT, ONCE_UNINITED);
if (result == ONCE_UNINITED) {
init();
*lock = ONCE_DONE;
return 1;
}
} while (result == ONCE_ININIT);
return (*lock == ONCE_DONE);
}
int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
*key = TlsAlloc();
if (*key == TLS_OUT_OF_INDEXES)
return 0;
return 1;
}
void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
DWORD last_error;
void *ret;
/*
     * TlsGetValue clears the last error even on success, so that callers may
     * distinguish between it successfully returning NULL and failing. It is
     * documented never to fail if the argument is a valid index from
     * TlsAlloc, so we do not need to handle that case.
*
* However, this error-mangling behavior interferes with the caller's use of
* GetLastError. In particular SSL_get_error queries the error queue to
* determine whether the caller should look at the OS's errors. To avoid
* destroying state, save and restore the Windows error.
*
* https://msdn.microsoft.com/en-us/library/windows/desktop/ms686812(v=vs.85).aspx
*/
last_error = GetLastError();
ret = TlsGetValue(*key);
SetLastError(last_error);
return ret;
}
int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
if (TlsSetValue(*key, val) == 0)
return 0;
return 1;
}
int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
if (TlsFree(*key) == 0)
return 0;
return 1;
}
CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
return GetCurrentThreadId();
}
int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
return (a == b);
}
int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
*ret = (int)InterlockedExchangeAdd((long volatile *)val, (long)amount) + amount;
return 1;
}
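/*
 * Each 64-bit atomic below comes in two build flavours: when the 64-bit
 * Interlocked* intrinsics are usable the lock argument is ignored, while
 * under NO_INTERLOCKEDOR64 (VS2010-or-earlier x86) the operation is
 * emulated by taking the supplied CRYPTO_RWLOCK, so callers on those
 * toolchains must pass a non-NULL lock (the rcu code passes rw_lock for
 * exactly this reason).
 */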
int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
CRYPTO_RWLOCK *lock)
{
#if (defined(NO_INTERLOCKEDOR64))
if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
return 0;
*val += op;
*ret = *val;
if (!CRYPTO_THREAD_unlock(lock))
return 0;
return 1;
#else
*ret = (uint64_t)InterlockedAdd64((LONG64 volatile *)val, (LONG64)op);
return 1;
#endif
}
int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
CRYPTO_RWLOCK *lock)
{
#if (defined(NO_INTERLOCKEDOR64))
if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
return 0;
*val &= op;
*ret = *val;
if (!CRYPTO_THREAD_unlock(lock))
return 0;
return 1;
#else
*ret = (uint64_t)InterlockedAnd64((LONG64 volatile *)val, (LONG64)op) & op;
return 1;
#endif
}
int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
CRYPTO_RWLOCK *lock)
{
#if (defined(NO_INTERLOCKEDOR64))
if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
return 0;
*val |= op;
*ret = *val;
if (!CRYPTO_THREAD_unlock(lock))
return 0;
return 1;
#else
*ret = (uint64_t)InterlockedOr64((LONG64 volatile *)val, (LONG64)op) | op;
return 1;
#endif
}
int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
#if (defined(NO_INTERLOCKEDOR64))
if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
return 0;
*ret = *val;
if (!CRYPTO_THREAD_unlock(lock))
return 0;
return 1;
#else
*ret = (uint64_t)InterlockedOr64((LONG64 volatile *)val, 0);
return 1;
#endif
}
int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock)
{
#if (defined(NO_INTERLOCKEDOR64))
    /* a store mutates *dst, so it needs the write (exclusive) lock */
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;
    *dst = val;
    if (!CRYPTO_THREAD_unlock(lock))
        return 0;
    return 1;
#else
    InterlockedExchange64((LONG64 volatile *)dst, (LONG64)val);
    return 1;
#endif
}
int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
#if (defined(NO_INTERLOCKEDOR64))
if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
return 0;
*ret = *val;
if (!CRYPTO_THREAD_unlock(lock))
return 0;
return 1;
#else
/* On Windows, LONG is always the same size as int. */
*ret = (int)InterlockedOr((LONG volatile *)val, 0);
return 1;
#endif
}
int openssl_init_fork_handlers(void)
{
return 0;
}
int openssl_get_fork_id(void)
{
return 0;
}
#endif