lck-windows.c 27 KB
1 /*
2  * Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
3  * and other libmdbx authors: please see AUTHORS file.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted only as authorized by the OpenLDAP
8  * Public License.
9  *
10  * A copy of this license is available in the file LICENSE in the
11  * top-level directory of the distribution or, alternatively, at
12  * <http://www.OpenLDAP.org/license.html>.
13  */
14
15 #if defined(_WIN32) || defined(_WIN64) /* Windows LCK-implementation */
16
17 /* PREAMBLE FOR WINDOWS:
18  *
19  * We are not concerned for performance here.
20  * If you are running Windows a performance could NOT be the goal.
21  * Otherwise please use Linux. */
22
23 #include "internals.h"
24
25 static void mdbx_winnt_import(void);
26
/* Module/thread lifecycle hook.
 *
 * Depending on the build configuration the very same body is compiled either
 * as the DLL entry point `DllMain()` (MDBX_BUILD_SHARED_LIBRARY) or as
 * `mdbx_module_handler()`, which is `static` unless
 * MDBX_MANUAL_MODULE_HANDLER is set.
 *
 * `reason` is one of the DLL_PROCESS_xxx / DLL_THREAD_xxx notification codes:
 *  - DLL_PROCESS_ATTACH: resolves the optional WinAPI entry points and then
 *    runs mdbx_rthc_global_init();
 *  - DLL_PROCESS_DETACH: runs mdbx_rthc_global_dtor();
 *  - DLL_THREAD_DETACH: runs mdbx_rthc_thread_dtor(module);
 *  - DLL_THREAD_ATTACH: nothing to do.
 * `reserved` is ignored. The DllMain() flavour always returns TRUE. */
#if MDBX_BUILD_SHARED_LIBRARY
#if MDBX_WITHOUT_MSVC_CRT && defined(NDEBUG)
/* DEBUG/CHECKED builds still require MSVC's CRT for runtime checks.
 *
 * Define the dll's entry point only for a Release build, i.e. when NDEBUG is
 * defined and MDBX_WITHOUT_MSVC_CRT=ON. If the entry point isn't defined then
 * MSVC will automatically use DllMainCRTStartup() from the CRT library, which
 * in turn calls DllMain() from our mdbx.dll */
#pragma comment(linker, "/ENTRY:DllMain")
#endif /* MDBX_WITHOUT_MSVC_CRT */

BOOL APIENTRY DllMain(HANDLE module, DWORD reason, LPVOID reserved)
#else
#if !MDBX_MANUAL_MODULE_HANDLER
static
#endif /* !MDBX_MANUAL_MODULE_HANDLER */
    void NTAPI
    mdbx_module_handler(PVOID module, DWORD reason, PVOID reserved)
#endif /* MDBX_BUILD_SHARED_LIBRARY */
{
  (void)reserved;
  switch (reason) {
  case DLL_PROCESS_ATTACH:
    /* the imports must be resolved first */
    mdbx_winnt_import();
    mdbx_rthc_global_init();
    break;
  case DLL_PROCESS_DETACH:
    mdbx_rthc_global_dtor();
    break;

  case DLL_THREAD_ATTACH:
    break;
  case DLL_THREAD_DETACH:
    mdbx_rthc_thread_dtor(module);
    break;
  }
#if MDBX_BUILD_SHARED_LIBRARY
  /* only the DllMain() flavour has a return value */
  return TRUE;
#endif
}
67
/* Static-library build without a manually wired handler: define
 * `mdbx_tls_anchor`, a PIMAGE_TLS_CALLBACK pointer to mdbx_module_handler()
 * placed into the `.CRT$XLB` section.
 * NOTE(review): presumably this is what makes the loader invoke the handler
 * as a TLS callback in lieu of DllMain() - confirm against the PE/TLS docs. */
#if !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER
/* *INDENT-OFF* */
/* clang-format off */
#if defined(_MSC_VER)
# pragma const_seg(push)
# pragma data_seg(push)

# ifndef _M_IX86
     /* kick a linker to create the TLS directory if not already done */
# pragma comment(linker, "/INCLUDE:_tls_used")
     /* Force some symbol references. */
# pragma comment(linker, "/INCLUDE:mdbx_tls_anchor")
     /* specific const-segment for WIN64 */
# pragma const_seg(".CRT$XLB")
     const
# else
     /* kick a linker to create the TLS directory if not already done
      * (same as above, but with the extra leading underscore used in the
      * symbol names of this branch) */
# pragma comment(linker, "/INCLUDE:__tls_used")
     /* Force some symbol references. */
# pragma comment(linker, "/INCLUDE:_mdbx_tls_anchor")
     /* specific data-segment for WIN32 */
# pragma data_seg(".CRT$XLB")
# endif

   /* the anchor itself; it is `const` only in the non-_M_IX86 branch above */
   __declspec(allocate(".CRT$XLB")) PIMAGE_TLS_CALLBACK mdbx_tls_anchor = mdbx_module_handler;
# pragma data_seg(pop)
# pragma const_seg(pop)

#elif defined(__GNUC__)
# ifndef _M_IX86
     const
# endif
   /* GCC flavour: the section is selected by an attribute, and `used` keeps
    * the otherwise unreferenced variable from being discarded */
   PIMAGE_TLS_CALLBACK mdbx_tls_anchor __attribute__((__section__(".CRT$XLB"), used)) = mdbx_module_handler;
#else
# error FIXME
#endif
/* *INDENT-ON* */
/* clang-format on */
#endif /* !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER */
107
108 /*----------------------------------------------------------------------------*/
109
/* Flag bits for flock(), passed through to LockFileEx() as dwFlags:
 * the lock kind (shared is the absence of LOCKFILE_EXCLUSIVE_LOCK)... */
#define LCK_SHARED 0
#define LCK_EXCLUSIVE LOCKFILE_EXCLUSIVE_LOCK
/* ...and the waiting policy (waiting is the absence of
 * LOCKFILE_FAIL_IMMEDIATELY). */
#define LCK_WAITFOR 0
#define LCK_DONTWAIT LOCKFILE_FAIL_IMMEDIATELY
114
115 static __inline BOOL flock(mdbx_filehandle_t fd, DWORD flags, uint64_t offset,
116                            size_t bytes) {
117   OVERLAPPED ov;
118   ov.hEvent = 0;
119   ov.Offset = (DWORD)offset;
120   ov.OffsetHigh = HIGH_DWORD(offset);
121   return LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov);
122 }
123
124 static __inline BOOL funlock(mdbx_filehandle_t fd, uint64_t offset,
125                              size_t bytes) {
126   return UnlockFile(fd, (DWORD)offset, HIGH_DWORD(offset), (DWORD)bytes,
127                     HIGH_DWORD(bytes));
128 }
129
130 /*----------------------------------------------------------------------------*/
/* global `write` lock for write-txn processing
 * (exclusive locking of the body part of the DB file,
 * i.e. of the range that follows the meta-pages) */
133
/* Byte ranges of the DB file used for locking. */

/* one more than the largest value of the signed counterpart of size_t,
 * i.e. half of the size_t range */
#define LCK_MAXLEN (1u + ((~(size_t)0) >> 1))
/* the meta-pages area: the first NUM_METAS pages of the maximal page size */
#define LCK_META_OFFSET 0
#define LCK_META_LEN (MAX_PAGESIZE * NUM_METAS)
/* the body: everything from the end of the meta area up to LCK_MAXLEN */
#define LCK_BODY_OFFSET LCK_META_LEN
#define LCK_BODY_LEN (LCK_MAXLEN - LCK_BODY_OFFSET)
/* `offset, length` argument pairs for flock()/funlock() */
#define LCK_BODY LCK_BODY_OFFSET, LCK_BODY_LEN
#define LCK_WHOLE 0, LCK_MAXLEN
141
142 int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
143   if (dontwait) {
144     if (!TryEnterCriticalSection(&env->me_windowsbug_lock))
145       return MDBX_BUSY;
146   } else {
147     __try {
148       EnterCriticalSection(&env->me_windowsbug_lock);
149     }
150     __except ((GetExceptionCode() ==
151                  0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */)
152                     ? EXCEPTION_EXECUTE_HANDLER
153                     : EXCEPTION_CONTINUE_SEARCH) {
154       return ERROR_POSSIBLE_DEADLOCK;
155     }
156   }
157
158   if ((env->me_flags & MDBX_EXCLUSIVE) ||
159       flock(env->me_lazy_fd,
160             dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT)
161                      : (LCK_EXCLUSIVE | LCK_WAITFOR),
162             LCK_BODY))
163     return MDBX_SUCCESS;
164   int rc = (int)GetLastError();
165   LeaveCriticalSection(&env->me_windowsbug_lock);
166   return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY;
167 }
168
169 void mdbx_txn_unlock(MDBX_env *env) {
170   int rc = (env->me_flags & MDBX_EXCLUSIVE)
171                ? TRUE
172                : funlock(env->me_lazy_fd, LCK_BODY);
173   LeaveCriticalSection(&env->me_windowsbug_lock);
174   if (!rc)
175     mdbx_panic("%s failed: err %u", __func__, (int)GetLastError());
176 }
177
178 /*----------------------------------------------------------------------------*/
179 /* global `read` lock for readers registration,
180  * exclusive locking `mti_numreaders` (second) cacheline */
181
/* Byte ranges of the LCK file used for locking: the lower part spans from
 * the start of MDBX_lockinfo up to the `mti_numreaders` member... */
#define LCK_LO_OFFSET 0
#define LCK_LO_LEN offsetof(MDBX_lockinfo, mti_numreaders)
/* ...and the upper part spans from `mti_numreaders` to the end of
 * MDBX_lockinfo. */
#define LCK_UP_OFFSET LCK_LO_LEN
#define LCK_UP_LEN (sizeof(MDBX_lockinfo) - LCK_UP_OFFSET)
/* `offset, length` argument pairs for flock()/funlock() */
#define LCK_LOWER LCK_LO_OFFSET, LCK_LO_LEN
#define LCK_UPPER LCK_UP_OFFSET, LCK_UP_LEN
188
189 MDBX_INTERNAL_FUNC int mdbx_rdt_lock(MDBX_env *env) {
190   mdbx_srwlock_AcquireShared(&env->me_remap_guard);
191   if (env->me_lfd == INVALID_HANDLE_VALUE)
192     return MDBX_SUCCESS; /* readonly database in readonly filesystem */
193
194   /* transition from S-? (used) to S-E (locked),
195    * e.g. exclusive lock upper-part */
196   if ((env->me_flags & MDBX_EXCLUSIVE) ||
197       flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER))
198     return MDBX_SUCCESS;
199
200   int rc = (int)GetLastError();
201   mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
202   return rc;
203 }
204
205 MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
206   if (env->me_lfd != INVALID_HANDLE_VALUE) {
207     /* transition from S-E (locked) to S-? (used), e.g. unlock upper-part */
208     if ((env->me_flags & MDBX_EXCLUSIVE) == 0 &&
209         !funlock(env->me_lfd, LCK_UPPER))
210       mdbx_panic("%s failed: err %u", __func__, (int)GetLastError());
211   }
212   mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
213 }
214
215 MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait) {
216   return flock(fd,
217                wait ? LCK_EXCLUSIVE | LCK_WAITFOR
218                     : LCK_EXCLUSIVE | LCK_DONTWAIT,
219                0, LCK_MAXLEN)
220              ? MDBX_SUCCESS
221              : (int)GetLastError();
222 }
223
224 static int suspend_and_append(mdbx_handle_array_t **array,
225                               const DWORD ThreadId) {
226   const unsigned limit = (*array)->limit;
227   if ((*array)->count == limit) {
228     void *ptr = mdbx_realloc(
229         (limit > ARRAY_LENGTH((*array)->handles))
230             ? *array
231             : /* don't free initial array on the stack */ NULL,
232         sizeof(mdbx_handle_array_t) +
233             sizeof(HANDLE) * (limit * 2 - ARRAY_LENGTH((*array)->handles)));
234     if (!ptr)
235       return MDBX_ENOMEM;
236     if (limit == ARRAY_LENGTH((*array)->handles))
237       memcpy(ptr, *array, sizeof(mdbx_handle_array_t));
238     *array = (mdbx_handle_array_t *)ptr;
239     (*array)->limit = limit * 2;
240   }
241
242   HANDLE hThread = OpenThread(THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION,
243                               FALSE, ThreadId);
244   if (hThread == NULL)
245     return (int)GetLastError();
246
247   if (SuspendThread(hThread) == (DWORD)-1) {
248     int err = (int)GetLastError();
249     DWORD ExitCode;
250     if (err == /* workaround for Win10 UCRT bug */ ERROR_ACCESS_DENIED ||
251         !GetExitCodeThread(hThread, &ExitCode) || ExitCode != STILL_ACTIVE)
252       err = MDBX_SUCCESS;
253     CloseHandle(hThread);
254     return err;
255   }
256
257   (*array)->handles[(*array)->count++] = hThread;
258   return MDBX_SUCCESS;
259 }
260
/* Suspends the other threads of the current process and collects their
 * handles in `*array` (which may be re-allocated by suspend_and_append()).
 *
 * With a mapped LCK the candidates are taken from the readers table: every
 * slot that belongs to this process and has a non-zero thread id, plus the
 * owner of the write transaction `me_txn0`. Without LCK all threads of the
 * process are enumerated through a Toolhelp snapshot. The calling thread is
 * never suspended.
 *
 * Returns MDBX_SUCCESS, or an error code; on the error paths that follow a
 * suspension attempt mdbx_resume_threads_after_remap(*array) is called
 * before returning. */
MDBX_INTERNAL_FUNC int
mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
  mdbx_assert(env, (env->me_flags & MDBX_NOTLS) == 0);
  const uintptr_t CurrentTid = GetCurrentThreadId();
  int rc;
  if (env->me_lck_mmap.lck) {
    /* Scan LCK for threads of the current process */
    const MDBX_reader *const begin = env->me_lck_mmap.lck->mti_readers;
    const MDBX_reader *const end =
        begin +
        atomic_load32(&env->me_lck_mmap.lck->mti_numreaders, mo_AcquireRelease);
    const uintptr_t WriteTxnOwner = env->me_txn0 ? env->me_txn0->mt_owner : 0;
    for (const MDBX_reader *reader = begin; reader < end; ++reader) {
      /* skip the slots of other processes and the slots without a thread */
      if (reader->mr_pid.weak != env->me_pid || !reader->mr_tid.weak) {
      skip_lck:
        continue;
      }
      /* the current thread must keep running; the write-txn owner is
       * handled once, after the loop */
      if (reader->mr_tid.weak == CurrentTid ||
          reader->mr_tid.weak == WriteTxnOwner)
        goto skip_lck;

      rc = suspend_and_append(array, (mdbx_tid_t)reader->mr_tid.weak);
      if (rc != MDBX_SUCCESS) {
      bailout_lck:
        /* undo: resume whatever has been suspended so far */
        (void)mdbx_resume_threads_after_remap(*array);
        return rc;
      }
    }
    if (WriteTxnOwner && WriteTxnOwner != CurrentTid) {
      rc = suspend_and_append(array, (mdbx_tid_t)WriteTxnOwner);
      if (rc != MDBX_SUCCESS)
        goto bailout_lck;
    }
  } else {
    /* Without LCK (i.e. read-only mode).
     * Walk through a snapshot of all running threads */
    mdbx_assert(env, env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY));
    const HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
    if (hSnapshot == INVALID_HANDLE_VALUE)
      return (int)GetLastError();

    THREADENTRY32 entry;
    entry.dwSize = sizeof(THREADENTRY32);

    if (!Thread32First(hSnapshot, &entry)) {
      rc = (int)GetLastError();
    bailout_toolhelp:
      /* common failure exit of this branch: drop the snapshot and resume
       * whatever has been suspended so far */
      CloseHandle(hSnapshot);
      (void)mdbx_resume_threads_after_remap(*array);
      return rc;
    }

    do {
      /* only the other threads of this very process are of interest */
      if (entry.th32OwnerProcessID != env->me_pid ||
          entry.th32ThreadID == CurrentTid)
        continue;

      rc = suspend_and_append(array, entry.th32ThreadID);
      if (rc != MDBX_SUCCESS)
        goto bailout_toolhelp;

    } while (Thread32Next(hSnapshot, &entry));

    /* the enumeration is complete only if it stopped with
     * ERROR_NO_MORE_FILES, anything else is a failure */
    rc = (int)GetLastError();
    if (rc != ERROR_NO_MORE_FILES)
      goto bailout_toolhelp;
    CloseHandle(hSnapshot);
  }

  return MDBX_SUCCESS;
}
332
333 MDBX_INTERNAL_FUNC int
334 mdbx_resume_threads_after_remap(mdbx_handle_array_t *array) {
335   int rc = MDBX_SUCCESS;
336   for (unsigned i = 0; i < array->count; ++i) {
337     const HANDLE hThread = array->handles[i];
338     if (ResumeThread(hThread) == (DWORD)-1) {
339       const int err = (int)GetLastError();
340       DWORD ExitCode;
341       if (err != /* workaround for Win10 UCRT bug */ ERROR_ACCESS_DENIED &&
342           GetExitCodeThread(hThread, &ExitCode) && ExitCode == STILL_ACTIVE)
343         rc = err;
344     }
345     CloseHandle(hThread);
346   }
347   return rc;
348 }
349
350 /*----------------------------------------------------------------------------*/
351 /* global `initial` lock for lockfile initialization,
352  * exclusive/shared locking first cacheline */
353
/* Brief description of the locking schema/algorithm:
 * - Windows does not support upgrading or downgrading of file locks.
 * - Therefore upgrading/downgrading is emulated by shared and exclusive
 * locking of the upper and lower halves.
 * - In other words, we have an FSM with 9 possible states,
 * i.e. free/shared/exclusive x free/shared/exclusive == 9.
 * Only 6 states of the FSM are used, 2 of which are transitive.
361  *
362  * States:
363  * ?-? = free, i.e. unlocked
364  * S-? = used, i.e. shared lock
365  * E-? = exclusive-read, i.e. operational exclusive
366  * ?-S
367  * ?-E = middle (transitive state)
368  * S-S
369  * S-E = locked (transitive state)
370  * E-S
371  * E-E = exclusive-write, i.e. exclusive due (re)initialization
372  *
373  * The mdbx_lck_seize() moves the locking-FSM from the initial free/unlocked
374  * state to the "exclusive write" (and returns MDBX_RESULT_TRUE) if possible,
375  * or to the "used" (and returns MDBX_RESULT_FALSE).
376  *
377  * The mdbx_lck_downgrade() moves the locking-FSM from "exclusive write"
378  * state to the "used" (i.e. shared) state.
379  *
380  * The mdbx_lck_upgrade() moves the locking-FSM from "used" (i.e. shared)
381  * state to the "exclusive write" state.
382  */
383
384 static void lck_unlock(MDBX_env *env) {
385   int err;
386
387   if (env->me_lfd != INVALID_HANDLE_VALUE) {
388     /* double `unlock` for robustly remove overlapped shared/exclusive locks */
389     while (funlock(env->me_lfd, LCK_LOWER))
390       ;
391     err = (int)GetLastError();
392     assert(err == ERROR_NOT_LOCKED ||
393            (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
394     (void)err;
395     SetLastError(ERROR_SUCCESS);
396
397     while (funlock(env->me_lfd, LCK_UPPER))
398       ;
399     err = (int)GetLastError();
400     assert(err == ERROR_NOT_LOCKED ||
401            (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
402     (void)err;
403     SetLastError(ERROR_SUCCESS);
404   }
405
406   if (env->me_lazy_fd != INVALID_HANDLE_VALUE) {
407     /* explicitly unlock to avoid latency for other processes (windows kernel
408      * releases such locks via deferred queues) */
409     while (funlock(env->me_lazy_fd, LCK_BODY))
410       ;
411     err = (int)GetLastError();
412     assert(err == ERROR_NOT_LOCKED ||
413            (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
414     (void)err;
415     SetLastError(ERROR_SUCCESS);
416
417     while (funlock(env->me_lazy_fd, LCK_WHOLE))
418       ;
419     err = (int)GetLastError();
420     assert(err == ERROR_NOT_LOCKED ||
421            (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
422     (void)err;
423     SetLastError(ERROR_SUCCESS);
424   }
425 }
426
427 /* Seize state as 'exclusive-write' (E-E and returns MDBX_RESULT_TRUE)
428  * or as 'used' (S-? and returns MDBX_RESULT_FALSE).
429  * Otherwise returns an error. */
430 static int internal_seize_lck(HANDLE lfd) {
431   int rc;
432   assert(lfd != INVALID_HANDLE_VALUE);
433
434   /* 1) now on ?-? (free), get ?-E (middle) */
435   mdbx_jitter4testing(false);
436   if (!flock(lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) {
437     rc = (int)GetLastError() /* 2) something went wrong, give up */;
438     mdbx_error("%s, err %u", "?-?(free) >> ?-E(middle)", rc);
439     return rc;
440   }
441
442   /* 3) now on ?-E (middle), try E-E (exclusive-write) */
443   mdbx_jitter4testing(false);
444   if (flock(lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER))
445     return MDBX_RESULT_TRUE /* 4) got E-E (exclusive-write), done */;
446
447   /* 5) still on ?-E (middle) */
448   rc = (int)GetLastError();
449   mdbx_jitter4testing(false);
450   if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
451     /* 6) something went wrong, give up */
452     if (!funlock(lfd, LCK_UPPER))
453       mdbx_panic("%s(%s) failed: err %u", __func__, "?-E(middle) >> ?-?(free)",
454                  (int)GetLastError());
455     return rc;
456   }
457
458   /* 7) still on ?-E (middle), try S-E (locked) */
459   mdbx_jitter4testing(false);
460   rc = flock(lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER) ? MDBX_RESULT_FALSE
461                                                         : (int)GetLastError();
462
463   mdbx_jitter4testing(false);
464   if (rc != MDBX_RESULT_FALSE)
465     mdbx_error("%s, err %u", "?-E(middle) >> S-E(locked)", rc);
466
467   /* 8) now on S-E (locked) or still on ?-E (middle),
468    * transition to S-? (used) or ?-? (free) */
469   if (!funlock(lfd, LCK_UPPER))
470     mdbx_panic("%s(%s) failed: err %u", __func__,
471                "X-E(locked/middle) >> X-?(used/free)", (int)GetLastError());
472
473   /* 9) now on S-? (used, DONE) or ?-? (free, FAILURE) */
474   return rc;
475 }
476
477 MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
478   int rc;
479
480   assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
481   if (env->me_flags & MDBX_EXCLUSIVE)
482     return MDBX_RESULT_TRUE /* nope since files were must be opened
483                                non-shareable */
484         ;
485
486   if (env->me_lfd == INVALID_HANDLE_VALUE) {
487     /* LY: without-lck mode (e.g. on read-only filesystem) */
488     mdbx_jitter4testing(false);
489     if (!flock(env->me_lazy_fd, LCK_SHARED | LCK_DONTWAIT, LCK_WHOLE)) {
490       rc = (int)GetLastError();
491       mdbx_error("%s, err %u", "without-lck", rc);
492       return rc;
493     }
494     return MDBX_RESULT_FALSE;
495   }
496
497   rc = internal_seize_lck(env->me_lfd);
498   mdbx_jitter4testing(false);
499   if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_RDONLY) == 0) {
500     /* Check that another process don't operates in without-lck mode.
501      * Doing such check by exclusive locking the body-part of db. Should be
502      * noted:
503      * - we need an exclusive lock for do so;
504      * - we can't lock meta-pages, otherwise other process could get an error
505      * while opening db in valid (non-conflict) mode. */
506     if (!flock(env->me_lazy_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY)) {
507       rc = (int)GetLastError();
508       mdbx_error("%s, err %u", "lock-against-without-lck", rc);
509       mdbx_jitter4testing(false);
510       lck_unlock(env);
511     } else {
512       mdbx_jitter4testing(false);
513       if (!funlock(env->me_lazy_fd, LCK_BODY))
514         mdbx_panic("%s(%s) failed: err %u", __func__,
515                    "unlock-against-without-lck", (int)GetLastError());
516     }
517   }
518
519   return rc;
520 }
521
522 MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
523   /* Transite from exclusive-write state (E-E) to used (S-?) */
524   assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
525   assert(env->me_lfd != INVALID_HANDLE_VALUE);
526
527   if (env->me_flags & MDBX_EXCLUSIVE)
528     return MDBX_SUCCESS /* nope since files were must be opened non-shareable */
529         ;
530   /* 1) now at E-E (exclusive-write), transition to ?_E (middle) */
531   if (!funlock(env->me_lfd, LCK_LOWER))
532     mdbx_panic("%s(%s) failed: err %u", __func__,
533                "E-E(exclusive-write) >> ?-E(middle)", (int)GetLastError());
534
535   /* 2) now at ?-E (middle), transition to S-E (locked) */
536   if (!flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)) {
537     int rc = (int)GetLastError() /* 3) something went wrong, give up */;
538     mdbx_error("%s, err %u", "?-E(middle) >> S-E(locked)", rc);
539     return rc;
540   }
541
542   /* 4) got S-E (locked), continue transition to S-? (used) */
543   if (!funlock(env->me_lfd, LCK_UPPER))
544     mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> S-?(used)",
545                (int)GetLastError());
546
547   return MDBX_SUCCESS /* 5) now at S-? (used), done */;
548 }
549
550 MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) {
551   /* Transite from used state (S-?) to exclusive-write (E-E) */
552   assert(env->me_lfd != INVALID_HANDLE_VALUE);
553
554   if (env->me_flags & MDBX_EXCLUSIVE)
555     return MDBX_SUCCESS /* nope since files were must be opened non-shareable */
556         ;
557
558   int rc;
559   /* 1) now on S-? (used), try S-E (locked) */
560   mdbx_jitter4testing(false);
561   if (!flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_UPPER)) {
562     rc = (int)GetLastError() /* 2) something went wrong, give up */;
563     mdbx_verbose("%s, err %u", "S-?(used) >> S-E(locked)", rc);
564     return rc;
565   }
566
567   /* 3) now on S-E (locked), transition to ?-E (middle) */
568   if (!funlock(env->me_lfd, LCK_LOWER))
569     mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> ?-E(middle)",
570                (int)GetLastError());
571
572   /* 4) now on ?-E (middle), try E-E (exclusive-write) */
573   mdbx_jitter4testing(false);
574   if (!flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER)) {
575     rc = (int)GetLastError() /* 5) something went wrong, give up */;
576     mdbx_verbose("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc);
577     return rc;
578   }
579
580   return MDBX_SUCCESS /* 6) now at E-E (exclusive-write), done */;
581 }
582
583 MDBX_INTERNAL_FUNC int mdbx_lck_init(MDBX_env *env,
584                                      MDBX_env *inprocess_neighbor,
585                                      int global_uniqueness_flag) {
586   (void)env;
587   (void)inprocess_neighbor;
588   (void)global_uniqueness_flag;
589   return MDBX_SUCCESS;
590 }
591
592 MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
593                                         MDBX_env *inprocess_neighbor) {
594   /* LY: should unmap before releasing the locks to avoid race condition and
595    * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */
596   if (env->me_map)
597     mdbx_munmap(&env->me_dxb_mmap);
598   if (env->me_lck_mmap.lck) {
599     const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0;
600     mdbx_munmap(&env->me_lck_mmap);
601     if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE &&
602         mdbx_lck_upgrade(env) == MDBX_SUCCESS)
603       /* this will fail if LCK is used/mmapped by other process(es) */
604       mdbx_ftruncate(env->me_lfd, 0);
605   }
606   lck_unlock(env);
607   return MDBX_SUCCESS;
608 }
609
610 /*----------------------------------------------------------------------------*/
611 /* reader checking (by pid) */
612
613 MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env) {
614   (void)env;
615   return MDBX_SUCCESS;
616 }
617
618 MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env) {
619   (void)env;
620   return MDBX_SUCCESS;
621 }
622
623 /* Checks reader by pid.
624  *
625  * Returns:
626  * MDBX_RESULT_TRUE, if pid is live (unable to acquire lock)
627  * MDBX_RESULT_FALSE, if pid is dead (lock acquired)
628  * or otherwise the errcode. */
629 MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid) {
630   (void)env;
631   HANDLE hProcess = OpenProcess(SYNCHRONIZE, FALSE, pid);
632   int rc;
633   if (likely(hProcess)) {
634     rc = WaitForSingleObject(hProcess, 0);
635     if (unlikely(rc == (int)WAIT_FAILED))
636       rc = (int)GetLastError();
637     CloseHandle(hProcess);
638   } else {
639     rc = (int)GetLastError();
640   }
641
642   switch (rc) {
643   case ERROR_INVALID_PARAMETER:
644     /* pid seems invalid */
645     return MDBX_RESULT_FALSE;
646   case WAIT_OBJECT_0:
647     /* process just exited */
648     return MDBX_RESULT_FALSE;
649   case ERROR_ACCESS_DENIED:
650     /* The ERROR_ACCESS_DENIED would be returned for CSRSS-processes, etc.
651      * assume pid exists */
652     return MDBX_RESULT_TRUE;
653   case WAIT_TIMEOUT:
654     /* pid running */
655     return MDBX_RESULT_TRUE;
656   default:
657     /* failure */
658     return rc;
659   }
660 }
661
662 //----------------------------------------------------------------------------
663 // Stub for slim read-write lock
664 // Copyright (C) 1995-2002 Brad Wilson
665
666 static void WINAPI stub_srwlock_Init(MDBX_srwlock *srwl) {
667   srwl->readerCount = srwl->writerCount = 0;
668 }
669
670 static void WINAPI stub_srwlock_AcquireShared(MDBX_srwlock *srwl) {
671   while (true) {
672     assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
673
674     // If there's a writer already, spin without unnecessarily
675     // interlocking the CPUs
676     if (srwl->writerCount != 0) {
677       YieldProcessor();
678       continue;
679     }
680
681     // Add to the readers list
682     _InterlockedIncrement(&srwl->readerCount);
683
684     // Check for writers again (we may have been preempted). If
685     // there are no writers writing or waiting, then we're done.
686     if (srwl->writerCount == 0)
687       break;
688
689     // Remove from the readers list, spin, try again
690     _InterlockedDecrement(&srwl->readerCount);
691     YieldProcessor();
692   }
693 }
694
695 static void WINAPI stub_srwlock_ReleaseShared(MDBX_srwlock *srwl) {
696   assert(srwl->readerCount > 0);
697   _InterlockedDecrement(&srwl->readerCount);
698 }
699
700 static void WINAPI stub_srwlock_AcquireExclusive(MDBX_srwlock *srwl) {
701   while (true) {
702     assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
703
704     // If there's a writer already, spin without unnecessarily
705     // interlocking the CPUs
706     if (srwl->writerCount != 0) {
707       YieldProcessor();
708       continue;
709     }
710
711     // See if we can become the writer (expensive, because it inter-
712     // locks the CPUs, so writing should be an infrequent process)
713     if (_InterlockedExchange(&srwl->writerCount, 1) == 0)
714       break;
715   }
716
717   // Now we're the writer, but there may be outstanding readers.
718   // Spin until there aren't any more; new readers will wait now
719   // that we're the writer.
720   while (srwl->readerCount != 0) {
721     assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
722     YieldProcessor();
723   }
724 }
725
726 static void WINAPI stub_srwlock_ReleaseExclusive(MDBX_srwlock *srwl) {
727   assert(srwl->writerCount == 1 && srwl->readerCount >= 0);
728   srwl->writerCount = 0;
729 }
730
731 static uint64_t WINAPI stub_GetTickCount64(void) {
732   LARGE_INTEGER Counter, Frequency;
733   return (QueryPerformanceFrequency(&Frequency) &&
734           QueryPerformanceCounter(&Counter))
735              ? Counter.QuadPart * 1000ul / Frequency.QuadPart
736              : 0;
737 }
738
739 /*----------------------------------------------------------------------------*/
740
/* Definitions of the function pointers which are assigned at startup by
 * mdbx_winnt_import(). They are defined here unless xMDBX_ALLOY is set. */
#ifndef xMDBX_ALLOY
/* the slim read-write lock operations: either the ones exported by
 * kernel32.dll or the stub_srwlock_xxx() fallbacks */
MDBX_srwlock_function mdbx_srwlock_Init, mdbx_srwlock_AcquireShared,
    mdbx_srwlock_ReleaseShared, mdbx_srwlock_AcquireExclusive,
    mdbx_srwlock_ReleaseExclusive;

/* entry points looked up by name in ntdll.dll, kernel32.dll and
 * advapi32.dll */
MDBX_NtExtendSection mdbx_NtExtendSection;
MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx;
MDBX_GetVolumeInformationByHandleW mdbx_GetVolumeInformationByHandleW;
MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW;
MDBX_SetFileInformationByHandle mdbx_SetFileInformationByHandle;
MDBX_NtFsControlFile mdbx_NtFsControlFile;
MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory;
MDBX_GetTickCount64 mdbx_GetTickCount64;
MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* xMDBX_ALLOY */
756
/* The function pointers returned by GetProcAddress() are casted to the
 * specific function types below, hence the corresponding GCC warning is
 * silenced for this function. */
#if __GNUC_PREREQ(8, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-function-type"
#endif /* GCC/MINGW */

/* Resolves the optional system entry points at runtime and stores them into
 * the mdbx_Xxx function pointers. A pointer stays NULL when the entry point
 * is absent, except for mdbx_GetTickCount64 and the srwlock operations,
 * which fall back to the local stubs. */
static void mdbx_winnt_import(void) {
  const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll");

/* looks up ENTRY in `dll` and assigns the result to mdbx_ENTRY */
#define GET_PROC_ADDR(dll, ENTRY) \
mdbx_##ENTRY = (MDBX_##ENTRY)GetProcAddress(dll, #ENTRY)

  /* an ntdll.dll which exports wine_get_version is treated as Wine: the two
   * Nt-functions are not imported then */
  if (GetProcAddress(hNtdll, "wine_get_version")) {
    assert(mdbx_RunningUnderWine());
  } else {
    GET_PROC_ADDR(hNtdll, NtFsControlFile);
    GET_PROC_ADDR(hNtdll, NtExtendSection);
    assert(!mdbx_RunningUnderWine());
  }

  const HINSTANCE hKernel32dll = GetModuleHandleA("kernel32.dll");
  GET_PROC_ADDR(hKernel32dll, GetFileInformationByHandleEx);
  GET_PROC_ADDR(hKernel32dll, GetTickCount64);
  if (!mdbx_GetTickCount64)
    mdbx_GetTickCount64 = stub_GetTickCount64;
  /* these four are not imported when mdbx_RunningUnderWine() is true */
  if (!mdbx_RunningUnderWine()) {
    GET_PROC_ADDR(hKernel32dll, SetFileInformationByHandle);
    GET_PROC_ADDR(hKernel32dll, GetVolumeInformationByHandleW);
    GET_PROC_ADDR(hKernel32dll, GetFinalPathNameByHandleW);
    GET_PROC_ADDR(hKernel32dll, PrefetchVirtualMemory);
  }

  const HINSTANCE hAdvapi32dll = GetModuleHandleA("advapi32.dll");
  GET_PROC_ADDR(hAdvapi32dll, RegGetValueA);
#undef GET_PROC_ADDR

  /* The presence of InitializeSRWLock decides for the whole family: either
   * all five operations come from kernel32.dll or all five are the stubs.
   * NOTE(review): the other four lookups are not checked for NULL,
   * presumably because they are always exported together - confirm. */
  const MDBX_srwlock_function init =
      (MDBX_srwlock_function)GetProcAddress(hKernel32dll, "InitializeSRWLock");
  if (init != NULL) {
    mdbx_srwlock_Init = init;
    mdbx_srwlock_AcquireShared = (MDBX_srwlock_function)GetProcAddress(
        hKernel32dll, "AcquireSRWLockShared");
    mdbx_srwlock_ReleaseShared = (MDBX_srwlock_function)GetProcAddress(
        hKernel32dll, "ReleaseSRWLockShared");
    mdbx_srwlock_AcquireExclusive = (MDBX_srwlock_function)GetProcAddress(
        hKernel32dll, "AcquireSRWLockExclusive");
    mdbx_srwlock_ReleaseExclusive = (MDBX_srwlock_function)GetProcAddress(
        hKernel32dll, "ReleaseSRWLockExclusive");
  } else {
    mdbx_srwlock_Init = stub_srwlock_Init;
    mdbx_srwlock_AcquireShared = stub_srwlock_AcquireShared;
    mdbx_srwlock_ReleaseShared = stub_srwlock_ReleaseShared;
    mdbx_srwlock_AcquireExclusive = stub_srwlock_AcquireExclusive;
    mdbx_srwlock_ReleaseExclusive = stub_srwlock_ReleaseExclusive;
  }
}

#if __GNUC_PREREQ(8, 0)
#pragma GCC diagnostic pop
#endif /* GCC/MINGW */
816
817 #endif /* Windows LCK-implementation */