1 /*
2 ** 2001 September 16
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains code that is specific to particular operating
14 ** systems. The purpose of this file is to provide a uniform abstraction
15 ** on which the rest of SQLite can operate.
16 */
17 #include "os.h" /* Must be first to enable large file support */
18 #include "sqliteInt.h"
19
20 #if OS_UNIX
21 # include <time.h>
22 # include <errno.h>
23 # include <unistd.h>
24 # ifndef O_LARGEFILE
25 # define O_LARGEFILE 0
26 # endif
27 # ifdef SQLITE_DISABLE_LFS
28 # undef O_LARGEFILE
29 # define O_LARGEFILE 0
30 # endif
31 # ifndef O_NOFOLLOW
32 # define O_NOFOLLOW 0
33 # endif
34 # ifndef O_BINARY
35 # define O_BINARY 0
36 # endif
37 #endif
38
39
40 #if OS_WIN
41 # include <winbase.h>
42 #endif
43
44 #if OS_MAC
45 # include <extras.h>
46 # include <path2fss.h>
47 # include <TextUtils.h>
48 # include <FinderRegistry.h>
49 # include <Folders.h>
50 # include <Timer.h>
51 # include <OSUtils.h>
52 #endif
53
/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call.  So redefine fcntl() to be something
** that always succeeds.  This means that locking does not occur under
** DJGPP.  But it's DOS - what did you expect?
**
** The replacement discards all three arguments and evaluates to 0.
*/
#ifdef __DJGPP__
# define fcntl(A,B,C) 0
#endif
63
/*
** Macros used to determine whether or not to use threads.  Each macro
** below is defined (to 1) only when THREADSAFE is defined and non-zero:
**
**   SQLITE_UNIX_THREADS        OS_UNIX: synchronize using Posix threads
**                              (<pthread.h> is included).
**   SQLITE_W32_THREADS         OS_WIN:  synchronize using Win32 threads.
**   SQLITE_MACOS_MULTITASKING  OS_MAC:  <Multiprocessing.h> is included.
*/
#if OS_UNIX && defined(THREADSAFE) && THREADSAFE
# include <pthread.h>
# define SQLITE_UNIX_THREADS 1
#endif
#if OS_WIN && defined(THREADSAFE) && THREADSAFE
# define SQLITE_W32_THREADS 1
#endif
#if OS_MAC && defined(THREADSAFE) && THREADSAFE
# include <Multiprocessing.h>
# define SQLITE_MACOS_MULTITASKING 1
#endif
81
/*
** Macros for performance tracing.  Normally turned off.
**
** When the "#if 0" below is changed to "#if 1":
**
**   TIMER_START     stores hwtime() in g_start.
**   TIMER_END       stores hwtime()-g_start in elapse.
**   SEEK(X)         stores X in last_page.
**   TRACE1..TRACE5  pass their arguments straight to fprintf(stderr,...).
**
** When disabled, every one of these macros expands to nothing.  Because
** the arguments are discarded, code elsewhere may mention last_page and
** elapse inside TRACEn() calls even though those variables only exist in
** the enabled branch.
*/
#if 0
static int last_page = 0;
/* Return the 64-bit value left in EDX:EAX by the x86 "rdtsc" instruction.
** NOTE(review): the asm also writes %ecx but declares no clobbers -
** confirm before enabling this branch. */
__inline__ unsigned long long int hwtime(void){
  unsigned long long int x;
  __asm__("rdtsc\n\t"
          "mov %%edx, %%ecx\n\t"
          :"=A" (x));
  return x;
}
static unsigned long long int g_start;
static unsigned int elapse;
#define TIMER_START       g_start=hwtime()
#define TIMER_END         elapse=hwtime()-g_start
#define SEEK(X)           last_page=(X)
#define TRACE1(X)         fprintf(stderr,X)
#define TRACE2(X,Y)       fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z)     fprintf(stderr,X,Y,Z)
#define TRACE4(X,Y,Z,A)   fprintf(stderr,X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B) fprintf(stderr,X,Y,Z,A,B)
#else
#define TIMER_START
#define TIMER_END
#define SEEK(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B)
#endif
114
115
116 #if OS_UNIX
117 /*
118 ** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
119 ** section 6.5.2.2 lines 483 through 490 specify that when a process
120 ** sets or clears a lock, that operation overrides any prior locks set
121 ** by the same process. It does not explicitly say so, but this implies
122 ** that it overrides locks set by the same process using a different
123 ** file descriptor. Consider this test case:
124 **
125 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
126 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
127 **
128 ** Suppose ./file1 and ./file2 are really the same file (because
129 ** one is a hard or symbolic link to the other) then if you set
130 ** an exclusive lock on fd1, then try to get an exclusive lock
131 ** on fd2, it works. I would have expected the second lock to
132 ** fail since there was already a lock on the file due to fd1.
133 ** But not so. Since both locks came from the same process, the
134 ** second overrides the first, even though they were on different
135 ** file descriptors opened on different file names.
136 **
137 ** Bummer. If you ask me, this is broken. Badly broken. It means
138 ** that we cannot use POSIX locks to synchronize file access among
139 ** competing threads of the same process. POSIX locks will work fine
140 ** to synchronize access for threads in separate processes, but not
141 ** threads within the same process.
142 **
143 ** To work around the problem, SQLite has to manage file locks internally
144 ** on its own. Whenever a new database is opened, we have to find the
145 ** specific inode of the database file (the inode is determined by the
146 ** st_dev and st_ino fields of the stat structure that fstat() fills in)
147 ** and check for locks already existing on that inode. When locks are
148 ** created or removed, we have to look at our own internal record of the
149 ** locks to see if another thread has previously set a lock on that same
150 ** inode.
151 **
152 ** The OsFile structure for POSIX is no longer just an integer file
153 ** descriptor. It is now a structure that holds the integer file
154 ** descriptor and a pointer to a structure that describes the internal
155 ** locks on the corresponding inode. There is one locking structure
156 ** per inode, so if the same inode is opened twice, both OsFile structures
157 ** point to the same locking structure. The locking structure keeps
158 ** a reference count (so we will know when to delete it) and a "cnt"
159 ** field that tells us its internal lock status. cnt==0 means the
160 ** file is unlocked. cnt==-1 means the file has an exclusive lock.
161 ** cnt>0 means there are cnt shared locks on the file.
162 **
163 ** Any attempt to lock or unlock a file first checks the locking
164 ** structure. The fcntl() system call is only invoked to set a
165 ** POSIX lock if the internal lock structure transitions between
166 ** a locked and an unlocked state.
167 **
168 ** 2004-Jan-11:
169 ** More recent discoveries about POSIX advisory locks. (The more
170 ** I discover, the more I realize that POSIX advisory locks are
171 ** an abomination.)
172 **
173 ** If you close a file descriptor that points to a file that has locks,
174 ** all locks on that file that are owned by the current process are
175 ** released. To work around this problem, each OsFile structure contains
176 ** a pointer to an openCnt structure. There is one openCnt structure
177 ** per open inode, which means that multiple OsFiles can point to a single
178 ** openCnt. When an attempt is made to close an OsFile, if there are
179 ** other OsFiles open on the same inode that are holding locks, the call
180 ** to close() the file descriptor is deferred until all of the locks clear.
181 ** The openCnt structure keeps a list of file descriptors that need to
182 ** be closed and that list is walked (and cleared) when the last lock
183 ** clears.
184 **
185 ** First, under Linux threads, because each thread has a separate
186 ** process ID, lock operations in one thread do not override locks
187 ** to the same file in other threads. Linux threads behave like
188 ** separate processes in this respect. But, if you close a file
189 ** descriptor in linux threads, all locks are cleared, even locks
190 ** on other threads and even though the other threads have different
191 ** process IDs. Linux threads is inconsistent in this respect.
192 ** (I'm beginning to think that linux threads is an abomination too.)
193 ** The consequence of this all is that the hash table for the lockInfo
194 ** structure has to include the process id as part of its key because
195 ** locks in different threads are treated as distinct. But the
196 ** openCnt structure should not include the process id in its
197 ** key because close() clears lock on all threads, not just the current
198 ** thread. Were it not for this goofiness in linux threads, we could
199 ** combine the lockInfo and openCnt structures into a single structure.
200 */
201
/*
** An instance of the following structure serves as the key used
** to locate a particular lockInfo structure given its inode.  Note
** that we have to include the process ID as part of the key.  On some
** threading implementations (ex: linux), each thread has a separate
** process ID.
**
** The structure is handed to the hash table as a raw block of
** sizeof(struct lockKey) bytes, so findLockInfo() clears it with
** memset() before filling in the fields.
*/
struct lockKey {
  dev_t dev;   /* Device number */
  ino_t ino;   /* Inode number */
  pid_t pid;   /* Process ID */
};
214
/*
** An instance of the following structure is allocated for each open
** inode on each thread with a different process ID.  (Threads have
** different process IDs on linux, but not on most other unixes.)
**
** A single inode can have multiple file descriptors, so each OsFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of OsFiles pointing to it.
**
** Instances are created by findLockInfo() and destroyed by
** releaseLockInfo() when nRef drops to zero.
*/
struct lockInfo {
  struct lockKey key;  /* The lookup key */
  int cnt;             /* 0: unlocked.  -1: write lock.  1...: read lock. */
  int nRef;            /* Number of pointers to this structure */
};
229
/*
** An instance of the following structure serves as the key used
** to locate a particular openCnt structure given its inode.  This
** is the same as the lockKey except that the process ID is omitted.
**
** As with lockKey, findLockInfo() clears the structure with memset()
** before filling it in and passing it to the hash table.
*/
struct openKey {
  dev_t dev;   /* Device number */
  ino_t ino;   /* Inode number */
};
239
/*
** An instance of the following structure is allocated for each open
** inode.  This structure keeps track of the number of locks on that
** inode.  If a close is attempted against an inode that is holding
** locks, the close is deferred until all locks clear by adding the
** file descriptor to be closed to the pending list.
**
** Instances are created by findLockInfo() and destroyed, together with
** the aPending array, by releaseOpenCnt() when nRef drops to zero.
*/
struct openCnt {
  struct openKey key;   /* The lookup key */
  int nRef;             /* Number of pointers to this structure */
  int nLock;            /* Number of outstanding locks */
  int nPending;         /* Number of pending close() operations */
  int *aPending;        /* Malloced space holding fd's awaiting a close() */
};
254
/*
** These hash tables map inodes and process IDs into lockInfo and openCnt
** structures.  Access to these hash tables must be protected by a mutex.
**
**   lockHash:  struct lockKey (dev, ino, pid)  ->  struct lockInfo
**   openHash:  struct openKey (dev, ino)       ->  struct openCnt
*/
static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
261
262 /*
263 ** Release a lockInfo structure previously allocated by findLockInfo().
264 */
releaseLockInfo(struct lockInfo * pLock)265 static void releaseLockInfo(struct lockInfo *pLock){
266 pLock->nRef--;
267 if( pLock->nRef==0 ){
268 sqliteHashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
269 sqliteFree(pLock);
270 }
271 }
272
273 /*
274 ** Release a openCnt structure previously allocated by findLockInfo().
275 */
releaseOpenCnt(struct openCnt * pOpen)276 static void releaseOpenCnt(struct openCnt *pOpen){
277 pOpen->nRef--;
278 if( pOpen->nRef==0 ){
279 sqliteHashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
280 sqliteFree(pOpen->aPending);
281 sqliteFree(pOpen);
282 }
283 }
284
285 /*
286 ** Given a file descriptor, locate lockInfo and openCnt structures that
287 ** describes that file descriptor. Create a new ones if necessary. The
288 ** return values might be unset if an error occurs.
289 **
290 ** Return the number of errors.
291 */
findLockInfo(int fd,struct lockInfo ** ppLock,struct openCnt ** ppOpen)292 int findLockInfo(
293 int fd, /* The file descriptor used in the key */
294 struct lockInfo **ppLock, /* Return the lockInfo structure here */
295 struct openCnt **ppOpen /* Return the openCnt structure here */
296 ){
297 int rc;
298 struct lockKey key1;
299 struct openKey key2;
300 struct stat statbuf;
301 struct lockInfo *pLock;
302 struct openCnt *pOpen;
303 rc = fstat(fd, &statbuf);
304 if( rc!=0 ) return 1;
305 memset(&key1, 0, sizeof(key1));
306 key1.dev = statbuf.st_dev;
307 key1.ino = statbuf.st_ino;
308 key1.pid = getpid();
309 memset(&key2, 0, sizeof(key2));
310 key2.dev = statbuf.st_dev;
311 key2.ino = statbuf.st_ino;
312 pLock = (struct lockInfo*)sqliteHashFind(&lockHash, &key1, sizeof(key1));
313 if( pLock==0 ){
314 struct lockInfo *pOld;
315 pLock = sqliteMallocRaw( sizeof(*pLock) );
316 if( pLock==0 ) return 1;
317 pLock->key = key1;
318 pLock->nRef = 1;
319 pLock->cnt = 0;
320 pOld = sqliteHashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
321 if( pOld!=0 ){
322 assert( pOld==pLock );
323 sqliteFree(pLock);
324 return 1;
325 }
326 }else{
327 pLock->nRef++;
328 }
329 *ppLock = pLock;
330 pOpen = (struct openCnt*)sqliteHashFind(&openHash, &key2, sizeof(key2));
331 if( pOpen==0 ){
332 struct openCnt *pOld;
333 pOpen = sqliteMallocRaw( sizeof(*pOpen) );
334 if( pOpen==0 ){
335 releaseLockInfo(pLock);
336 return 1;
337 }
338 pOpen->key = key2;
339 pOpen->nRef = 1;
340 pOpen->nLock = 0;
341 pOpen->nPending = 0;
342 pOpen->aPending = 0;
343 pOld = sqliteHashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
344 if( pOld!=0 ){
345 assert( pOld==pOpen );
346 sqliteFree(pOpen);
347 releaseLockInfo(pLock);
348 return 1;
349 }
350 }else{
351 pOpen->nRef++;
352 }
353 *ppOpen = pOpen;
354 return 0;
355 }
356
357 #endif /** POSIX advisory lock work-around **/
358
/*
** If we compile with the SQLITE_TEST macro set, then the following block
** of code will give us the ability to simulate a disk I/O error.  This
** is used for testing the I/O recovery logic.
**
** SimulateIOError(A) does nothing while sqlite_io_error_pending is zero.
** Otherwise it decrements the counter, and if the counter was exactly 1
** before the decrement it calls local_ioerr() and makes the enclosing
** function return A.
**
** The test-build expansion is wrapped in do{...}while(0) so that it is a
** single statement and cannot capture a following "else".  It must be
** followed by a semicolon.  Without SQLITE_TEST the macro expands to
** nothing.
*/
#ifdef SQLITE_TEST
int sqlite_io_error_pending = 0;
static void local_ioerr(void){
  sqlite_io_error_pending = 0;  /* Really just a place to set a breakpoint */
}
#define SimulateIOError(A) \
  do{ \
    if( sqlite_io_error_pending && sqlite_io_error_pending-- == 1 ){ \
      local_ioerr(); \
      return A; \
    } \
  }while(0)
#else
#define SimulateIOError(A)
#endif
375
/*
** When testing, keep a count of the number of open files.
**
** OpenCounter(X) adds X to sqlite_open_file_count when SQLITE_TEST is
** defined and expands to nothing otherwise.  The expansion is fully
** parenthesized so that it behaves as a single expression wherever it
** is used.
*/
#ifdef SQLITE_TEST
int sqlite_open_file_count = 0;
#define OpenCounter(X)  (sqlite_open_file_count+=(X))
#else
#define OpenCounter(X)
#endif
385
386
387 /*
388 ** Delete the named file
389 */
sqliteOsDelete(const char * zFilename)390 int sqliteOsDelete(const char *zFilename){
391 #if OS_UNIX
392 unlink(zFilename);
393 #endif
394 #if OS_WIN
395 DeleteFile(zFilename);
396 #endif
397 #if OS_MAC
398 unlink(zFilename);
399 #endif
400 return SQLITE_OK;
401 }
402
/*
** Return TRUE (1) if the named file exists and FALSE (0) if it does not.
**
** Unix and Mac probe the name with access(); Windows asks for the file
** attributes and treats the value 0xffffffff as "no such file".
*/
int sqliteOsFileExists(const char *zFilename){
#if OS_WIN
  return GetFileAttributes(zFilename)==0xffffffff ? 0 : 1;
#endif
#if OS_UNIX
  return access(zFilename, 0)!=0 ? 0 : 1;
#endif
#if OS_MAC
  return access(zFilename, 0)!=0 ? 0 : 1;
#endif
}
417
418
#if 0 /* NOT USED */
/*
** Change the name of an existing file.
**
** Return SQLITE_OK on success and SQLITE_ERROR on failure.  This routine
** is compiled out, and its OS_MAC branch is only a placeholder that
** always reports an error.
*/
int sqliteOsFileRename(const char *zOldName, const char *zNewName){
#if OS_UNIX
  /* Make the new name a second link to the file, then drop the old name.
  ** The result of unlink() is not checked. */
  if( link(zOldName, zNewName) ){
    return SQLITE_ERROR;
  }
  unlink(zOldName);
  return SQLITE_OK;
#endif
#if OS_WIN
  if( !MoveFile(zOldName, zNewName) ){
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
#endif
#if OS_MAC
  /**** FIX ME ***/
  return SQLITE_ERROR;
#endif
}
#endif /* NOT USED */
443
444 /*
445 ** Attempt to open a file for both reading and writing. If that
446 ** fails, try opening it read-only. If the file does not exist,
447 ** try to create it.
448 **
449 ** On success, a handle for the open file is written to *id
450 ** and *pReadonly is set to 0 if the file was opened for reading and
451 ** writing or 1 if the file was opened read-only. The function returns
452 ** SQLITE_OK.
453 **
454 ** On failure, the function returns SQLITE_CANTOPEN and leaves
455 ** *id and *pReadonly unchanged.
456 */
sqliteOsOpenReadWrite(const char * zFilename,OsFile * id,int * pReadonly)457 int sqliteOsOpenReadWrite(
458 const char *zFilename,
459 OsFile *id,
460 int *pReadonly
461 ){
462 #if OS_UNIX
463 int rc;
464 id->dirfd = -1;
465 id->fd = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644);
466 if( id->fd<0 ){
467 #ifdef EISDIR
468 if( errno==EISDIR ){
469 return SQLITE_CANTOPEN;
470 }
471 #endif
472 id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
473 if( id->fd<0 ){
474 return SQLITE_CANTOPEN;
475 }
476 *pReadonly = 1;
477 }else{
478 *pReadonly = 0;
479 }
480 sqliteOsEnterMutex();
481 rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
482 sqliteOsLeaveMutex();
483 if( rc ){
484 close(id->fd);
485 return SQLITE_NOMEM;
486 }
487 id->locked = 0;
488 TRACE3("OPEN %-3d %s\n", id->fd, zFilename);
489 OpenCounter(+1);
490 return SQLITE_OK;
491 #endif
492 #if OS_WIN
493 HANDLE h = CreateFile(zFilename,
494 GENERIC_READ | GENERIC_WRITE,
495 FILE_SHARE_READ | FILE_SHARE_WRITE,
496 NULL,
497 OPEN_ALWAYS,
498 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
499 NULL
500 );
501 if( h==INVALID_HANDLE_VALUE ){
502 h = CreateFile(zFilename,
503 GENERIC_READ,
504 FILE_SHARE_READ,
505 NULL,
506 OPEN_ALWAYS,
507 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
508 NULL
509 );
510 if( h==INVALID_HANDLE_VALUE ){
511 return SQLITE_CANTOPEN;
512 }
513 *pReadonly = 1;
514 }else{
515 *pReadonly = 0;
516 }
517 id->h = h;
518 id->locked = 0;
519 OpenCounter(+1);
520 return SQLITE_OK;
521 #endif
522 #if OS_MAC
523 FSSpec fsSpec;
524 # ifdef _LARGE_FILE
525 HFSUniStr255 dfName;
526 FSRef fsRef;
527 if( __path2fss(zFilename, &fsSpec) != noErr ){
528 if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
529 return SQLITE_CANTOPEN;
530 }
531 if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
532 return SQLITE_CANTOPEN;
533 FSGetDataForkName(&dfName);
534 if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
535 fsRdWrShPerm, &(id->refNum)) != noErr ){
536 if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
537 fsRdWrPerm, &(id->refNum)) != noErr ){
538 if (FSOpenFork(&fsRef, dfName.length, dfName.unicode,
539 fsRdPerm, &(id->refNum)) != noErr )
540 return SQLITE_CANTOPEN;
541 else
542 *pReadonly = 1;
543 } else
544 *pReadonly = 0;
545 } else
546 *pReadonly = 0;
547 # else
548 __path2fss(zFilename, &fsSpec);
549 if( !sqliteOsFileExists(zFilename) ){
550 if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
551 return SQLITE_CANTOPEN;
552 }
553 if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNum)) != noErr ){
554 if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr ){
555 if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
556 return SQLITE_CANTOPEN;
557 else
558 *pReadonly = 1;
559 } else
560 *pReadonly = 0;
561 } else
562 *pReadonly = 0;
563 # endif
564 if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
565 id->refNumRF = -1;
566 }
567 id->locked = 0;
568 id->delOnClose = 0;
569 OpenCounter(+1);
570 return SQLITE_OK;
571 #endif
572 }
573
574
575 /*
576 ** Attempt to open a new file for exclusive access by this process.
577 ** The file will be opened for both reading and writing. To avoid
578 ** a potential security problem, we do not allow the file to have
579 ** previously existed. Nor do we allow the file to be a symbolic
580 ** link.
581 **
582 ** If delFlag is true, then make arrangements to automatically delete
583 ** the file when it is closed.
584 **
585 ** On success, write the file handle into *id and return SQLITE_OK.
586 **
587 ** On failure, return SQLITE_CANTOPEN.
588 */
sqliteOsOpenExclusive(const char * zFilename,OsFile * id,int delFlag)589 int sqliteOsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
590 #if OS_UNIX
591 int rc;
592 if( access(zFilename, 0)==0 ){
593 return SQLITE_CANTOPEN;
594 }
595 id->dirfd = -1;
596 id->fd = open(zFilename,
597 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY, 0600);
598 if( id->fd<0 ){
599 return SQLITE_CANTOPEN;
600 }
601 sqliteOsEnterMutex();
602 rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
603 sqliteOsLeaveMutex();
604 if( rc ){
605 close(id->fd);
606 unlink(zFilename);
607 return SQLITE_NOMEM;
608 }
609 id->locked = 0;
610 if( delFlag ){
611 unlink(zFilename);
612 }
613 TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
614 OpenCounter(+1);
615 return SQLITE_OK;
616 #endif
617 #if OS_WIN
618 HANDLE h;
619 int fileflags;
620 if( delFlag ){
621 fileflags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_RANDOM_ACCESS
622 | FILE_FLAG_DELETE_ON_CLOSE;
623 }else{
624 fileflags = FILE_FLAG_RANDOM_ACCESS;
625 }
626 h = CreateFile(zFilename,
627 GENERIC_READ | GENERIC_WRITE,
628 0,
629 NULL,
630 CREATE_ALWAYS,
631 fileflags,
632 NULL
633 );
634 if( h==INVALID_HANDLE_VALUE ){
635 return SQLITE_CANTOPEN;
636 }
637 id->h = h;
638 id->locked = 0;
639 OpenCounter(+1);
640 return SQLITE_OK;
641 #endif
642 #if OS_MAC
643 FSSpec fsSpec;
644 # ifdef _LARGE_FILE
645 HFSUniStr255 dfName;
646 FSRef fsRef;
647 __path2fss(zFilename, &fsSpec);
648 if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
649 return SQLITE_CANTOPEN;
650 if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
651 return SQLITE_CANTOPEN;
652 FSGetDataForkName(&dfName);
653 if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
654 fsRdWrPerm, &(id->refNum)) != noErr )
655 return SQLITE_CANTOPEN;
656 # else
657 __path2fss(zFilename, &fsSpec);
658 if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
659 return SQLITE_CANTOPEN;
660 if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr )
661 return SQLITE_CANTOPEN;
662 # endif
663 id->refNumRF = -1;
664 id->locked = 0;
665 id->delOnClose = delFlag;
666 if (delFlag)
667 id->pathToDel = sqliteOsFullPathname(zFilename);
668 OpenCounter(+1);
669 return SQLITE_OK;
670 #endif
671 }
672
673 /*
674 ** Attempt to open a new file for read-only access.
675 **
676 ** On success, write the file handle into *id and return SQLITE_OK.
677 **
678 ** On failure, return SQLITE_CANTOPEN.
679 */
sqliteOsOpenReadOnly(const char * zFilename,OsFile * id)680 int sqliteOsOpenReadOnly(const char *zFilename, OsFile *id){
681 #if OS_UNIX
682 int rc;
683 id->dirfd = -1;
684 id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
685 if( id->fd<0 ){
686 return SQLITE_CANTOPEN;
687 }
688 sqliteOsEnterMutex();
689 rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
690 sqliteOsLeaveMutex();
691 if( rc ){
692 close(id->fd);
693 return SQLITE_NOMEM;
694 }
695 id->locked = 0;
696 TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
697 OpenCounter(+1);
698 return SQLITE_OK;
699 #endif
700 #if OS_WIN
701 HANDLE h = CreateFile(zFilename,
702 GENERIC_READ,
703 0,
704 NULL,
705 OPEN_EXISTING,
706 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
707 NULL
708 );
709 if( h==INVALID_HANDLE_VALUE ){
710 return SQLITE_CANTOPEN;
711 }
712 id->h = h;
713 id->locked = 0;
714 OpenCounter(+1);
715 return SQLITE_OK;
716 #endif
717 #if OS_MAC
718 FSSpec fsSpec;
719 # ifdef _LARGE_FILE
720 HFSUniStr255 dfName;
721 FSRef fsRef;
722 if( __path2fss(zFilename, &fsSpec) != noErr )
723 return SQLITE_CANTOPEN;
724 if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
725 return SQLITE_CANTOPEN;
726 FSGetDataForkName(&dfName);
727 if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
728 fsRdPerm, &(id->refNum)) != noErr )
729 return SQLITE_CANTOPEN;
730 # else
731 __path2fss(zFilename, &fsSpec);
732 if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
733 return SQLITE_CANTOPEN;
734 # endif
735 if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
736 id->refNumRF = -1;
737 }
738 id->locked = 0;
739 id->delOnClose = 0;
740 OpenCounter(+1);
741 return SQLITE_OK;
742 #endif
743 }
744
745 /*
746 ** Attempt to open a file descriptor for the directory that contains a
747 ** file. This file descriptor can be used to fsync() the directory
748 ** in order to make sure the creation of a new file is actually written
749 ** to disk.
750 **
751 ** This routine is only meaningful for Unix. It is a no-op under
752 ** windows since windows does not support hard links.
753 **
754 ** On success, a handle for a previously open file is at *id is
755 ** updated with the new directory file descriptor and SQLITE_OK is
756 ** returned.
757 **
758 ** On failure, the function returns SQLITE_CANTOPEN and leaves
759 ** *id unchanged.
760 */
sqliteOsOpenDirectory(const char * zDirname,OsFile * id)761 int sqliteOsOpenDirectory(
762 const char *zDirname,
763 OsFile *id
764 ){
765 #if OS_UNIX
766 if( id->fd<0 ){
767 /* Do not open the directory if the corresponding file is not already
768 ** open. */
769 return SQLITE_CANTOPEN;
770 }
771 assert( id->dirfd<0 );
772 id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0644);
773 if( id->dirfd<0 ){
774 return SQLITE_CANTOPEN;
775 }
776 TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
777 #endif
778 return SQLITE_OK;
779 }
780
/*
** If the following global variable points to a string which is the
** name of a directory, then that directory will be used to store
** temporary files.
**
** When it is 0 (the default), sqliteOsTempFileName() picks a directory
** on its own.
*/
const char *sqlite_temp_directory = 0;
787
788 /*
789 ** Create a temporary file name in zBuf. zBuf must be big enough to
790 ** hold at least SQLITE_TEMPNAME_SIZE characters.
791 */
sqliteOsTempFileName(char * zBuf)792 int sqliteOsTempFileName(char *zBuf){
793 #if OS_UNIX
794 static const char *azDirs[] = {
795 0,
796 "/var/tmp",
797 "/usr/tmp",
798 "/tmp",
799 ".",
800 };
801 static unsigned char zChars[] =
802 "abcdefghijklmnopqrstuvwxyz"
803 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
804 "0123456789";
805 int i, j;
806 struct stat buf;
807 const char *zDir = ".";
808 azDirs[0] = sqlite_temp_directory;
809 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
810 if( azDirs[i]==0 ) continue;
811 if( stat(azDirs[i], &buf) ) continue;
812 if( !S_ISDIR(buf.st_mode) ) continue;
813 if( access(azDirs[i], 07) ) continue;
814 zDir = azDirs[i];
815 break;
816 }
817 do{
818 sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
819 j = strlen(zBuf);
820 sqliteRandomness(15, &zBuf[j]);
821 for(i=0; i<15; i++, j++){
822 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
823 }
824 zBuf[j] = 0;
825 }while( access(zBuf,0)==0 );
826 #endif
827 #if OS_WIN
828 static char zChars[] =
829 "abcdefghijklmnopqrstuvwxyz"
830 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
831 "0123456789";
832 int i, j;
833 const char *zDir;
834 char zTempPath[SQLITE_TEMPNAME_SIZE];
835 if( sqlite_temp_directory==0 ){
836 GetTempPath(SQLITE_TEMPNAME_SIZE-30, zTempPath);
837 for(i=strlen(zTempPath); i>0 && zTempPath[i-1]=='\\'; i--){}
838 zTempPath[i] = 0;
839 zDir = zTempPath;
840 }else{
841 zDir = sqlite_temp_directory;
842 }
843 for(;;){
844 sprintf(zBuf, "%s\\"TEMP_FILE_PREFIX, zDir);
845 j = strlen(zBuf);
846 sqliteRandomness(15, &zBuf[j]);
847 for(i=0; i<15; i++, j++){
848 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
849 }
850 zBuf[j] = 0;
851 if( !sqliteOsFileExists(zBuf) ) break;
852 }
853 #endif
854 #if OS_MAC
855 static char zChars[] =
856 "abcdefghijklmnopqrstuvwxyz"
857 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
858 "0123456789";
859 int i, j;
860 char *zDir;
861 char zTempPath[SQLITE_TEMPNAME_SIZE];
862 char zdirName[32];
863 CInfoPBRec infoRec;
864 Str31 dirName;
865 memset(&infoRec, 0, sizeof(infoRec));
866 memset(zTempPath, 0, SQLITE_TEMPNAME_SIZE);
867 if( sqlite_temp_directory!=0 ){
868 zDir = sqlite_temp_directory;
869 }else if( FindFolder(kOnSystemDisk, kTemporaryFolderType, kCreateFolder,
870 &(infoRec.dirInfo.ioVRefNum), &(infoRec.dirInfo.ioDrParID)) == noErr ){
871 infoRec.dirInfo.ioNamePtr = dirName;
872 do{
873 infoRec.dirInfo.ioFDirIndex = -1;
874 infoRec.dirInfo.ioDrDirID = infoRec.dirInfo.ioDrParID;
875 if( PBGetCatInfoSync(&infoRec) == noErr ){
876 CopyPascalStringToC(dirName, zdirName);
877 i = strlen(zdirName);
878 memmove(&(zTempPath[i+1]), zTempPath, strlen(zTempPath));
879 strcpy(zTempPath, zdirName);
880 zTempPath[i] = ':';
881 }else{
882 *zTempPath = 0;
883 break;
884 }
885 } while( infoRec.dirInfo.ioDrDirID != fsRtDirID );
886 zDir = zTempPath;
887 }
888 if( zDir[0]==0 ){
889 getcwd(zTempPath, SQLITE_TEMPNAME_SIZE-24);
890 zDir = zTempPath;
891 }
892 for(;;){
893 sprintf(zBuf, "%s"TEMP_FILE_PREFIX, zDir);
894 j = strlen(zBuf);
895 sqliteRandomness(15, &zBuf[j]);
896 for(i=0; i<15; i++, j++){
897 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
898 }
899 zBuf[j] = 0;
900 if( !sqliteOsFileExists(zBuf) ) break;
901 }
902 #endif
903 return SQLITE_OK;
904 }
905
906 /*
907 ** Close a file.
908 */
sqliteOsClose(OsFile * id)909 int sqliteOsClose(OsFile *id){
910 #if OS_UNIX
911 sqliteOsUnlock(id);
912 if( id->dirfd>=0 ) close(id->dirfd);
913 id->dirfd = -1;
914 sqliteOsEnterMutex();
915 if( id->pOpen->nLock ){
916 /* If there are outstanding locks, do not actually close the file just
917 ** yet because that would clear those locks. Instead, add the file
918 ** descriptor to pOpen->aPending. It will be automatically closed when
919 ** the last lock is cleared.
920 */
921 int *aNew;
922 struct openCnt *pOpen = id->pOpen;
923 pOpen->nPending++;
924 aNew = sqliteRealloc( pOpen->aPending, pOpen->nPending*sizeof(int) );
925 if( aNew==0 ){
926 /* If a malloc fails, just leak the file descriptor */
927 }else{
928 pOpen->aPending = aNew;
929 pOpen->aPending[pOpen->nPending-1] = id->fd;
930 }
931 }else{
932 /* There are no outstanding locks so we can close the file immediately */
933 close(id->fd);
934 }
935 releaseLockInfo(id->pLock);
936 releaseOpenCnt(id->pOpen);
937 sqliteOsLeaveMutex();
938 TRACE2("CLOSE %-3d\n", id->fd);
939 OpenCounter(-1);
940 return SQLITE_OK;
941 #endif
942 #if OS_WIN
943 CloseHandle(id->h);
944 OpenCounter(-1);
945 return SQLITE_OK;
946 #endif
947 #if OS_MAC
948 if( id->refNumRF!=-1 )
949 FSClose(id->refNumRF);
950 # ifdef _LARGE_FILE
951 FSCloseFork(id->refNum);
952 # else
953 FSClose(id->refNum);
954 # endif
955 if( id->delOnClose ){
956 unlink(id->pathToDel);
957 sqliteFree(id->pathToDel);
958 }
959 OpenCounter(-1);
960 return SQLITE_OK;
961 #endif
962 }
963
964 /*
965 ** Read data from a file into a buffer. Return SQLITE_OK if all
966 ** bytes were read successfully and SQLITE_IOERR if anything goes
967 ** wrong.
968 */
sqliteOsRead(OsFile * id,void * pBuf,int amt)969 int sqliteOsRead(OsFile *id, void *pBuf, int amt){
970 #if OS_UNIX
971 int got;
972 SimulateIOError(SQLITE_IOERR);
973 TIMER_START;
974 got = read(id->fd, pBuf, amt);
975 TIMER_END;
976 TRACE4("READ %-3d %7d %d\n", id->fd, last_page, elapse);
977 SEEK(0);
978 /* if( got<0 ) got = 0; */
979 if( got==amt ){
980 return SQLITE_OK;
981 }else{
982 return SQLITE_IOERR;
983 }
984 #endif
985 #if OS_WIN
986 DWORD got;
987 SimulateIOError(SQLITE_IOERR);
988 TRACE2("READ %d\n", last_page);
989 if( !ReadFile(id->h, pBuf, amt, &got, 0) ){
990 got = 0;
991 }
992 if( got==(DWORD)amt ){
993 return SQLITE_OK;
994 }else{
995 return SQLITE_IOERR;
996 }
997 #endif
998 #if OS_MAC
999 int got;
1000 SimulateIOError(SQLITE_IOERR);
1001 TRACE2("READ %d\n", last_page);
1002 # ifdef _LARGE_FILE
1003 FSReadFork(id->refNum, fsAtMark, 0, (ByteCount)amt, pBuf, (ByteCount*)&got);
1004 # else
1005 got = amt;
1006 FSRead(id->refNum, &got, pBuf);
1007 # endif
1008 if( got==amt ){
1009 return SQLITE_OK;
1010 }else{
1011 return SQLITE_IOERR;
1012 }
1013 #endif
1014 }
1015
1016 /*
1017 ** Write data from a buffer into a file. Return SQLITE_OK on success
1018 ** or some other error code on failure.
1019 */
sqliteOsWrite(OsFile * id,const void * pBuf,int amt)1020 int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
1021 #if OS_UNIX
1022 int wrote = 0;
1023 SimulateIOError(SQLITE_IOERR);
1024 TIMER_START;
1025 while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
1026 amt -= wrote;
1027 pBuf = &((char*)pBuf)[wrote];
1028 }
1029 TIMER_END;
1030 TRACE4("WRITE %-3d %7d %d\n", id->fd, last_page, elapse);
1031 SEEK(0);
1032 if( amt>0 ){
1033 return SQLITE_FULL;
1034 }
1035 return SQLITE_OK;
1036 #endif
1037 #if OS_WIN
1038 int rc;
1039 DWORD wrote;
1040 SimulateIOError(SQLITE_IOERR);
1041 TRACE2("WRITE %d\n", last_page);
1042 while( amt>0 && (rc = WriteFile(id->h, pBuf, amt, &wrote, 0))!=0 && wrote>0 ){
1043 amt -= wrote;
1044 pBuf = &((char*)pBuf)[wrote];
1045 }
1046 if( !rc || amt>(int)wrote ){
1047 return SQLITE_FULL;
1048 }
1049 return SQLITE_OK;
1050 #endif
1051 #if OS_MAC
1052 OSErr oserr;
1053 int wrote = 0;
1054 SimulateIOError(SQLITE_IOERR);
1055 TRACE2("WRITE %d\n", last_page);
1056 while( amt>0 ){
1057 # ifdef _LARGE_FILE
1058 oserr = FSWriteFork(id->refNum, fsAtMark, 0,
1059 (ByteCount)amt, pBuf, (ByteCount*)&wrote);
1060 # else
1061 wrote = amt;
1062 oserr = FSWrite(id->refNum, &wrote, pBuf);
1063 # endif
1064 if( wrote == 0 || oserr != noErr)
1065 break;
1066 amt -= wrote;
1067 pBuf = &((char*)pBuf)[wrote];
1068 }
1069 if( oserr != noErr || amt>wrote ){
1070 return SQLITE_FULL;
1071 }
1072 return SQLITE_OK;
1073 #endif
1074 }
1075
1076 /*
1077 ** Move the read/write pointer in a file.
1078 */
sqliteOsSeek(OsFile * id,off_t offset)1079 int sqliteOsSeek(OsFile *id, off_t offset){
1080 SEEK(offset/1024 + 1);
1081 #if OS_UNIX
1082 lseek(id->fd, offset, SEEK_SET);
1083 return SQLITE_OK;
1084 #endif
1085 #if OS_WIN
1086 {
1087 LONG upperBits = offset>>32;
1088 LONG lowerBits = offset & 0xffffffff;
1089 DWORD rc;
1090 rc = SetFilePointer(id->h, lowerBits, &upperBits, FILE_BEGIN);
1091 /* TRACE3("SEEK rc=0x%x upper=0x%x\n", rc, upperBits); */
1092 }
1093 return SQLITE_OK;
1094 #endif
1095 #if OS_MAC
1096 {
1097 off_t curSize;
1098 if( sqliteOsFileSize(id, &curSize) != SQLITE_OK ){
1099 return SQLITE_IOERR;
1100 }
1101 if( offset >= curSize ){
1102 if( sqliteOsTruncate(id, offset+1) != SQLITE_OK ){
1103 return SQLITE_IOERR;
1104 }
1105 }
1106 # ifdef _LARGE_FILE
1107 if( FSSetForkPosition(id->refNum, fsFromStart, offset) != noErr ){
1108 # else
1109 if( SetFPos(id->refNum, fsFromStart, offset) != noErr ){
1110 # endif
1111 return SQLITE_IOERR;
1112 }else{
1113 return SQLITE_OK;
1114 }
1115 }
1116 #endif
1117 }
1118
1119 /*
1120 ** Make sure all writes to a particular file are committed to disk.
1121 **
1122 ** Under Unix, also make sure that the directory entry for the file
1123 ** has been created by fsync-ing the directory that contains the file.
1124 ** If we do not do this and we encounter a power failure, the directory
1125 ** entry for the journal might not exist after we reboot. The next
1126 ** SQLite to access the file will not know that the journal exists (because
1127 ** the directory entry for the journal was never created) and the transaction
1128 ** will not roll back - possibly leading to database corruption.
1129 */
1130 int sqliteOsSync(OsFile *id){
1131 #if OS_UNIX
1132 SimulateIOError(SQLITE_IOERR);
1133 TRACE2("SYNC %-3d\n", id->fd);
1134 if( fsync(id->fd) ){
1135 return SQLITE_IOERR;
1136 }else{
1137 if( id->dirfd>=0 ){
1138 TRACE2("DIRSYNC %-3d\n", id->dirfd);
1139 fsync(id->dirfd);
1140 close(id->dirfd); /* Only need to sync once, so close the directory */
1141 id->dirfd = -1; /* when we are done. */
1142 }
1143 return SQLITE_OK;
1144 }
1145 #endif
1146 #if OS_WIN
1147 if( FlushFileBuffers(id->h) ){
1148 return SQLITE_OK;
1149 }else{
1150 return SQLITE_IOERR;
1151 }
1152 #endif
1153 #if OS_MAC
1154 # ifdef _LARGE_FILE
1155 if( FSFlushFork(id->refNum) != noErr ){
1156 # else
1157 ParamBlockRec params;
1158 memset(¶ms, 0, sizeof(ParamBlockRec));
1159 params.ioParam.ioRefNum = id->refNum;
1160 if( PBFlushFileSync(¶ms) != noErr ){
1161 # endif
1162 return SQLITE_IOERR;
1163 }else{
1164 return SQLITE_OK;
1165 }
1166 #endif
1167 }
1168
1169 /*
1170 ** Truncate an open file to a specified size
1171 */
1172 int sqliteOsTruncate(OsFile *id, off_t nByte){
1173 SimulateIOError(SQLITE_IOERR);
1174 #if OS_UNIX
1175 return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
1176 #endif
1177 #if OS_WIN
1178 {
1179 LONG upperBits = nByte>>32;
1180 SetFilePointer(id->h, nByte, &upperBits, FILE_BEGIN);
1181 SetEndOfFile(id->h);
1182 }
1183 return SQLITE_OK;
1184 #endif
1185 #if OS_MAC
1186 # ifdef _LARGE_FILE
1187 if( FSSetForkSize(id->refNum, fsFromStart, nByte) != noErr){
1188 # else
1189 if( SetEOF(id->refNum, nByte) != noErr ){
1190 # endif
1191 return SQLITE_IOERR;
1192 }else{
1193 return SQLITE_OK;
1194 }
1195 #endif
1196 }
1197
1198 /*
1199 ** Determine the current size of a file in bytes
1200 */
1201 int sqliteOsFileSize(OsFile *id, off_t *pSize){
1202 #if OS_UNIX
1203 struct stat buf;
1204 SimulateIOError(SQLITE_IOERR);
1205 if( fstat(id->fd, &buf)!=0 ){
1206 return SQLITE_IOERR;
1207 }
1208 *pSize = buf.st_size;
1209 return SQLITE_OK;
1210 #endif
1211 #if OS_WIN
1212 DWORD upperBits, lowerBits;
1213 SimulateIOError(SQLITE_IOERR);
1214 lowerBits = GetFileSize(id->h, &upperBits);
1215 *pSize = (((off_t)upperBits)<<32) + lowerBits;
1216 return SQLITE_OK;
1217 #endif
1218 #if OS_MAC
1219 # ifdef _LARGE_FILE
1220 if( FSGetForkSize(id->refNum, pSize) != noErr){
1221 # else
1222 if( GetEOF(id->refNum, pSize) != noErr ){
1223 # endif
1224 return SQLITE_IOERR;
1225 }else{
1226 return SQLITE_OK;
1227 }
1228 #endif
1229 }
1230
#if OS_WIN
/*
** Report whether the host belongs to the WinNT family (WinNT, Win2K,
** WinXP).  The result is non-zero for those systems and zero for
** Win95, Win98 and WinME.
**
** Win95/98/ME lack the LockFileEx() API.  It is still safe to link
** statically against that API as long as it is never called when
** running on Win95/98/ME.  The locking code calls this routine to find
** out whether LockFileEx() may be used.
**
** The platform is queried once and the answer is remembered in a
** static variable.
*/
int isNT(void){
  static int cachedKind = 0;  /* 0=unknown 1=win95 2=winNT */
  if( cachedKind==0 ){
    OSVERSIONINFO ver;
    ver.dwOSVersionInfoSize = sizeof(ver);
    GetVersionEx(&ver);
    if( ver.dwPlatformId==VER_PLATFORM_WIN32_NT ){
      cachedKind = 2;
    }else{
      cachedKind = 1;
    }
  }
  return cachedKind==2;
}
#endif
1254
1255 /*
1256 ** Windows file locking notes: [similar issues apply to MacOS]
1257 **
1258 ** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because
1259 ** those functions are not available. So we use only LockFile() and
1260 ** UnlockFile().
1261 **
1262 ** LockFile() prevents not just writing but also reading by other processes.
1263 ** (This is a design error on the part of Windows, but there is nothing
1264 ** we can do about that.) So the region used for locking is at the
1265 ** end of the file where it is unlikely to ever interfere with an
1266 ** actual read attempt.
1267 **
1268 ** A database read lock is obtained by locking a single randomly-chosen
1269 ** byte out of a specific range of bytes. The lock byte is obtained at
1270 ** random so two separate readers can probably access the file at the
1271 ** same time, unless they are unlucky and choose the same lock byte.
1272 ** A database write lock is obtained by locking all bytes in the range.
1273 ** There can only be one writer.
1274 **
1275 ** A lock is obtained on the first byte of the lock range before acquiring
1276 ** either a read lock or a write lock. This prevents two processes from
** attempting to get a lock at the same time. The semantics of
1278 ** sqliteOsReadLock() require that if there is already a write lock, that
1279 ** lock is converted into a read lock atomically. The lock on the first
1280 ** byte allows us to drop the old write lock and get the read lock without
1281 ** another process jumping into the middle and messing us up. The same
1282 ** argument applies to sqliteOsWriteLock().
1283 **
1284 ** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available,
1285 ** which means we can use reader/writer locks. When reader writer locks
1286 ** are used, the lock is placed on the same range of bytes that is used
1287 ** for probabilistic locking in Win95/98/ME. Hence, the locking scheme
1288 ** will support two or more Win95 readers or two or more WinNT readers.
1289 ** But a single Win95 reader will lock out all WinNT readers and a single
1290 ** WinNT reader will lock out all other Win95 readers.
1291 **
1292 ** Note: On MacOS we use the resource fork for locking.
1293 **
1294 ** The following #defines specify the range of bytes used for locking.
1295 ** N_LOCKBYTE is the number of bytes available for doing the locking.
1296 ** The first byte used to hold the lock while the lock is changing does
1297 ** not count toward this number. FIRST_LOCKBYTE is the address of
1298 ** the first byte in the range of bytes used for locking.
1299 */
/* Number of bytes in the lock range, not counting the extra byte at
** FIRST_LOCKBYTE that is held while a lock is being changed. */
#define N_LOCKBYTE 10239
/* Offset of the first byte of the lock range.  The range is placed so
** that it ends at 0x000fffff on MacOS and at 0xffffffff elsewhere.
** NOTE(review): the reason for the lower MacOS base is not visible
** here - confirm before changing. */
#if OS_MAC
# define FIRST_LOCKBYTE (0x000fffff - N_LOCKBYTE)
#else
# define FIRST_LOCKBYTE (0xffffffff - N_LOCKBYTE)
#endif
1306
1307 /*
1308 ** Change the status of the lock on the file "id" to be a readlock.
1309 ** If the file was write locked, then this reduces the lock to a read.
1310 ** If the file was read locked, then this acquires a new read lock.
1311 **
1312 ** Return SQLITE_OK on success and SQLITE_BUSY on failure. If this
1313 ** library was compiled with large file support (LFS) but LFS is not
1314 ** available on the host, then an SQLITE_NOLFS is returned.
1315 */
1316 int sqliteOsReadLock(OsFile *id){
1317 #if OS_UNIX
1318 int rc;
1319 sqliteOsEnterMutex();
1320 if( id->pLock->cnt>0 ){
1321 if( !id->locked ){
1322 id->pLock->cnt++;
1323 id->locked = 1;
1324 id->pOpen->nLock++;
1325 }
1326 rc = SQLITE_OK;
1327 }else if( id->locked || id->pLock->cnt==0 ){
1328 struct flock lock;
1329 int s;
1330 lock.l_type = F_RDLCK;
1331 lock.l_whence = SEEK_SET;
1332 lock.l_start = lock.l_len = 0L;
1333 s = fcntl(id->fd, F_SETLK, &lock);
1334 if( s!=0 ){
1335 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1336 }else{
1337 rc = SQLITE_OK;
1338 if( !id->locked ){
1339 id->pOpen->nLock++;
1340 id->locked = 1;
1341 }
1342 id->pLock->cnt = 1;
1343 }
1344 }else{
1345 rc = SQLITE_BUSY;
1346 }
1347 sqliteOsLeaveMutex();
1348 return rc;
1349 #endif
1350 #if OS_WIN
1351 int rc;
1352 if( id->locked>0 ){
1353 rc = SQLITE_OK;
1354 }else{
1355 int lk;
1356 int res;
1357 int cnt = 100;
1358 sqliteRandomness(sizeof(lk), &lk);
1359 lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
1360 while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
1361 Sleep(1);
1362 }
1363 if( res ){
1364 UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1365 if( isNT() ){
1366 OVERLAPPED ovlp;
1367 ovlp.Offset = FIRST_LOCKBYTE+1;
1368 ovlp.OffsetHigh = 0;
1369 ovlp.hEvent = 0;
1370 res = LockFileEx(id->h, LOCKFILE_FAIL_IMMEDIATELY,
1371 0, N_LOCKBYTE, 0, &ovlp);
1372 }else{
1373 res = LockFile(id->h, FIRST_LOCKBYTE+lk, 0, 1, 0);
1374 }
1375 UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
1376 }
1377 if( res ){
1378 id->locked = lk;
1379 rc = SQLITE_OK;
1380 }else{
1381 rc = SQLITE_BUSY;
1382 }
1383 }
1384 return rc;
1385 #endif
1386 #if OS_MAC
1387 int rc;
1388 if( id->locked>0 || id->refNumRF == -1 ){
1389 rc = SQLITE_OK;
1390 }else{
1391 int lk;
1392 OSErr res;
1393 int cnt = 5;
1394 ParamBlockRec params;
1395 sqliteRandomness(sizeof(lk), &lk);
1396 lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
1397 memset(¶ms, 0, sizeof(params));
1398 params.ioParam.ioRefNum = id->refNumRF;
1399 params.ioParam.ioPosMode = fsFromStart;
1400 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1401 params.ioParam.ioReqCount = 1;
1402 while( cnt-->0 && (res = PBLockRangeSync(¶ms))!=noErr ){
1403 UInt32 finalTicks;
1404 Delay(1, &finalTicks); /* 1/60 sec */
1405 }
1406 if( res == noErr ){
1407 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1408 params.ioParam.ioReqCount = N_LOCKBYTE;
1409 PBUnlockRangeSync(¶ms);
1410 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+lk;
1411 params.ioParam.ioReqCount = 1;
1412 res = PBLockRangeSync(¶ms);
1413 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1414 params.ioParam.ioReqCount = 1;
1415 PBUnlockRangeSync(¶ms);
1416 }
1417 if( res == noErr ){
1418 id->locked = lk;
1419 rc = SQLITE_OK;
1420 }else{
1421 rc = SQLITE_BUSY;
1422 }
1423 }
1424 return rc;
1425 #endif
1426 }
1427
1428 /*
1429 ** Change the lock status to be an exclusive or write lock. Return
1430 ** SQLITE_OK on success and SQLITE_BUSY on a failure. If this
1431 ** library was compiled with large file support (LFS) but LFS is not
1432 ** available on the host, then an SQLITE_NOLFS is returned.
1433 */
1434 int sqliteOsWriteLock(OsFile *id){
1435 #if OS_UNIX
1436 int rc;
1437 sqliteOsEnterMutex();
1438 if( id->pLock->cnt==0 || (id->pLock->cnt==1 && id->locked==1) ){
1439 struct flock lock;
1440 int s;
1441 lock.l_type = F_WRLCK;
1442 lock.l_whence = SEEK_SET;
1443 lock.l_start = lock.l_len = 0L;
1444 s = fcntl(id->fd, F_SETLK, &lock);
1445 if( s!=0 ){
1446 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1447 }else{
1448 rc = SQLITE_OK;
1449 if( !id->locked ){
1450 id->pOpen->nLock++;
1451 id->locked = 1;
1452 }
1453 id->pLock->cnt = -1;
1454 }
1455 }else{
1456 rc = SQLITE_BUSY;
1457 }
1458 sqliteOsLeaveMutex();
1459 return rc;
1460 #endif
1461 #if OS_WIN
1462 int rc;
1463 if( id->locked<0 ){
1464 rc = SQLITE_OK;
1465 }else{
1466 int res;
1467 int cnt = 100;
1468 while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
1469 Sleep(1);
1470 }
1471 if( res ){
1472 if( id->locked>0 ){
1473 if( isNT() ){
1474 UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1475 }else{
1476 res = UnlockFile(id->h, FIRST_LOCKBYTE + id->locked, 0, 1, 0);
1477 }
1478 }
1479 if( res ){
1480 res = LockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1481 }else{
1482 res = 0;
1483 }
1484 UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
1485 }
1486 if( res ){
1487 id->locked = -1;
1488 rc = SQLITE_OK;
1489 }else{
1490 rc = SQLITE_BUSY;
1491 }
1492 }
1493 return rc;
1494 #endif
1495 #if OS_MAC
1496 int rc;
1497 if( id->locked<0 || id->refNumRF == -1 ){
1498 rc = SQLITE_OK;
1499 }else{
1500 OSErr res;
1501 int cnt = 5;
1502 ParamBlockRec params;
1503 memset(¶ms, 0, sizeof(params));
1504 params.ioParam.ioRefNum = id->refNumRF;
1505 params.ioParam.ioPosMode = fsFromStart;
1506 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1507 params.ioParam.ioReqCount = 1;
1508 while( cnt-->0 && (res = PBLockRangeSync(¶ms))!=noErr ){
1509 UInt32 finalTicks;
1510 Delay(1, &finalTicks); /* 1/60 sec */
1511 }
1512 if( res == noErr ){
1513 params.ioParam.ioPosOffset = FIRST_LOCKBYTE + id->locked;
1514 params.ioParam.ioReqCount = 1;
1515 if( id->locked==0
1516 || PBUnlockRangeSync(¶ms)==noErr ){
1517 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1518 params.ioParam.ioReqCount = N_LOCKBYTE;
1519 res = PBLockRangeSync(¶ms);
1520 }else{
1521 res = afpRangeNotLocked;
1522 }
1523 params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1524 params.ioParam.ioReqCount = 1;
1525 PBUnlockRangeSync(¶ms);
1526 }
1527 if( res == noErr ){
1528 id->locked = -1;
1529 rc = SQLITE_OK;
1530 }else{
1531 rc = SQLITE_BUSY;
1532 }
1533 }
1534 return rc;
1535 #endif
1536 }
1537
1538 /*
1539 ** Unlock the given file descriptor. If the file descriptor was
1540 ** not previously locked, then this routine is a no-op. If this
1541 ** library was compiled with large file support (LFS) but LFS is not
1542 ** available on the host, then an SQLITE_NOLFS is returned.
1543 */
1544 int sqliteOsUnlock(OsFile *id){
1545 #if OS_UNIX
1546 int rc;
1547 if( !id->locked ) return SQLITE_OK;
1548 sqliteOsEnterMutex();
1549 assert( id->pLock->cnt!=0 );
1550 if( id->pLock->cnt>1 ){
1551 id->pLock->cnt--;
1552 rc = SQLITE_OK;
1553 }else{
1554 struct flock lock;
1555 int s;
1556 lock.l_type = F_UNLCK;
1557 lock.l_whence = SEEK_SET;
1558 lock.l_start = lock.l_len = 0L;
1559 s = fcntl(id->fd, F_SETLK, &lock);
1560 if( s!=0 ){
1561 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1562 }else{
1563 rc = SQLITE_OK;
1564 id->pLock->cnt = 0;
1565 }
1566 }
1567 if( rc==SQLITE_OK ){
1568 /* Decrement the count of locks against this same file. When the
1569 ** count reaches zero, close any other file descriptors whose close
1570 ** was deferred because of outstanding locks.
1571 */
1572 struct openCnt *pOpen = id->pOpen;
1573 pOpen->nLock--;
1574 assert( pOpen->nLock>=0 );
1575 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1576 int i;
1577 for(i=0; i<pOpen->nPending; i++){
1578 close(pOpen->aPending[i]);
1579 }
1580 sqliteFree(pOpen->aPending);
1581 pOpen->nPending = 0;
1582 pOpen->aPending = 0;
1583 }
1584 }
1585 sqliteOsLeaveMutex();
1586 id->locked = 0;
1587 return rc;
1588 #endif
1589 #if OS_WIN
1590 int rc;
1591 if( id->locked==0 ){
1592 rc = SQLITE_OK;
1593 }else if( isNT() || id->locked<0 ){
1594 UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1595 rc = SQLITE_OK;
1596 id->locked = 0;
1597 }else{
1598 UnlockFile(id->h, FIRST_LOCKBYTE+id->locked, 0, 1, 0);
1599 rc = SQLITE_OK;
1600 id->locked = 0;
1601 }
1602 return rc;
1603 #endif
1604 #if OS_MAC
1605 int rc;
1606 ParamBlockRec params;
1607 memset(¶ms, 0, sizeof(params));
1608 params.ioParam.ioRefNum = id->refNumRF;
1609 params.ioParam.ioPosMode = fsFromStart;
1610 if( id->locked==0 || id->refNumRF == -1 ){
1611 rc = SQLITE_OK;
1612 }else if( id->locked<0 ){
1613 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1614 params.ioParam.ioReqCount = N_LOCKBYTE;
1615 PBUnlockRangeSync(¶ms);
1616 rc = SQLITE_OK;
1617 id->locked = 0;
1618 }else{
1619 params.ioParam.ioPosOffset = FIRST_LOCKBYTE+id->locked;
1620 params.ioParam.ioReqCount = 1;
1621 PBUnlockRangeSync(¶ms);
1622 rc = SQLITE_OK;
1623 id->locked = 0;
1624 }
1625 return rc;
1626 #endif
1627 }
1628
1629 /*
1630 ** Get information to seed the random number generator. The seed
1631 ** is written into the buffer zBuf[256]. The calling function must
1632 ** supply a sufficiently large buffer.
1633 */
1634 int sqliteOsRandomSeed(char *zBuf){
1635 /* We have to initialize zBuf to prevent valgrind from reporting
1636 ** errors. The reports issued by valgrind are incorrect - we would
1637 ** prefer that the randomness be increased by making use of the
1638 ** uninitialized space in zBuf - but valgrind errors tend to worry
1639 ** some users. Rather than argue, it seems easier just to initialize
1640 ** the whole array and silence valgrind, even if that means less randomness
1641 ** in the random seed.
1642 **
1643 ** When testing, initializing zBuf[] to zero is all we do. That means
1644 ** that we always use the same random number sequence.* This makes the
1645 ** tests repeatable.
1646 */
1647 memset(zBuf, 0, 256);
1648 #if OS_UNIX && !defined(SQLITE_TEST)
1649 {
1650 int pid;
1651 time((time_t*)zBuf);
1652 pid = getpid();
1653 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1654 }
1655 #endif
1656 #if OS_WIN && !defined(SQLITE_TEST)
1657 GetSystemTime((LPSYSTEMTIME)zBuf);
1658 #endif
1659 #if OS_MAC
1660 {
1661 int pid;
1662 Microseconds((UnsignedWide*)zBuf);
1663 pid = getpid();
1664 memcpy(&zBuf[sizeof(UnsignedWide)], &pid, sizeof(pid));
1665 }
1666 #endif
1667 return SQLITE_OK;
1668 }
1669
/*
** Suspend the caller for roughly ms milliseconds and return the
** number of milliseconds that were requested from the operating
** system.
**
** On Unix without usleep() the delay is rounded up to whole seconds.
** On MacOS it is converted to ticks of 1/60 second.
*/
int sqliteOsSleep(int ms){
#if OS_UNIX
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
#else
  int nSec = (ms+999)/1000;   /* Round up to the next full second */
  sleep(nSec);
  return 1000*nSec;
#endif
#endif
#if OS_WIN
  Sleep(ms);
  return ms;
#endif
#if OS_MAC
  UInt32 endTicks;
  UInt32 nTick = (((UInt32)ms+16)*3)/50;  /* 1/60 sec per tick */
  Delay(nTick, &endTicks);
  return (int)((nTick*50)/3);
#endif
}
1694
/*
** Static variables used for thread synchronization.
**
** inMutex is set to 1 by sqliteOsEnterMutex() and reset to 0 by
** sqliteOsLeaveMutex().  Both routines assert() on it.  Each of the
** lock objects below exists only when the corresponding threading
** macro is defined.
*/
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
#ifdef SQLITE_W32_THREADS
static CRITICAL_SECTION cs;   /* Initialized on first use by sqliteOsEnterMutex() */
#endif
#ifdef SQLITE_MACOS_MULTITASKING
static MPCriticalRegionID criticalRegion;  /* Created on first use by sqliteOsEnterMutex() */
#endif
1708
/*
** The following pair of routines implements mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
**
** sqliteOsEnterMutex() acquires the platform lock (if a threading
** macro is defined) and then marks the mutex as held by setting
** inMutex.  The assert() means the mutex must not already be held
** when this routine is called.
*/
void sqliteOsEnterMutex(){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  /* The critical section is initialized on first use.  The caller whose
  ** InterlockedIncrement() returns 1 performs the initialization; every
  ** other caller sleeps 1ms and re-checks isInit. */
  static int isInit = 0;
  while( !isInit ){
    static long lock = 0;
    if( InterlockedIncrement(&lock)==1 ){
      InitializeCriticalSection(&cs);
      isInit = 1;
    }else{
      Sleep(1);
    }
  }
  EnterCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  /* notInit: 1 = region not yet created, 2 = creation in progress,
  ** 0 = region ready.
  ** NOTE(review): a caller that sees state 2 yields only once and then
  ** proceeds without re-checking - confirm that this is sufficient. */
  static volatile int notInit = 1;
  if( notInit ){
    if( notInit == 2 ) /* as close as you can get to thread safe init */
      MPYield();
    else{
      notInit = 2;
      MPCreateCriticalRegion(&criticalRegion);
      notInit = 0;
    }
  }
  MPEnterCriticalRegion(criticalRegion, kDurationForever);
#endif
  assert( !inMutex );
  inMutex = 1;
}
/*
** Release the mutex.  inMutex must be set on entry (checked by
** assert()).  It is cleared before the platform lock, if any, is
** released.
*/
void sqliteOsLeaveMutex(){
  assert( inMutex );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  LeaveCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  MPExitCriticalRegion(criticalRegion);
#endif
}
1763
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** A NULL pointer is returned if the Windows buffer cannot be
** allocated, or if the current working directory is needed (Unix and
** MacOS) but cannot be determined.
**
** Unix: a name starting with '/' is copied as-is.  Anything else is
** appended to the current directory with a '/' in between.
** MacOS: a name starting with ':' has that colon dropped and is
** appended to the current directory, a name containing a ':' elsewhere
** is copied as-is, and any other name is appended to the current
** directory.
*/
char *sqliteOsFullPathname(const char *zRelative){
#if OS_UNIX
  char *zFull = 0;
  if( zRelative[0]=='/' ){
    sqliteSetString(&zFull, zRelative, (char*)0);
  }else{
    char zBuf[5000];
    const char *zCwd = getcwd(zBuf, sizeof(zBuf));
    /* getcwd() returns NULL on failure; it must not be used as a string */
    if( zCwd==0 ) return 0;
    sqliteSetString(&zFull, zCwd, "/", zRelative, (char*)0);
  }
  return zFull;
#endif
#if OS_WIN
  char *zNotUsed;
  char *zFull;
  int nByte;
  /* First call only asks for the required buffer size */
  nByte = GetFullPathName(zRelative, 0, 0, &zNotUsed) + 1;
  zFull = sqliteMalloc( nByte );
  if( zFull==0 ) return 0;
  GetFullPathName(zRelative, nByte, zFull, &zNotUsed);
  return zFull;
#endif
#if OS_MAC
  char *zFull = 0;
  if( zRelative[0]==':' ){
    char zBuf[_MAX_PATH+1];
    const char *zCwd = getcwd(zBuf, sizeof(zBuf));
    if( zCwd==0 ) return 0;
    sqliteSetString(&zFull, zCwd, &(zRelative[1]), (char*)0);
  }else{
    if( strchr(zRelative, ':') ){
      sqliteSetString(&zFull, zRelative, (char*)0);
    }else{
      char zBuf[_MAX_PATH+1];
      const char *zCwd = getcwd(zBuf, sizeof(zBuf));
      if( zCwd==0 ) return 0;
      sqliteSetString(&zFull, zCwd, zRelative, (char*)0);
    }
  }
  return zFull;
#endif
}
1809
/*
** The following variable, if set to a non-zero value, overrides the
** time reported by sqliteOsCurrentTime().  The value is divided by
** 86400.0 and offset by 2440587.5 there, i.e. it is treated as a count
** of seconds that is converted to a Julian Day number.  This is used
** for testing and exists only when SQLITE_TEST is defined.
*/
#ifdef SQLITE_TEST
int sqlite_current_time = 0;
#endif
1817
/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found, in which
** case *prNow is left untouched.
**
** The time cannot be found when the library is built for a platform
** that has no clock code below (for example OS_MAC) and no test
** override is active.
*/
int sqliteOsCurrentTime(double *prNow){
  int rc = 1;   /* Failure until one of the branches below stores a time */
#if OS_UNIX
  {
    time_t t;
    time(&t);
    /* Seconds -> days, then shift by 2440587.5 to get a Julian Day */
    *prNow = t/86400.0 + 2440587.5;
    rc = 0;
  }
#endif
#if OS_WIN
  {
    FILETIME ft;
    /* FILETIME structure is a 64-bit value representing the number of
       100-nanosecond intervals since January 1, 1601 (= JD 2305813.5).
    */
    double now;
    GetSystemTimeAsFileTime( &ft );
    now = ((double)ft.dwHighDateTime) * 4294967296.0;
    *prNow = (now + ft.dwLowDateTime)/864000000000.0 + 2305813.5;
    rc = 0;
  }
#endif
#ifdef SQLITE_TEST
  /* A non-zero test override replaces whatever was computed above */
  if( sqlite_current_time ){
    *prNow = sqlite_current_time/86400.0 + 2440587.5;
    rc = 0;
  }
#endif
  return rc;
}
1846