xref: /illumos-gate/usr/src/lib/libsqlite/src/os.c (revision 1da57d551424de5a9d469760be7c4b4d4f10a755)
1 /*
2 ** 2001 September 16
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains code that is specific to particular operating
14 ** systems.  The purpose of this file is to provide a uniform abstraction
15 ** on which the rest of SQLite can operate.
16 */
17 #include "os.h"          /* Must be first to enable large file support */
18 #include "sqliteInt.h"
19 
20 #if OS_UNIX
21 # include <time.h>
22 # include <errno.h>
23 # include <unistd.h>
24 # ifndef O_LARGEFILE
25 #  define O_LARGEFILE 0
26 # endif
27 # ifdef SQLITE_DISABLE_LFS
28 #  undef O_LARGEFILE
29 #  define O_LARGEFILE 0
30 # endif
31 # ifndef O_NOFOLLOW
32 #  define O_NOFOLLOW 0
33 # endif
34 # ifndef O_BINARY
35 #  define O_BINARY 0
36 # endif
37 #endif
38 
39 
40 #if OS_WIN
41 # include <winbase.h>
42 #endif
43 
44 #if OS_MAC
45 # include <extras.h>
46 # include <path2fss.h>
47 # include <TextUtils.h>
48 # include <FinderRegistry.h>
49 # include <Folders.h>
50 # include <Timer.h>
51 # include <OSUtils.h>
52 #endif
53 
54 /*
55 ** The DJGPP compiler environment looks mostly like Unix, but it
56 ** lacks the fcntl() system call.  So redefine fcntl() to be something
57 ** that always succeeds.  This means that locking does not occur under
58 ** DJGPP.  But it's DOS - what did you expect?
59 */
60 #ifdef __DJGPP__
61 # define fcntl(A,B,C) 0
62 #endif
63 
64 /*
65 ** Macros used to determine whether or not to use threads.  The
66 ** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for
67 ** Posix threads and SQLITE_W32_THREADS is defined if we are
68 ** synchronizing using Win32 threads.
69 */
70 #if OS_UNIX && defined(THREADSAFE) && THREADSAFE
71 # include <pthread.h>
72 # define SQLITE_UNIX_THREADS 1
73 #endif
74 #if OS_WIN && defined(THREADSAFE) && THREADSAFE
75 # define SQLITE_W32_THREADS 1
76 #endif
77 #if OS_MAC && defined(THREADSAFE) && THREADSAFE
78 # include <Multiprocessing.h>
79 # define SQLITE_MACOS_MULTITASKING 1
80 #endif
81 
/*
** Performance-tracing hooks.  Tracing is compiled out by default, in
** which case every macro below expands to nothing.  Change the "#if 0"
** to "#if 1" to time operations with the x86 cycle counter and to print
** trace records on stderr.
*/
#if 0
static int last_page = 0;
__inline__ unsigned long long int hwtime(void){
  unsigned long long int x;
  __asm__("rdtsc\n\t"
          "mov %%edx, %%ecx\n\t"
          :"=A" (x));
  return x;
}
static unsigned long long int g_start;
static unsigned int elapse;
# define TIMER_START       g_start=hwtime()
# define TIMER_END         elapse=hwtime()-g_start
# define SEEK(X)           last_page=(X)
# define TRACE1(X)         fprintf(stderr,X)
# define TRACE2(X,Y)       fprintf(stderr,X,Y)
# define TRACE3(X,Y,Z)     fprintf(stderr,X,Y,Z)
# define TRACE4(X,Y,Z,A)   fprintf(stderr,X,Y,Z,A)
# define TRACE5(X,Y,Z,A,B) fprintf(stderr,X,Y,Z,A,B)
#else
# define TIMER_START
# define TIMER_END
# define SEEK(X)
# define TRACE1(X)
# define TRACE2(X,Y)
# define TRACE3(X,Y,Z)
# define TRACE4(X,Y,Z,A)
# define TRACE5(X,Y,Z,A,B)
#endif
114 
115 
116 #if OS_UNIX
117 /*
118 ** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
119 ** section 6.5.2.2 lines 483 through 490 specify that when a process
120 ** sets or clears a lock, that operation overrides any prior locks set
121 ** by the same process.  It does not explicitly say so, but this implies
122 ** that it overrides locks set by the same process using a different
123 ** file descriptor.  Consider this test case:
124 **
125 **       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
126 **       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
127 **
128 ** Suppose ./file1 and ./file2 are really the same file (because
129 ** one is a hard or symbolic link to the other) then if you set
130 ** an exclusive lock on fd1, then try to get an exclusive lock
131 ** on fd2, it works.  I would have expected the second lock to
132 ** fail since there was already a lock on the file due to fd1.
133 ** But not so.  Since both locks came from the same process, the
134 ** second overrides the first, even though they were on different
135 ** file descriptors opened on different file names.
136 **
137 ** Bummer.  If you ask me, this is broken.  Badly broken.  It means
138 ** that we cannot use POSIX locks to synchronize file access among
139 ** competing threads of the same process.  POSIX locks will work fine
140 ** to synchronize access for threads in separate processes, but not
141 ** threads within the same process.
142 **
143 ** To work around the problem, SQLite has to manage file locks internally
144 ** on its own.  Whenever a new database is opened, we have to find the
145 ** specific inode of the database file (the inode is determined by the
146 ** st_dev and st_ino fields of the stat structure that fstat() fills in)
147 ** and check for locks already existing on that inode.  When locks are
148 ** created or removed, we have to look at our own internal record of the
149 ** locks to see if another thread has previously set a lock on that same
150 ** inode.
151 **
152 ** The OsFile structure for POSIX is no longer just an integer file
153 ** descriptor.  It is now a structure that holds the integer file
154 ** descriptor and a pointer to a structure that describes the internal
155 ** locks on the corresponding inode.  There is one locking structure
156 ** per inode, so if the same inode is opened twice, both OsFile structures
157 ** point to the same locking structure.  The locking structure keeps
158 ** a reference count (so we will know when to delete it) and a "cnt"
159 ** field that tells us its internal lock status.  cnt==0 means the
160 ** file is unlocked.  cnt==-1 means the file has an exclusive lock.
161 ** cnt>0 means there are cnt shared locks on the file.
162 **
163 ** Any attempt to lock or unlock a file first checks the locking
164 ** structure.  The fcntl() system call is only invoked to set a
165 ** POSIX lock if the internal lock structure transitions between
166 ** a locked and an unlocked state.
167 **
168 ** 2004-Jan-11:
169 ** More recent discoveries about POSIX advisory locks.  (The more
170 ** I discover, the more I realize that POSIX advisory locks are
171 ** an abomination.)
172 **
173 ** If you close a file descriptor that points to a file that has locks,
174 ** all locks on that file that are owned by the current process are
175 ** released.  To work around this problem, each OsFile structure contains
176 ** a pointer to an openCnt structure.  There is one openCnt structure
177 ** per open inode, which means that multiple OsFiles can point to a single
178 ** openCnt.  When an attempt is made to close an OsFile, if there are
179 ** other OsFiles open on the same inode that are holding locks, the call
180 ** to close() the file descriptor is deferred until all of the locks clear.
181 ** The openCnt structure keeps a list of file descriptors that need to
182 ** be closed and that list is walked (and cleared) when the last lock
183 ** clears.
184 **
185 ** First, under Linux threads, because each thread has a separate
186 ** process ID, lock operations in one thread do not override locks
187 ** to the same file in other threads.  Linux threads behave like
188 ** separate processes in this respect.  But, if you close a file
189 ** descriptor in linux threads, all locks are cleared, even locks
190 ** on other threads and even though the other threads have different
191 ** process IDs.  Linux threads is inconsistent in this respect.
192 ** (I'm beginning to think that linux threads is an abomination too.)
193 ** The consequence of this all is that the hash table for the lockInfo
194 ** structure has to include the process id as part of its key because
195 ** locks in different threads are treated as distinct.  But the
196 ** openCnt structure should not include the process id in its
197 ** key because close() clears lock on all threads, not just the current
198 ** thread.  Were it not for this goofiness in linux threads, we could
199 ** combine the lockInfo and openCnt structures into a single structure.
200 */
201 
/*
** Lookup key for the lockInfo hash table.  Besides the device and inode
** that identify the file, the key carries the process ID, because on
** some threading implementations (ex: linux) each thread has its own
** process ID.
*/
struct lockKey {
  dev_t dev;   /* Device number (st_dev) */
  ino_t ino;   /* Inode number (st_ino) */
  pid_t pid;   /* Process ID (getpid()) */
};
214 
215 /*
216 ** An instance of the following structure is allocated for each open
217 ** inode on each thread with a different process ID.  (Threads have
218 ** different process IDs on linux, but not on most other unixes.)
219 **
220 ** A single inode can have multiple file descriptors, so each OsFile
221 ** structure contains a pointer to an instance of this object and this
222 ** object keeps a count of the number of OsFiles pointing to it.
223 */
224 struct lockInfo {
225   struct lockKey key;  /* The lookup key */
226   int cnt;             /* 0: unlocked.  -1: write lock.  1...: read lock. */
227   int nRef;            /* Number of pointers to this structure */
228 };
229 
/*
** Lookup key for the openCnt hash table.  It is identical to lockKey
** except that it has no process ID.
*/
struct openKey {
  dev_t dev;   /* Device number (st_dev) */
  ino_t ino;   /* Inode number (st_ino) */
};
239 
240 /*
241 ** An instance of the following structure is allocated for each open
242 ** inode.  This structure keeps track of the number of locks on that
243 ** inode.  If a close is attempted against an inode that is holding
244 ** locks, the close is deferred until all locks clear by adding the
245 ** file descriptor to be closed to the pending list.
246 */
247 struct openCnt {
248   struct openKey key;   /* The lookup key */
249   int nRef;             /* Number of pointers to this structure */
250   int nLock;            /* Number of outstanding locks */
251   int nPending;         /* Number of pending close() operations */
252   int *aPending;        /* Malloced space holding fd's awaiting a close() */
253 };
254 
255 /*
256 ** These hash table maps inodes and process IDs into lockInfo and openCnt
257 ** structures.  Access to these hash tables must be protected by a mutex.
258 */
259 static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
260 static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
261 
262 /*
263 ** Release a lockInfo structure previously allocated by findLockInfo().
264 */
releaseLockInfo(struct lockInfo * pLock)265 static void releaseLockInfo(struct lockInfo *pLock){
266   pLock->nRef--;
267   if( pLock->nRef==0 ){
268     sqliteHashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
269     sqliteFree(pLock);
270   }
271 }
272 
273 /*
274 ** Release a openCnt structure previously allocated by findLockInfo().
275 */
releaseOpenCnt(struct openCnt * pOpen)276 static void releaseOpenCnt(struct openCnt *pOpen){
277   pOpen->nRef--;
278   if( pOpen->nRef==0 ){
279     sqliteHashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
280     sqliteFree(pOpen->aPending);
281     sqliteFree(pOpen);
282   }
283 }
284 
285 /*
286 ** Given a file descriptor, locate lockInfo and openCnt structures that
287 ** describes that file descriptor.  Create a new ones if necessary.  The
288 ** return values might be unset if an error occurs.
289 **
290 ** Return the number of errors.
291 */
findLockInfo(int fd,struct lockInfo ** ppLock,struct openCnt ** ppOpen)292 int findLockInfo(
293   int fd,                      /* The file descriptor used in the key */
294   struct lockInfo **ppLock,    /* Return the lockInfo structure here */
295   struct openCnt **ppOpen   /* Return the openCnt structure here */
296 ){
297   int rc;
298   struct lockKey key1;
299   struct openKey key2;
300   struct stat statbuf;
301   struct lockInfo *pLock;
302   struct openCnt *pOpen;
303   rc = fstat(fd, &statbuf);
304   if( rc!=0 ) return 1;
305   memset(&key1, 0, sizeof(key1));
306   key1.dev = statbuf.st_dev;
307   key1.ino = statbuf.st_ino;
308   key1.pid = getpid();
309   memset(&key2, 0, sizeof(key2));
310   key2.dev = statbuf.st_dev;
311   key2.ino = statbuf.st_ino;
312   pLock = (struct lockInfo*)sqliteHashFind(&lockHash, &key1, sizeof(key1));
313   if( pLock==0 ){
314     struct lockInfo *pOld;
315     pLock = sqliteMallocRaw( sizeof(*pLock) );
316     if( pLock==0 ) return 1;
317     pLock->key = key1;
318     pLock->nRef = 1;
319     pLock->cnt = 0;
320     pOld = sqliteHashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
321     if( pOld!=0 ){
322       assert( pOld==pLock );
323       sqliteFree(pLock);
324       return 1;
325     }
326   }else{
327     pLock->nRef++;
328   }
329   *ppLock = pLock;
330   pOpen = (struct openCnt*)sqliteHashFind(&openHash, &key2, sizeof(key2));
331   if( pOpen==0 ){
332     struct openCnt *pOld;
333     pOpen = sqliteMallocRaw( sizeof(*pOpen) );
334     if( pOpen==0 ){
335       releaseLockInfo(pLock);
336       return 1;
337     }
338     pOpen->key = key2;
339     pOpen->nRef = 1;
340     pOpen->nLock = 0;
341     pOpen->nPending = 0;
342     pOpen->aPending = 0;
343     pOld = sqliteHashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
344     if( pOld!=0 ){
345       assert( pOld==pOpen );
346       sqliteFree(pOpen);
347       releaseLockInfo(pLock);
348       return 1;
349     }
350   }else{
351     pOpen->nRef++;
352   }
353   *ppOpen = pOpen;
354   return 0;
355 }
356 
357 #endif  /** POSIX advisory lock work-around **/
358 
/*
** If we compile with the SQLITE_TEST macro set, then the following block
** of code gives us the ability to simulate a disk I/O error.  This is
** used for testing the I/O recovery logic.
**
** sqlite_io_error_pending is a countdown.  While it is non-zero, each
** SimulateIOError(A) decrements it, and the invocation that finds it
** equal to 1 makes the enclosing function return A.  Without SQLITE_TEST
** the macro expands to nothing.
**
** The macro body is wrapped in do{...}while(0) so that it behaves as a
** single statement and cannot capture a following "else".
*/
#ifdef SQLITE_TEST
int sqlite_io_error_pending = 0;
static void local_ioerr(void){
  sqlite_io_error_pending = 0;  /* Really just a place to set a breakpoint */
}
#define SimulateIOError(A)  \
   do{ \
     if( sqlite_io_error_pending ){ \
       if( sqlite_io_error_pending-- == 1 ){ local_ioerr(); return A; } \
     } \
   }while(0)
#else
#define SimulateIOError(A)
#endif
375 
/*
** In test builds, sqlite_open_file_count tracks how many files are
** currently open: OpenCounter(X) adds X to it.  In normal builds the
** macro expands to nothing.
*/
#ifndef SQLITE_TEST
# define OpenCounter(X)
#else
int sqlite_open_file_count = 0;
# define OpenCounter(X)  sqlite_open_file_count+=(X)
#endif
385 
386 
387 /*
388 ** Delete the named file
389 */
sqliteOsDelete(const char * zFilename)390 int sqliteOsDelete(const char *zFilename){
391 #if OS_UNIX
392   unlink(zFilename);
393 #endif
394 #if OS_WIN
395   DeleteFile(zFilename);
396 #endif
397 #if OS_MAC
398   unlink(zFilename);
399 #endif
400   return SQLITE_OK;
401 }
402 
/*
** Return non-zero if a file named zFilename exists and zero otherwise.
*/
int sqliteOsFileExists(const char *zFilename){
#if OS_UNIX
  return access(zFilename, 0)==0 ? 1 : 0;
#endif
#if OS_WIN
  return GetFileAttributes(zFilename)==0xffffffff ? 0 : 1;
#endif
#if OS_MAC
  return access(zFilename, 0)==0 ? 1 : 0;
#endif
}
417 
418 
#if 0 /* NOT USED */
/*
** Rename the existing file zOldName to zNewName.  Return SQLITE_OK on
** success and SQLITE_ERROR on failure.  Not implemented for OS_MAC.
*/
int sqliteOsFileRename(const char *zOldName, const char *zNewName){
#if OS_UNIX
  /* Make the new name first, then drop the old one. */
  if( link(zOldName, zNewName)!=0 ) return SQLITE_ERROR;
  unlink(zOldName);
  return SQLITE_OK;
#endif
#if OS_WIN
  if( MoveFile(zOldName, zNewName) ){
    return SQLITE_OK;
  }
  return SQLITE_ERROR;
#endif
#if OS_MAC
  /**** FIX ME ***/
  return SQLITE_ERROR;
#endif
}
#endif /* NOT USED */
443 
444 /*
445 ** Attempt to open a file for both reading and writing.  If that
446 ** fails, try opening it read-only.  If the file does not exist,
447 ** try to create it.
448 **
449 ** On success, a handle for the open file is written to *id
450 ** and *pReadonly is set to 0 if the file was opened for reading and
451 ** writing or 1 if the file was opened read-only.  The function returns
452 ** SQLITE_OK.
453 **
454 ** On failure, the function returns SQLITE_CANTOPEN and leaves
455 ** *id and *pReadonly unchanged.
456 */
sqliteOsOpenReadWrite(const char * zFilename,OsFile * id,int * pReadonly)457 int sqliteOsOpenReadWrite(
458   const char *zFilename,
459   OsFile *id,
460   int *pReadonly
461 ){
462 #if OS_UNIX
463   int rc;
464   id->dirfd = -1;
465   id->fd = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644);
466   if( id->fd<0 ){
467 #ifdef EISDIR
468     if( errno==EISDIR ){
469       return SQLITE_CANTOPEN;
470     }
471 #endif
472     id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
473     if( id->fd<0 ){
474       return SQLITE_CANTOPEN;
475     }
476     *pReadonly = 1;
477   }else{
478     *pReadonly = 0;
479   }
480   sqliteOsEnterMutex();
481   rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
482   sqliteOsLeaveMutex();
483   if( rc ){
484     close(id->fd);
485     return SQLITE_NOMEM;
486   }
487   id->locked = 0;
488   TRACE3("OPEN    %-3d %s\n", id->fd, zFilename);
489   OpenCounter(+1);
490   return SQLITE_OK;
491 #endif
492 #if OS_WIN
493   HANDLE h = CreateFile(zFilename,
494      GENERIC_READ | GENERIC_WRITE,
495      FILE_SHARE_READ | FILE_SHARE_WRITE,
496      NULL,
497      OPEN_ALWAYS,
498      FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
499      NULL
500   );
501   if( h==INVALID_HANDLE_VALUE ){
502     h = CreateFile(zFilename,
503        GENERIC_READ,
504        FILE_SHARE_READ,
505        NULL,
506        OPEN_ALWAYS,
507        FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
508        NULL
509     );
510     if( h==INVALID_HANDLE_VALUE ){
511       return SQLITE_CANTOPEN;
512     }
513     *pReadonly = 1;
514   }else{
515     *pReadonly = 0;
516   }
517   id->h = h;
518   id->locked = 0;
519   OpenCounter(+1);
520   return SQLITE_OK;
521 #endif
522 #if OS_MAC
523   FSSpec fsSpec;
524 # ifdef _LARGE_FILE
525   HFSUniStr255 dfName;
526   FSRef fsRef;
527   if( __path2fss(zFilename, &fsSpec) != noErr ){
528     if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
529       return SQLITE_CANTOPEN;
530   }
531   if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
532     return SQLITE_CANTOPEN;
533   FSGetDataForkName(&dfName);
534   if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
535                  fsRdWrShPerm, &(id->refNum)) != noErr ){
536     if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
537                    fsRdWrPerm, &(id->refNum)) != noErr ){
538       if (FSOpenFork(&fsRef, dfName.length, dfName.unicode,
539                    fsRdPerm, &(id->refNum)) != noErr )
540         return SQLITE_CANTOPEN;
541       else
542         *pReadonly = 1;
543     } else
544       *pReadonly = 0;
545   } else
546     *pReadonly = 0;
547 # else
548   __path2fss(zFilename, &fsSpec);
549   if( !sqliteOsFileExists(zFilename) ){
550     if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
551       return SQLITE_CANTOPEN;
552   }
553   if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNum)) != noErr ){
554     if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr ){
555       if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
556         return SQLITE_CANTOPEN;
557       else
558         *pReadonly = 1;
559     } else
560       *pReadonly = 0;
561   } else
562     *pReadonly = 0;
563 # endif
564   if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
565     id->refNumRF = -1;
566   }
567   id->locked = 0;
568   id->delOnClose = 0;
569   OpenCounter(+1);
570   return SQLITE_OK;
571 #endif
572 }
573 
574 
575 /*
576 ** Attempt to open a new file for exclusive access by this process.
577 ** The file will be opened for both reading and writing.  To avoid
578 ** a potential security problem, we do not allow the file to have
579 ** previously existed.  Nor do we allow the file to be a symbolic
580 ** link.
581 **
582 ** If delFlag is true, then make arrangements to automatically delete
583 ** the file when it is closed.
584 **
585 ** On success, write the file handle into *id and return SQLITE_OK.
586 **
587 ** On failure, return SQLITE_CANTOPEN.
588 */
sqliteOsOpenExclusive(const char * zFilename,OsFile * id,int delFlag)589 int sqliteOsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
590 #if OS_UNIX
591   int rc;
592   if( access(zFilename, 0)==0 ){
593     return SQLITE_CANTOPEN;
594   }
595   id->dirfd = -1;
596   id->fd = open(zFilename,
597                 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY, 0600);
598   if( id->fd<0 ){
599     return SQLITE_CANTOPEN;
600   }
601   sqliteOsEnterMutex();
602   rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
603   sqliteOsLeaveMutex();
604   if( rc ){
605     close(id->fd);
606     unlink(zFilename);
607     return SQLITE_NOMEM;
608   }
609   id->locked = 0;
610   if( delFlag ){
611     unlink(zFilename);
612   }
613   TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
614   OpenCounter(+1);
615   return SQLITE_OK;
616 #endif
617 #if OS_WIN
618   HANDLE h;
619   int fileflags;
620   if( delFlag ){
621     fileflags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_RANDOM_ACCESS
622                      | FILE_FLAG_DELETE_ON_CLOSE;
623   }else{
624     fileflags = FILE_FLAG_RANDOM_ACCESS;
625   }
626   h = CreateFile(zFilename,
627      GENERIC_READ | GENERIC_WRITE,
628      0,
629      NULL,
630      CREATE_ALWAYS,
631      fileflags,
632      NULL
633   );
634   if( h==INVALID_HANDLE_VALUE ){
635     return SQLITE_CANTOPEN;
636   }
637   id->h = h;
638   id->locked = 0;
639   OpenCounter(+1);
640   return SQLITE_OK;
641 #endif
642 #if OS_MAC
643   FSSpec fsSpec;
644 # ifdef _LARGE_FILE
645   HFSUniStr255 dfName;
646   FSRef fsRef;
647   __path2fss(zFilename, &fsSpec);
648   if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
649     return SQLITE_CANTOPEN;
650   if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
651     return SQLITE_CANTOPEN;
652   FSGetDataForkName(&dfName);
653   if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
654                  fsRdWrPerm, &(id->refNum)) != noErr )
655     return SQLITE_CANTOPEN;
656 # else
657   __path2fss(zFilename, &fsSpec);
658   if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
659     return SQLITE_CANTOPEN;
660   if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr )
661     return SQLITE_CANTOPEN;
662 # endif
663   id->refNumRF = -1;
664   id->locked = 0;
665   id->delOnClose = delFlag;
666   if (delFlag)
667     id->pathToDel = sqliteOsFullPathname(zFilename);
668   OpenCounter(+1);
669   return SQLITE_OK;
670 #endif
671 }
672 
673 /*
674 ** Attempt to open a new file for read-only access.
675 **
676 ** On success, write the file handle into *id and return SQLITE_OK.
677 **
678 ** On failure, return SQLITE_CANTOPEN.
679 */
sqliteOsOpenReadOnly(const char * zFilename,OsFile * id)680 int sqliteOsOpenReadOnly(const char *zFilename, OsFile *id){
681 #if OS_UNIX
682   int rc;
683   id->dirfd = -1;
684   id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
685   if( id->fd<0 ){
686     return SQLITE_CANTOPEN;
687   }
688   sqliteOsEnterMutex();
689   rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
690   sqliteOsLeaveMutex();
691   if( rc ){
692     close(id->fd);
693     return SQLITE_NOMEM;
694   }
695   id->locked = 0;
696   TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
697   OpenCounter(+1);
698   return SQLITE_OK;
699 #endif
700 #if OS_WIN
701   HANDLE h = CreateFile(zFilename,
702      GENERIC_READ,
703      0,
704      NULL,
705      OPEN_EXISTING,
706      FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
707      NULL
708   );
709   if( h==INVALID_HANDLE_VALUE ){
710     return SQLITE_CANTOPEN;
711   }
712   id->h = h;
713   id->locked = 0;
714   OpenCounter(+1);
715   return SQLITE_OK;
716 #endif
717 #if OS_MAC
718   FSSpec fsSpec;
719 # ifdef _LARGE_FILE
720   HFSUniStr255 dfName;
721   FSRef fsRef;
722   if( __path2fss(zFilename, &fsSpec) != noErr )
723     return SQLITE_CANTOPEN;
724   if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
725     return SQLITE_CANTOPEN;
726   FSGetDataForkName(&dfName);
727   if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
728                  fsRdPerm, &(id->refNum)) != noErr )
729     return SQLITE_CANTOPEN;
730 # else
731   __path2fss(zFilename, &fsSpec);
732   if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
733     return SQLITE_CANTOPEN;
734 # endif
735   if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
736     id->refNumRF = -1;
737   }
738   id->locked = 0;
739   id->delOnClose = 0;
740   OpenCounter(+1);
741   return SQLITE_OK;
742 #endif
743 }
744 
745 /*
746 ** Attempt to open a file descriptor for the directory that contains a
747 ** file.  This file descriptor can be used to fsync() the directory
748 ** in order to make sure the creation of a new file is actually written
749 ** to disk.
750 **
751 ** This routine is only meaningful for Unix.  It is a no-op under
752 ** windows since windows does not support hard links.
753 **
754 ** On success, a handle for a previously open file is at *id is
755 ** updated with the new directory file descriptor and SQLITE_OK is
756 ** returned.
757 **
758 ** On failure, the function returns SQLITE_CANTOPEN and leaves
759 ** *id unchanged.
760 */
sqliteOsOpenDirectory(const char * zDirname,OsFile * id)761 int sqliteOsOpenDirectory(
762   const char *zDirname,
763   OsFile *id
764 ){
765 #if OS_UNIX
766   if( id->fd<0 ){
767     /* Do not open the directory if the corresponding file is not already
768     ** open. */
769     return SQLITE_CANTOPEN;
770   }
771   assert( id->dirfd<0 );
772   id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0644);
773   if( id->dirfd<0 ){
774     return SQLITE_CANTOPEN;
775   }
776   TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
777 #endif
778   return SQLITE_OK;
779 }
780 
/*
** When this global is non-zero it names a directory, and
** sqliteOsTempFileName() places temporary files in that directory.
*/
const char *sqlite_temp_directory = 0;
787 
788 /*
789 ** Create a temporary file name in zBuf.  zBuf must be big enough to
790 ** hold at least SQLITE_TEMPNAME_SIZE characters.
791 */
sqliteOsTempFileName(char * zBuf)792 int sqliteOsTempFileName(char *zBuf){
793 #if OS_UNIX
794   static const char *azDirs[] = {
795      0,
796      "/var/tmp",
797      "/usr/tmp",
798      "/tmp",
799      ".",
800   };
801   static unsigned char zChars[] =
802     "abcdefghijklmnopqrstuvwxyz"
803     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
804     "0123456789";
805   int i, j;
806   struct stat buf;
807   const char *zDir = ".";
808   azDirs[0] = sqlite_temp_directory;
809   for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
810     if( azDirs[i]==0 ) continue;
811     if( stat(azDirs[i], &buf) ) continue;
812     if( !S_ISDIR(buf.st_mode) ) continue;
813     if( access(azDirs[i], 07) ) continue;
814     zDir = azDirs[i];
815     break;
816   }
817   do{
818     sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
819     j = strlen(zBuf);
820     sqliteRandomness(15, &zBuf[j]);
821     for(i=0; i<15; i++, j++){
822       zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
823     }
824     zBuf[j] = 0;
825   }while( access(zBuf,0)==0 );
826 #endif
827 #if OS_WIN
828   static char zChars[] =
829     "abcdefghijklmnopqrstuvwxyz"
830     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
831     "0123456789";
832   int i, j;
833   const char *zDir;
834   char zTempPath[SQLITE_TEMPNAME_SIZE];
835   if( sqlite_temp_directory==0 ){
836     GetTempPath(SQLITE_TEMPNAME_SIZE-30, zTempPath);
837     for(i=strlen(zTempPath); i>0 && zTempPath[i-1]=='\\'; i--){}
838     zTempPath[i] = 0;
839     zDir = zTempPath;
840   }else{
841     zDir = sqlite_temp_directory;
842   }
843   for(;;){
844     sprintf(zBuf, "%s\\"TEMP_FILE_PREFIX, zDir);
845     j = strlen(zBuf);
846     sqliteRandomness(15, &zBuf[j]);
847     for(i=0; i<15; i++, j++){
848       zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
849     }
850     zBuf[j] = 0;
851     if( !sqliteOsFileExists(zBuf) ) break;
852   }
853 #endif
854 #if OS_MAC
855   static char zChars[] =
856     "abcdefghijklmnopqrstuvwxyz"
857     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
858     "0123456789";
859   int i, j;
860   char *zDir;
861   char zTempPath[SQLITE_TEMPNAME_SIZE];
862   char zdirName[32];
863   CInfoPBRec infoRec;
864   Str31 dirName;
865   memset(&infoRec, 0, sizeof(infoRec));
866   memset(zTempPath, 0, SQLITE_TEMPNAME_SIZE);
867   if( sqlite_temp_directory!=0 ){
868     zDir = sqlite_temp_directory;
869   }else if( FindFolder(kOnSystemDisk, kTemporaryFolderType,  kCreateFolder,
870        &(infoRec.dirInfo.ioVRefNum), &(infoRec.dirInfo.ioDrParID)) == noErr ){
871     infoRec.dirInfo.ioNamePtr = dirName;
872     do{
873       infoRec.dirInfo.ioFDirIndex = -1;
874       infoRec.dirInfo.ioDrDirID = infoRec.dirInfo.ioDrParID;
875       if( PBGetCatInfoSync(&infoRec) == noErr ){
876         CopyPascalStringToC(dirName, zdirName);
877         i = strlen(zdirName);
878         memmove(&(zTempPath[i+1]), zTempPath, strlen(zTempPath));
879         strcpy(zTempPath, zdirName);
880         zTempPath[i] = ':';
881       }else{
882         *zTempPath = 0;
883         break;
884       }
885     } while( infoRec.dirInfo.ioDrDirID != fsRtDirID );
886     zDir = zTempPath;
887   }
888   if( zDir[0]==0 ){
889     getcwd(zTempPath, SQLITE_TEMPNAME_SIZE-24);
890     zDir = zTempPath;
891   }
892   for(;;){
893     sprintf(zBuf, "%s"TEMP_FILE_PREFIX, zDir);
894     j = strlen(zBuf);
895     sqliteRandomness(15, &zBuf[j]);
896     for(i=0; i<15; i++, j++){
897       zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
898     }
899     zBuf[j] = 0;
900     if( !sqliteOsFileExists(zBuf) ) break;
901   }
902 #endif
903   return SQLITE_OK;
904 }
905 
906 /*
907 ** Close a file.
908 */
sqliteOsClose(OsFile * id)909 int sqliteOsClose(OsFile *id){
910 #if OS_UNIX
911   sqliteOsUnlock(id);
912   if( id->dirfd>=0 ) close(id->dirfd);
913   id->dirfd = -1;
914   sqliteOsEnterMutex();
915   if( id->pOpen->nLock ){
916     /* If there are outstanding locks, do not actually close the file just
917     ** yet because that would clear those locks.  Instead, add the file
918     ** descriptor to pOpen->aPending.  It will be automatically closed when
919     ** the last lock is cleared.
920     */
921     int *aNew;
922     struct openCnt *pOpen = id->pOpen;
923     pOpen->nPending++;
924     aNew = sqliteRealloc( pOpen->aPending, pOpen->nPending*sizeof(int) );
925     if( aNew==0 ){
926       /* If a malloc fails, just leak the file descriptor */
927     }else{
928       pOpen->aPending = aNew;
929       pOpen->aPending[pOpen->nPending-1] = id->fd;
930     }
931   }else{
932     /* There are no outstanding locks so we can close the file immediately */
933     close(id->fd);
934   }
935   releaseLockInfo(id->pLock);
936   releaseOpenCnt(id->pOpen);
937   sqliteOsLeaveMutex();
938   TRACE2("CLOSE   %-3d\n", id->fd);
939   OpenCounter(-1);
940   return SQLITE_OK;
941 #endif
942 #if OS_WIN
943   CloseHandle(id->h);
944   OpenCounter(-1);
945   return SQLITE_OK;
946 #endif
947 #if OS_MAC
948   if( id->refNumRF!=-1 )
949     FSClose(id->refNumRF);
950 # ifdef _LARGE_FILE
951   FSCloseFork(id->refNum);
952 # else
953   FSClose(id->refNum);
954 # endif
955   if( id->delOnClose ){
956     unlink(id->pathToDel);
957     sqliteFree(id->pathToDel);
958   }
959   OpenCounter(-1);
960   return SQLITE_OK;
961 #endif
962 }
963 
964 /*
965 ** Read data from a file into a buffer.  Return SQLITE_OK if all
966 ** bytes were read successfully and SQLITE_IOERR if anything goes
967 ** wrong.
968 */
sqliteOsRead(OsFile * id,void * pBuf,int amt)969 int sqliteOsRead(OsFile *id, void *pBuf, int amt){
970 #if OS_UNIX
971   int got;
972   SimulateIOError(SQLITE_IOERR);
973   TIMER_START;
974   got = read(id->fd, pBuf, amt);
975   TIMER_END;
976   TRACE4("READ    %-3d %7d %d\n", id->fd, last_page, elapse);
977   SEEK(0);
978   /* if( got<0 ) got = 0; */
979   if( got==amt ){
980     return SQLITE_OK;
981   }else{
982     return SQLITE_IOERR;
983   }
984 #endif
985 #if OS_WIN
986   DWORD got;
987   SimulateIOError(SQLITE_IOERR);
988   TRACE2("READ %d\n", last_page);
989   if( !ReadFile(id->h, pBuf, amt, &got, 0) ){
990     got = 0;
991   }
992   if( got==(DWORD)amt ){
993     return SQLITE_OK;
994   }else{
995     return SQLITE_IOERR;
996   }
997 #endif
998 #if OS_MAC
999   int got;
1000   SimulateIOError(SQLITE_IOERR);
1001   TRACE2("READ %d\n", last_page);
1002 # ifdef _LARGE_FILE
1003   FSReadFork(id->refNum, fsAtMark, 0, (ByteCount)amt, pBuf, (ByteCount*)&got);
1004 # else
1005   got = amt;
1006   FSRead(id->refNum, &got, pBuf);
1007 # endif
1008   if( got==amt ){
1009     return SQLITE_OK;
1010   }else{
1011     return SQLITE_IOERR;
1012   }
1013 #endif
1014 }
1015 
1016 /*
1017 ** Write data from a buffer into a file.  Return SQLITE_OK on success
1018 ** or some other error code on failure.
1019 */
sqliteOsWrite(OsFile * id,const void * pBuf,int amt)1020 int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
1021 #if OS_UNIX
1022   int wrote = 0;
1023   SimulateIOError(SQLITE_IOERR);
1024   TIMER_START;
1025   while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
1026     amt -= wrote;
1027     pBuf = &((char*)pBuf)[wrote];
1028   }
1029   TIMER_END;
1030   TRACE4("WRITE   %-3d %7d %d\n", id->fd, last_page, elapse);
1031   SEEK(0);
1032   if( amt>0 ){
1033     return SQLITE_FULL;
1034   }
1035   return SQLITE_OK;
1036 #endif
1037 #if OS_WIN
1038   int rc;
1039   DWORD wrote;
1040   SimulateIOError(SQLITE_IOERR);
1041   TRACE2("WRITE %d\n", last_page);
1042   while( amt>0 && (rc = WriteFile(id->h, pBuf, amt, &wrote, 0))!=0 && wrote>0 ){
1043     amt -= wrote;
1044     pBuf = &((char*)pBuf)[wrote];
1045   }
1046   if( !rc || amt>(int)wrote ){
1047     return SQLITE_FULL;
1048   }
1049   return SQLITE_OK;
1050 #endif
1051 #if OS_MAC
1052   OSErr oserr;
1053   int wrote = 0;
1054   SimulateIOError(SQLITE_IOERR);
1055   TRACE2("WRITE %d\n", last_page);
1056   while( amt>0 ){
1057 # ifdef _LARGE_FILE
1058     oserr = FSWriteFork(id->refNum, fsAtMark, 0,
1059                         (ByteCount)amt, pBuf, (ByteCount*)&wrote);
1060 # else
1061     wrote = amt;
1062     oserr = FSWrite(id->refNum, &wrote, pBuf);
1063 # endif
1064     if( wrote == 0 || oserr != noErr)
1065       break;
1066     amt -= wrote;
1067     pBuf = &((char*)pBuf)[wrote];
1068   }
1069   if( oserr != noErr || amt>wrote ){
1070     return SQLITE_FULL;
1071   }
1072   return SQLITE_OK;
1073 #endif
1074 }
1075 
1076 /*
1077 ** Move the read/write pointer in a file.
1078 */
sqliteOsSeek(OsFile * id,off_t offset)1079 int sqliteOsSeek(OsFile *id, off_t offset){
1080   SEEK(offset/1024 + 1);
1081 #if OS_UNIX
1082   lseek(id->fd, offset, SEEK_SET);
1083   return SQLITE_OK;
1084 #endif
1085 #if OS_WIN
1086   {
1087     LONG upperBits = offset>>32;
1088     LONG lowerBits = offset & 0xffffffff;
1089     DWORD rc;
1090     rc = SetFilePointer(id->h, lowerBits, &upperBits, FILE_BEGIN);
1091     /* TRACE3("SEEK rc=0x%x upper=0x%x\n", rc, upperBits); */
1092   }
1093   return SQLITE_OK;
1094 #endif
1095 #if OS_MAC
1096   {
1097     off_t curSize;
1098     if( sqliteOsFileSize(id, &curSize) != SQLITE_OK ){
1099       return SQLITE_IOERR;
1100     }
1101     if( offset >= curSize ){
1102       if( sqliteOsTruncate(id, offset+1) != SQLITE_OK ){
1103         return SQLITE_IOERR;
1104       }
1105     }
1106 # ifdef _LARGE_FILE
1107     if( FSSetForkPosition(id->refNum, fsFromStart, offset) != noErr ){
1108 # else
1109     if( SetFPos(id->refNum, fsFromStart, offset) != noErr ){
1110 # endif
1111       return SQLITE_IOERR;
1112     }else{
1113       return SQLITE_OK;
1114     }
1115   }
1116 #endif
1117 }
1118 
1119 /*
1120 ** Make sure all writes to a particular file are committed to disk.
1121 **
1122 ** Under Unix, also make sure that the directory entry for the file
1123 ** has been created by fsync-ing the directory that contains the file.
1124 ** If we do not do this and we encounter a power failure, the directory
1125 ** entry for the journal might not exist after we reboot.  The next
1126 ** SQLite to access the file will not know that the journal exists (because
1127 ** the directory entry for the journal was never created) and the transaction
1128 ** will not roll back - possibly leading to database corruption.
1129 */
1130 int sqliteOsSync(OsFile *id){
1131 #if OS_UNIX
1132   SimulateIOError(SQLITE_IOERR);
1133   TRACE2("SYNC    %-3d\n", id->fd);
1134   if( fsync(id->fd) ){
1135     return SQLITE_IOERR;
1136   }else{
1137     if( id->dirfd>=0 ){
1138       TRACE2("DIRSYNC %-3d\n", id->dirfd);
1139       fsync(id->dirfd);
1140       close(id->dirfd);  /* Only need to sync once, so close the directory */
1141       id->dirfd = -1;    /* when we are done. */
1142     }
1143     return SQLITE_OK;
1144   }
1145 #endif
1146 #if OS_WIN
1147   if( FlushFileBuffers(id->h) ){
1148     return SQLITE_OK;
1149   }else{
1150     return SQLITE_IOERR;
1151   }
1152 #endif
1153 #if OS_MAC
1154 # ifdef _LARGE_FILE
1155   if( FSFlushFork(id->refNum) != noErr ){
1156 # else
1157   ParamBlockRec params;
1158   memset(&params, 0, sizeof(ParamBlockRec));
1159   params.ioParam.ioRefNum = id->refNum;
1160   if( PBFlushFileSync(&params) != noErr ){
1161 # endif
1162     return SQLITE_IOERR;
1163   }else{
1164     return SQLITE_OK;
1165   }
1166 #endif
1167 }
1168 
1169 /*
1170 ** Truncate an open file to a specified size
1171 */
1172 int sqliteOsTruncate(OsFile *id, off_t nByte){
1173   SimulateIOError(SQLITE_IOERR);
1174 #if OS_UNIX
1175   return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
1176 #endif
1177 #if OS_WIN
1178   {
1179     LONG upperBits = nByte>>32;
1180     SetFilePointer(id->h, nByte, &upperBits, FILE_BEGIN);
1181     SetEndOfFile(id->h);
1182   }
1183   return SQLITE_OK;
1184 #endif
1185 #if OS_MAC
1186 # ifdef _LARGE_FILE
1187   if( FSSetForkSize(id->refNum, fsFromStart, nByte) != noErr){
1188 # else
1189   if( SetEOF(id->refNum, nByte) != noErr ){
1190 # endif
1191     return SQLITE_IOERR;
1192   }else{
1193     return SQLITE_OK;
1194   }
1195 #endif
1196 }
1197 
1198 /*
1199 ** Determine the current size of a file in bytes
1200 */
1201 int sqliteOsFileSize(OsFile *id, off_t *pSize){
1202 #if OS_UNIX
1203   struct stat buf;
1204   SimulateIOError(SQLITE_IOERR);
1205   if( fstat(id->fd, &buf)!=0 ){
1206     return SQLITE_IOERR;
1207   }
1208   *pSize = buf.st_size;
1209   return SQLITE_OK;
1210 #endif
1211 #if OS_WIN
1212   DWORD upperBits, lowerBits;
1213   SimulateIOError(SQLITE_IOERR);
1214   lowerBits = GetFileSize(id->h, &upperBits);
1215   *pSize = (((off_t)upperBits)<<32) + lowerBits;
1216   return SQLITE_OK;
1217 #endif
1218 #if OS_MAC
1219 # ifdef _LARGE_FILE
1220   if( FSGetForkSize(id->refNum, pSize) != noErr){
1221 # else
1222   if( GetEOF(id->refNum, pSize) != noErr ){
1223 # endif
1224     return SQLITE_IOERR;
1225   }else{
1226     return SQLITE_OK;
1227   }
1228 #endif
1229 }
1230 
#if OS_WIN
/*
** Return true (non-zero) if we are running under WinNT, Win2K or WinXP.
** Return false (zero) for Win95, Win98, or WinME.
**
** Win95, Win98 and WinME lack the LockFileEx() API.  It is still
** possible to link statically against that API as long as it is never
** called when running on Win95/98/ME.  The locking code calls this
** routine to find out which family the host belongs to and therefore
** whether LockFileEx() may be used.
**
** The platform is queried with GetVersionEx() on the first call only;
** the answer is kept in a static variable for later calls.
*/
int isNT(void){
  static int hostKind = 0;   /* 0=unknown 1=win95 2=winNT */
  if( hostKind==0 ){
    OSVERSIONINFO verInfo;
    verInfo.dwOSVersionInfoSize = sizeof(verInfo);
    GetVersionEx(&verInfo);
    if( verInfo.dwPlatformId==VER_PLATFORM_WIN32_NT ){
      hostKind = 2;
    }else{
      hostKind = 1;
    }
  }
  return hostKind==2;
}
#endif
1254 
1255 /*
1256 ** Windows file locking notes:  [similar issues apply to MacOS]
1257 **
1258 ** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because
1259 ** those functions are not available.  So we use only LockFile() and
1260 ** UnlockFile().
1261 **
1262 ** LockFile() prevents not just writing but also reading by other processes.
1263 ** (This is a design error on the part of Windows, but there is nothing
1264 ** we can do about that.)  So the region used for locking is at the
1265 ** end of the file where it is unlikely to ever interfere with an
1266 ** actual read attempt.
1267 **
1268 ** A database read lock is obtained by locking a single randomly-chosen
1269 ** byte out of a specific range of bytes. The lock byte is obtained at
1270 ** random so two separate readers can probably access the file at the
1271 ** same time, unless they are unlucky and choose the same lock byte.
1272 ** A database write lock is obtained by locking all bytes in the range.
1273 ** There can only be one writer.
1274 **
1275 ** A lock is obtained on the first byte of the lock range before acquiring
1276 ** either a read lock or a write lock.  This prevents two processes from
1277 ** attempting to get a lock at a same time.  The semantics of
1278 ** sqliteOsReadLock() require that if there is already a write lock, that
1279 ** lock is converted into a read lock atomically.  The lock on the first
1280 ** byte allows us to drop the old write lock and get the read lock without
1281 ** another process jumping into the middle and messing us up.  The same
1282 ** argument applies to sqliteOsWriteLock().
1283 **
1284 ** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available,
1285 ** which means we can use reader/writer locks.  When reader writer locks
1286 ** are used, the lock is placed on the same range of bytes that is used
1287 ** for probabilistic locking in Win95/98/ME.  Hence, the locking scheme
1288 ** will support two or more Win95 readers or two or more WinNT readers.
1289 ** But a single Win95 reader will lock out all WinNT readers and a single
1290 ** WinNT reader will lock out all other Win95 readers.
1291 **
1292 ** Note: On MacOS we use the resource fork for locking.
1293 **
1294 ** The following #defines specify the range of bytes used for locking.
1295 ** N_LOCKBYTE is the number of bytes available for doing the locking.
1296 ** The first byte used to hold the lock while the lock is changing does
1297 ** not count toward this number.  FIRST_LOCKBYTE is the address of
1298 ** the first byte in the range of bytes used for locking.
1299 */
/* Number of lock bytes, not counting the guard byte at FIRST_LOCKBYTE */
#define N_LOCKBYTE       10239
/* Address of the guard byte; the lock bytes follow immediately after it */
#if !OS_MAC
# define FIRST_LOCKBYTE   (0xffffffff - N_LOCKBYTE)
#else
# define FIRST_LOCKBYTE   (0x000fffff - N_LOCKBYTE)
#endif
1306 
1307 /*
1308 ** Change the status of the lock on the file "id" to be a readlock.
1309 ** If the file was write locked, then this reduces the lock to a read.
1310 ** If the file was read locked, then this acquires a new read lock.
1311 **
1312 ** Return SQLITE_OK on success and SQLITE_BUSY on failure.  If this
1313 ** library was compiled with large file support (LFS) but LFS is not
1314 ** available on the host, then an SQLITE_NOLFS is returned.
1315 */
1316 int sqliteOsReadLock(OsFile *id){
1317 #if OS_UNIX
1318   int rc;
1319   sqliteOsEnterMutex();
1320   if( id->pLock->cnt>0 ){
1321     if( !id->locked ){
1322       id->pLock->cnt++;
1323       id->locked = 1;
1324       id->pOpen->nLock++;
1325     }
1326     rc = SQLITE_OK;
1327   }else if( id->locked || id->pLock->cnt==0 ){
1328     struct flock lock;
1329     int s;
1330     lock.l_type = F_RDLCK;
1331     lock.l_whence = SEEK_SET;
1332     lock.l_start = lock.l_len = 0L;
1333     s = fcntl(id->fd, F_SETLK, &lock);
1334     if( s!=0 ){
1335       rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1336     }else{
1337       rc = SQLITE_OK;
1338       if( !id->locked ){
1339         id->pOpen->nLock++;
1340         id->locked = 1;
1341       }
1342       id->pLock->cnt = 1;
1343     }
1344   }else{
1345     rc = SQLITE_BUSY;
1346   }
1347   sqliteOsLeaveMutex();
1348   return rc;
1349 #endif
1350 #if OS_WIN
1351   int rc;
1352   if( id->locked>0 ){
1353     rc = SQLITE_OK;
1354   }else{
1355     int lk;
1356     int res;
1357     int cnt = 100;
1358     sqliteRandomness(sizeof(lk), &lk);
1359     lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
1360     while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
1361       Sleep(1);
1362     }
1363     if( res ){
1364       UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1365       if( isNT() ){
1366         OVERLAPPED ovlp;
1367         ovlp.Offset = FIRST_LOCKBYTE+1;
1368         ovlp.OffsetHigh = 0;
1369         ovlp.hEvent = 0;
1370         res = LockFileEx(id->h, LOCKFILE_FAIL_IMMEDIATELY,
1371                           0, N_LOCKBYTE, 0, &ovlp);
1372       }else{
1373         res = LockFile(id->h, FIRST_LOCKBYTE+lk, 0, 1, 0);
1374       }
1375       UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
1376     }
1377     if( res ){
1378       id->locked = lk;
1379       rc = SQLITE_OK;
1380     }else{
1381       rc = SQLITE_BUSY;
1382     }
1383   }
1384   return rc;
1385 #endif
1386 #if OS_MAC
1387   int rc;
1388   if( id->locked>0 || id->refNumRF == -1 ){
1389     rc = SQLITE_OK;
1390   }else{
1391     int lk;
1392     OSErr res;
1393     int cnt = 5;
1394     ParamBlockRec params;
1395     sqliteRandomness(sizeof(lk), &lk);
1396     lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
1397     memset(&params, 0, sizeof(params));
1398     params.ioParam.ioRefNum = id->refNumRF;
1399     params.ioParam.ioPosMode = fsFromStart;
1400     params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1401     params.ioParam.ioReqCount = 1;
1402     while( cnt-->0 && (res = PBLockRangeSync(&params))!=noErr ){
1403       UInt32 finalTicks;
1404       Delay(1, &finalTicks); /* 1/60 sec */
1405     }
1406     if( res == noErr ){
1407       params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1408       params.ioParam.ioReqCount = N_LOCKBYTE;
1409       PBUnlockRangeSync(&params);
1410       params.ioParam.ioPosOffset = FIRST_LOCKBYTE+lk;
1411       params.ioParam.ioReqCount = 1;
1412       res = PBLockRangeSync(&params);
1413       params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1414       params.ioParam.ioReqCount = 1;
1415       PBUnlockRangeSync(&params);
1416     }
1417     if( res == noErr ){
1418       id->locked = lk;
1419       rc = SQLITE_OK;
1420     }else{
1421       rc = SQLITE_BUSY;
1422     }
1423   }
1424   return rc;
1425 #endif
1426 }
1427 
1428 /*
1429 ** Change the lock status to be an exclusive or write lock.  Return
1430 ** SQLITE_OK on success and SQLITE_BUSY on a failure.  If this
1431 ** library was compiled with large file support (LFS) but LFS is not
1432 ** available on the host, then an SQLITE_NOLFS is returned.
1433 */
1434 int sqliteOsWriteLock(OsFile *id){
1435 #if OS_UNIX
1436   int rc;
1437   sqliteOsEnterMutex();
1438   if( id->pLock->cnt==0 || (id->pLock->cnt==1 && id->locked==1) ){
1439     struct flock lock;
1440     int s;
1441     lock.l_type = F_WRLCK;
1442     lock.l_whence = SEEK_SET;
1443     lock.l_start = lock.l_len = 0L;
1444     s = fcntl(id->fd, F_SETLK, &lock);
1445     if( s!=0 ){
1446       rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1447     }else{
1448       rc = SQLITE_OK;
1449       if( !id->locked ){
1450         id->pOpen->nLock++;
1451         id->locked = 1;
1452       }
1453       id->pLock->cnt = -1;
1454     }
1455   }else{
1456     rc = SQLITE_BUSY;
1457   }
1458   sqliteOsLeaveMutex();
1459   return rc;
1460 #endif
1461 #if OS_WIN
1462   int rc;
1463   if( id->locked<0 ){
1464     rc = SQLITE_OK;
1465   }else{
1466     int res;
1467     int cnt = 100;
1468     while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
1469       Sleep(1);
1470     }
1471     if( res ){
1472       if( id->locked>0 ){
1473         if( isNT() ){
1474           UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1475         }else{
1476           res = UnlockFile(id->h, FIRST_LOCKBYTE + id->locked, 0, 1, 0);
1477         }
1478       }
1479       if( res ){
1480         res = LockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1481       }else{
1482         res = 0;
1483       }
1484       UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
1485     }
1486     if( res ){
1487       id->locked = -1;
1488       rc = SQLITE_OK;
1489     }else{
1490       rc = SQLITE_BUSY;
1491     }
1492   }
1493   return rc;
1494 #endif
1495 #if OS_MAC
1496   int rc;
1497   if( id->locked<0 || id->refNumRF == -1 ){
1498     rc = SQLITE_OK;
1499   }else{
1500     OSErr res;
1501     int cnt = 5;
1502     ParamBlockRec params;
1503     memset(&params, 0, sizeof(params));
1504     params.ioParam.ioRefNum = id->refNumRF;
1505     params.ioParam.ioPosMode = fsFromStart;
1506     params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1507     params.ioParam.ioReqCount = 1;
1508     while( cnt-->0 && (res = PBLockRangeSync(&params))!=noErr ){
1509       UInt32 finalTicks;
1510       Delay(1, &finalTicks); /* 1/60 sec */
1511     }
1512     if( res == noErr ){
1513       params.ioParam.ioPosOffset = FIRST_LOCKBYTE + id->locked;
1514       params.ioParam.ioReqCount = 1;
1515       if( id->locked==0
1516             || PBUnlockRangeSync(&params)==noErr ){
1517         params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1518         params.ioParam.ioReqCount = N_LOCKBYTE;
1519         res = PBLockRangeSync(&params);
1520       }else{
1521         res = afpRangeNotLocked;
1522       }
1523       params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
1524       params.ioParam.ioReqCount = 1;
1525       PBUnlockRangeSync(&params);
1526     }
1527     if( res == noErr ){
1528       id->locked = -1;
1529       rc = SQLITE_OK;
1530     }else{
1531       rc = SQLITE_BUSY;
1532     }
1533   }
1534   return rc;
1535 #endif
1536 }
1537 
1538 /*
1539 ** Unlock the given file descriptor.  If the file descriptor was
1540 ** not previously locked, then this routine is a no-op.  If this
1541 ** library was compiled with large file support (LFS) but LFS is not
1542 ** available on the host, then an SQLITE_NOLFS is returned.
1543 */
1544 int sqliteOsUnlock(OsFile *id){
1545 #if OS_UNIX
1546   int rc;
1547   if( !id->locked ) return SQLITE_OK;
1548   sqliteOsEnterMutex();
1549   assert( id->pLock->cnt!=0 );
1550   if( id->pLock->cnt>1 ){
1551     id->pLock->cnt--;
1552     rc = SQLITE_OK;
1553   }else{
1554     struct flock lock;
1555     int s;
1556     lock.l_type = F_UNLCK;
1557     lock.l_whence = SEEK_SET;
1558     lock.l_start = lock.l_len = 0L;
1559     s = fcntl(id->fd, F_SETLK, &lock);
1560     if( s!=0 ){
1561       rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1562     }else{
1563       rc = SQLITE_OK;
1564       id->pLock->cnt = 0;
1565     }
1566   }
1567   if( rc==SQLITE_OK ){
1568     /* Decrement the count of locks against this same file.  When the
1569     ** count reaches zero, close any other file descriptors whose close
1570     ** was deferred because of outstanding locks.
1571     */
1572     struct openCnt *pOpen = id->pOpen;
1573     pOpen->nLock--;
1574     assert( pOpen->nLock>=0 );
1575     if( pOpen->nLock==0 && pOpen->nPending>0 ){
1576       int i;
1577       for(i=0; i<pOpen->nPending; i++){
1578         close(pOpen->aPending[i]);
1579       }
1580       sqliteFree(pOpen->aPending);
1581       pOpen->nPending = 0;
1582       pOpen->aPending = 0;
1583     }
1584   }
1585   sqliteOsLeaveMutex();
1586   id->locked = 0;
1587   return rc;
1588 #endif
1589 #if OS_WIN
1590   int rc;
1591   if( id->locked==0 ){
1592     rc = SQLITE_OK;
1593   }else if( isNT() || id->locked<0 ){
1594     UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
1595     rc = SQLITE_OK;
1596     id->locked = 0;
1597   }else{
1598     UnlockFile(id->h, FIRST_LOCKBYTE+id->locked, 0, 1, 0);
1599     rc = SQLITE_OK;
1600     id->locked = 0;
1601   }
1602   return rc;
1603 #endif
1604 #if OS_MAC
1605   int rc;
1606   ParamBlockRec params;
1607   memset(&params, 0, sizeof(params));
1608   params.ioParam.ioRefNum = id->refNumRF;
1609   params.ioParam.ioPosMode = fsFromStart;
1610   if( id->locked==0 || id->refNumRF == -1 ){
1611     rc = SQLITE_OK;
1612   }else if( id->locked<0 ){
1613     params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
1614     params.ioParam.ioReqCount = N_LOCKBYTE;
1615     PBUnlockRangeSync(&params);
1616     rc = SQLITE_OK;
1617     id->locked = 0;
1618   }else{
1619     params.ioParam.ioPosOffset = FIRST_LOCKBYTE+id->locked;
1620     params.ioParam.ioReqCount = 1;
1621     PBUnlockRangeSync(&params);
1622     rc = SQLITE_OK;
1623     id->locked = 0;
1624   }
1625   return rc;
1626 #endif
1627 }
1628 
1629 /*
1630 ** Get information to seed the random number generator.  The seed
1631 ** is written into the buffer zBuf[256].  The calling function must
1632 ** supply a sufficiently large buffer.
1633 */
1634 int sqliteOsRandomSeed(char *zBuf){
1635   /* We have to initialize zBuf to prevent valgrind from reporting
1636   ** errors.  The reports issued by valgrind are incorrect - we would
1637   ** prefer that the randomness be increased by making use of the
1638   ** uninitialized space in zBuf - but valgrind errors tend to worry
1639   ** some users.  Rather than argue, it seems easier just to initialize
1640   ** the whole array and silence valgrind, even if that means less randomness
1641   ** in the random seed.
1642   **
1643   ** When testing, initializing zBuf[] to zero is all we do.  That means
1644   ** that we always use the same random number sequence.* This makes the
1645   ** tests repeatable.
1646   */
1647   memset(zBuf, 0, 256);
1648 #if OS_UNIX && !defined(SQLITE_TEST)
1649   {
1650     int pid;
1651     time((time_t*)zBuf);
1652     pid = getpid();
1653     memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1654   }
1655 #endif
1656 #if OS_WIN && !defined(SQLITE_TEST)
1657   GetSystemTime((LPSYSTEMTIME)zBuf);
1658 #endif
1659 #if OS_MAC
1660   {
1661     int pid;
1662     Microseconds((UnsignedWide*)zBuf);
1663     pid = getpid();
1664     memcpy(&zBuf[sizeof(UnsignedWide)], &pid, sizeof(pid));
1665   }
1666 #endif
1667   return SQLITE_OK;
1668 }
1669 
/*
** Sleep for roughly "ms" milliseconds and return the number of
** milliseconds that were requested from the operating system.
**
** Unix without usleep() can only sleep whole seconds, so the request
** is rounded up to the next second.  MacOS sleeps in ticks of 1/60
** second and returns the tick count converted back to milliseconds.
*/
int sqliteOsSleep(int ms){
#if OS_UNIX
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
#else
  int nSec = (ms+999)/1000;   /* Whole seconds, rounded up */
  sleep(nSec);
  return 1000*nSec;
#endif
#endif
#if OS_WIN
  Sleep(ms);
  return ms;
#endif
#if OS_MAC
  UInt32 finalTicks;
  UInt32 nTick = (((UInt32)ms+16)*3)/50;  /* 1/60 sec per tick */
  Delay(nTick, &finalTicks);
  return (int)((nTick*50)/3);
#endif
}
1694 
/*
** File-scope state used by sqliteOsEnterMutex() and sqliteOsLeaveMutex().
**
** inMutex is set while the mutex is held and is only consulted by
** assert() statements.  The remaining variable is the platform's lock
** object; which one exists depends on the threading macro in effect.
*/
static int inMutex = 0;
#if defined(SQLITE_UNIX_THREADS)
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
#if defined(SQLITE_W32_THREADS)
static CRITICAL_SECTION cs;
#endif
#if defined(SQLITE_MACOS_MULTITASKING)
static MPCriticalRegionID criticalRegion;
#endif
1708 
1709 /*
1710 ** The following pair of routine implement mutual exclusion for
1711 ** multi-threaded processes.  Only a single thread is allowed to
1712 ** executed code that is surrounded by EnterMutex() and LeaveMutex().
1713 **
1714 ** SQLite uses only a single Mutex.  There is not much critical
1715 ** code and what little there is executes quickly and without blocking.
1716 */
1717 void sqliteOsEnterMutex(){
1718 #ifdef SQLITE_UNIX_THREADS
1719   pthread_mutex_lock(&mutex);
1720 #endif
1721 #ifdef SQLITE_W32_THREADS
1722   static int isInit = 0;
1723   while( !isInit ){
1724     static long lock = 0;
1725     if( InterlockedIncrement(&lock)==1 ){
1726       InitializeCriticalSection(&cs);
1727       isInit = 1;
1728     }else{
1729       Sleep(1);
1730     }
1731   }
1732   EnterCriticalSection(&cs);
1733 #endif
1734 #ifdef SQLITE_MACOS_MULTITASKING
1735   static volatile int notInit = 1;
1736   if( notInit ){
1737     if( notInit == 2 ) /* as close as you can get to thread safe init */
1738       MPYield();
1739     else{
1740       notInit = 2;
1741       MPCreateCriticalRegion(&criticalRegion);
1742       notInit = 0;
1743     }
1744   }
1745   MPEnterCriticalRegion(criticalRegion, kDurationForever);
1746 #endif
1747   assert( !inMutex );
1748   inMutex = 1;
1749 }
1750 void sqliteOsLeaveMutex(){
1751   assert( inMutex );
1752   inMutex = 0;
1753 #ifdef SQLITE_UNIX_THREADS
1754   pthread_mutex_unlock(&mutex);
1755 #endif
1756 #ifdef SQLITE_W32_THREADS
1757   LeaveCriticalSection(&cs);
1758 #endif
1759 #ifdef SQLITE_MACOS_MULTITASKING
1760   MPExitCriticalRegion(criticalRegion);
1761 #endif
1762 }
1763 
/*
** Turn a relative pathname into a full pathname.  The result is stored
** in space obtained from sqliteMalloc(); the caller is responsible for
** freeing it once it is no longer needed.
**
** Unix: a name starting with '/' is copied as is, anything else is
** appended to the current working directory with a '/' in between.
**
** Windows: the name is expanded with GetFullPathName().  Returns 0 if
** the result buffer cannot be allocated.
**
** MacOS: a name starting with ':' is appended (without that colon) to
** the current working directory; a name containing a ':' elsewhere is
** copied as is; any other name is appended to the working directory.
*/
char *sqliteOsFullPathname(const char *zRelative){
#if OS_UNIX
  char *zFull = 0;
  char zCwd[5000];
  if( zRelative[0]!='/' ){
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), "/", zRelative,
                    (char*)0);
  }else{
    sqliteSetString(&zFull, zRelative, (char*)0);
  }
  return zFull;
#endif
#if OS_WIN
  char *zNotUsed;
  char *zFull;
  int nByte;
  /* The first call only measures how much space the result needs */
  nByte = GetFullPathName(zRelative, 0, 0, &zNotUsed) + 1;
  zFull = sqliteMalloc( nByte );
  if( zFull!=0 ){
    GetFullPathName(zRelative, nByte, zFull, &zNotUsed);
  }
  return zFull;
#endif
#if OS_MAC
  char *zFull = 0;
  char zCwd[_MAX_PATH+1];
  if( zRelative[0]==':' ){
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), &(zRelative[1]),
                    (char*)0);
  }else if( strchr(zRelative, ':') ){
    sqliteSetString(&zFull, zRelative, (char*)0);
  }else{
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), zRelative, (char*)0);
  }
  return zFull;
#endif
}
1809 
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqliteOsCurrentTime().  This is used for testing.
*/
#ifdef SQLITE_TEST
int sqlite_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found.
**
** The time cannot be found when this file is built for a platform that
** has no clock code below.  In that case *prNow is left untouched and 1
** is returned rather than reporting success with an unset result.
**
** In SQLITE_TEST builds a non-zero sqlite_current_time (seconds since
** 1970-01-01) overrides whatever the platform reported.
*/
int sqliteOsCurrentTime(double *prNow){
  int rc = 1;     /* Becomes 0 once *prNow has been written */
#if OS_UNIX
  time_t t;
  time(&t);
  /* Seconds since 1970-01-01 00:00 UTC, which is Julian Day 2440587.5 */
  *prNow = t/86400.0 + 2440587.5;
  rc = 0;
#endif
#if OS_WIN
  FILETIME ft;
  /* FILETIME structure is a 64-bit value representing the number of
     100-nanosecond intervals since January 1, 1601 (= JD 2305813.5).
  */
  double now;
  GetSystemTimeAsFileTime( &ft );
  now = ((double)ft.dwHighDateTime) * 4294967296.0;
  *prNow = (now + ft.dwLowDateTime)/864000000000.0 + 2305813.5;
  rc = 0;
#endif
#ifdef SQLITE_TEST
  if( sqlite_current_time ){
    *prNow = sqlite_current_time/86400.0 + 2440587.5;
    rc = 0;
  }
#endif
  return rc;
}
1846