xref: /titanic_44/usr/src/lib/libsqlite/src/os.c (revision c5c4113dfcabb1eed3d4bdf7609de5170027a794)
1  
2  #pragma ident	"%Z%%M%	%I%	%E% SMI"
3  
4  /*
5  ** 2001 September 16
6  **
7  ** The author disclaims copyright to this source code.  In place of
8  ** a legal notice, here is a blessing:
9  **
10  **    May you do good and not evil.
11  **    May you find forgiveness for yourself and forgive others.
12  **    May you share freely, never taking more than you give.
13  **
14  ******************************************************************************
15  **
16  ** This file contains code that is specific to particular operating
17  ** systems.  The purpose of this file is to provide a uniform abstraction
18  ** on which the rest of SQLite can operate.
19  */
20  #include "os.h"          /* Must be first to enable large file support */
21  #include "sqliteInt.h"
22  
23  #if OS_UNIX
24  # include <time.h>
25  # include <errno.h>
26  # include <unistd.h>
27  # ifndef O_LARGEFILE
28  #  define O_LARGEFILE 0
29  # endif
30  # ifdef SQLITE_DISABLE_LFS
31  #  undef O_LARGEFILE
32  #  define O_LARGEFILE 0
33  # endif
34  # ifndef O_NOFOLLOW
35  #  define O_NOFOLLOW 0
36  # endif
37  # ifndef O_BINARY
38  #  define O_BINARY 0
39  # endif
40  #endif
41  
42  
43  #if OS_WIN
44  # include <winbase.h>
45  #endif
46  
47  #if OS_MAC
48  # include <extras.h>
49  # include <path2fss.h>
50  # include <TextUtils.h>
51  # include <FinderRegistry.h>
52  # include <Folders.h>
53  # include <Timer.h>
54  # include <OSUtils.h>
55  #endif
56  
57  /*
58  ** The DJGPP compiler environment looks mostly like Unix, but it
59  ** lacks the fcntl() system call.  So redefine fcntl() to be something
60  ** that always succeeds.  This means that locking does not occur under
** DJGPP.  But it's DOS - what did you expect?
62  */
63  #ifdef __DJGPP__
64  # define fcntl(A,B,C) 0
65  #endif
66  
67  /*
68  ** Macros used to determine whether or not to use threads.  The
69  ** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for
70  ** Posix threads and SQLITE_W32_THREADS is defined if we are
71  ** synchronizing using Win32 threads.
72  */
73  #if OS_UNIX && defined(THREADSAFE) && THREADSAFE
74  # include <pthread.h>
75  # define SQLITE_UNIX_THREADS 1
76  #endif
77  #if OS_WIN && defined(THREADSAFE) && THREADSAFE
78  # define SQLITE_W32_THREADS 1
79  #endif
80  #if OS_MAC && defined(THREADSAFE) && THREADSAFE
81  # include <Multiprocessing.h>
82  # define SQLITE_MACOS_MULTITASKING 1
83  #endif
84  
85  /*
86  ** Macros for performance tracing.  Normally turned off
87  */
88  #if 0
89  static int last_page = 0;
90  __inline__ unsigned long long int hwtime(void){
91    unsigned long long int x;
92    __asm__("rdtsc\n\t"
93            "mov %%edx, %%ecx\n\t"
94            :"=A" (x));
95    return x;
96  }
97  static unsigned long long int g_start;
98  static unsigned int elapse;
99  #define TIMER_START       g_start=hwtime()
100  #define TIMER_END         elapse=hwtime()-g_start
101  #define SEEK(X)           last_page=(X)
102  #define TRACE1(X)         fprintf(stderr,X)
103  #define TRACE2(X,Y)       fprintf(stderr,X,Y)
104  #define TRACE3(X,Y,Z)     fprintf(stderr,X,Y,Z)
105  #define TRACE4(X,Y,Z,A)   fprintf(stderr,X,Y,Z,A)
106  #define TRACE5(X,Y,Z,A,B) fprintf(stderr,X,Y,Z,A,B)
107  #else
108  #define TIMER_START
109  #define TIMER_END
110  #define SEEK(X)
111  #define TRACE1(X)
112  #define TRACE2(X,Y)
113  #define TRACE3(X,Y,Z)
114  #define TRACE4(X,Y,Z,A)
115  #define TRACE5(X,Y,Z,A,B)
116  #endif
117  
118  
119  #if OS_UNIX
120  /*
121  ** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
122  ** section 6.5.2.2 lines 483 through 490 specify that when a process
123  ** sets or clears a lock, that operation overrides any prior locks set
124  ** by the same process.  It does not explicitly say so, but this implies
125  ** that it overrides locks set by the same process using a different
126  ** file descriptor.  Consider this test case:
127  **
128  **       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
129  **       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
130  **
131  ** Suppose ./file1 and ./file2 are really the same file (because
132  ** one is a hard or symbolic link to the other) then if you set
133  ** an exclusive lock on fd1, then try to get an exclusive lock
134  ** on fd2, it works.  I would have expected the second lock to
135  ** fail since there was already a lock on the file due to fd1.
136  ** But not so.  Since both locks came from the same process, the
137  ** second overrides the first, even though they were on different
138  ** file descriptors opened on different file names.
139  **
140  ** Bummer.  If you ask me, this is broken.  Badly broken.  It means
141  ** that we cannot use POSIX locks to synchronize file access among
142  ** competing threads of the same process.  POSIX locks will work fine
143  ** to synchronize access for threads in separate processes, but not
144  ** threads within the same process.
145  **
146  ** To work around the problem, SQLite has to manage file locks internally
147  ** on its own.  Whenever a new database is opened, we have to find the
148  ** specific inode of the database file (the inode is determined by the
149  ** st_dev and st_ino fields of the stat structure that fstat() fills in)
150  ** and check for locks already existing on that inode.  When locks are
151  ** created or removed, we have to look at our own internal record of the
152  ** locks to see if another thread has previously set a lock on that same
153  ** inode.
154  **
155  ** The OsFile structure for POSIX is no longer just an integer file
156  ** descriptor.  It is now a structure that holds the integer file
157  ** descriptor and a pointer to a structure that describes the internal
158  ** locks on the corresponding inode.  There is one locking structure
159  ** per inode, so if the same inode is opened twice, both OsFile structures
160  ** point to the same locking structure.  The locking structure keeps
161  ** a reference count (so we will know when to delete it) and a "cnt"
162  ** field that tells us its internal lock status.  cnt==0 means the
163  ** file is unlocked.  cnt==-1 means the file has an exclusive lock.
164  ** cnt>0 means there are cnt shared locks on the file.
165  **
166  ** Any attempt to lock or unlock a file first checks the locking
167  ** structure.  The fcntl() system call is only invoked to set a
168  ** POSIX lock if the internal lock structure transitions between
169  ** a locked and an unlocked state.
170  **
171  ** 2004-Jan-11:
172  ** More recent discoveries about POSIX advisory locks.  (The more
** I discover, the more I realize that POSIX advisory locks are
174  ** an abomination.)
175  **
176  ** If you close a file descriptor that points to a file that has locks,
177  ** all locks on that file that are owned by the current process are
178  ** released.  To work around this problem, each OsFile structure contains
179  ** a pointer to an openCnt structure.  There is one openCnt structure
180  ** per open inode, which means that multiple OsFiles can point to a single
181  ** openCnt.  When an attempt is made to close an OsFile, if there are
182  ** other OsFiles open on the same inode that are holding locks, the call
183  ** to close() the file descriptor is deferred until all of the locks clear.
184  ** The openCnt structure keeps a list of file descriptors that need to
185  ** be closed and that list is walked (and cleared) when the last lock
186  ** clears.
187  **
188  ** First, under Linux threads, because each thread has a separate
189  ** process ID, lock operations in one thread do not override locks
190  ** to the same file in other threads.  Linux threads behave like
191  ** separate processes in this respect.  But, if you close a file
192  ** descriptor in linux threads, all locks are cleared, even locks
193  ** on other threads and even though the other threads have different
194  ** process IDs.  Linux threads is inconsistent in this respect.
195  ** (I'm beginning to think that linux threads is an abomination too.)
196  ** The consequence of this all is that the hash table for the lockInfo
197  ** structure has to include the process id as part of its key because
198  ** locks in different threads are treated as distinct.  But the
199  ** openCnt structure should not include the process id in its
200  ** key because close() clears lock on all threads, not just the current
201  ** thread.  Were it not for this goofiness in linux threads, we could
202  ** combine the lockInfo and openCnt structures into a single structure.
203  */
204  
/*
** Lookup key for the lockInfo hash table.  A lockInfo structure is
** located by the inode of the file together with a process ID.  The
** process ID has to be part of the key because on some threading
** implementations (ex: linux) each thread has a separate process ID.
*/
struct lockKey {
  dev_t dev;   /* Device number of the file */
  ino_t ino;   /* Inode number of the file */
  pid_t pid;   /* Process ID that the lock information belongs to */
};
217  
218  /*
219  ** An instance of the following structure is allocated for each open
220  ** inode on each thread with a different process ID.  (Threads have
221  ** different process IDs on linux, but not on most other unixes.)
222  **
223  ** A single inode can have multiple file descriptors, so each OsFile
224  ** structure contains a pointer to an instance of this object and this
225  ** object keeps a count of the number of OsFiles pointing to it.
226  */
227  struct lockInfo {
228    struct lockKey key;  /* The lookup key */
229    int cnt;             /* 0: unlocked.  -1: write lock.  1...: read lock. */
230    int nRef;            /* Number of pointers to this structure */
231  };
232  
/*
** Lookup key for the openCnt hash table.  It identifies an inode in the
** same way as lockKey does, except that the process ID is left out.
*/
struct openKey {
  dev_t dev;   /* Device number of the file */
  ino_t ino;   /* Inode number of the file */
};
242  
243  /*
244  ** An instance of the following structure is allocated for each open
245  ** inode.  This structure keeps track of the number of locks on that
246  ** inode.  If a close is attempted against an inode that is holding
247  ** locks, the close is deferred until all locks clear by adding the
248  ** file descriptor to be closed to the pending list.
249  */
250  struct openCnt {
251    struct openKey key;   /* The lookup key */
252    int nRef;             /* Number of pointers to this structure */
253    int nLock;            /* Number of outstanding locks */
254    int nPending;         /* Number of pending close() operations */
255    int *aPending;        /* Malloced space holding fd's awaiting a close() */
256  };
257  
258  /*
259  ** These hash table maps inodes and process IDs into lockInfo and openCnt
260  ** structures.  Access to these hash tables must be protected by a mutex.
261  */
262  static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
263  static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
264  
265  /*
266  ** Release a lockInfo structure previously allocated by findLockInfo().
267  */
releaseLockInfo(struct lockInfo * pLock)268  static void releaseLockInfo(struct lockInfo *pLock){
269    pLock->nRef--;
270    if( pLock->nRef==0 ){
271      sqliteHashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
272      sqliteFree(pLock);
273    }
274  }
275  
276  /*
277  ** Release a openCnt structure previously allocated by findLockInfo().
278  */
releaseOpenCnt(struct openCnt * pOpen)279  static void releaseOpenCnt(struct openCnt *pOpen){
280    pOpen->nRef--;
281    if( pOpen->nRef==0 ){
282      sqliteHashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
283      sqliteFree(pOpen->aPending);
284      sqliteFree(pOpen);
285    }
286  }
287  
288  /*
289  ** Given a file descriptor, locate lockInfo and openCnt structures that
290  ** describes that file descriptor.  Create a new ones if necessary.  The
291  ** return values might be unset if an error occurs.
292  **
293  ** Return the number of errors.
294  */
findLockInfo(int fd,struct lockInfo ** ppLock,struct openCnt ** ppOpen)295  int findLockInfo(
296    int fd,                      /* The file descriptor used in the key */
297    struct lockInfo **ppLock,    /* Return the lockInfo structure here */
298    struct openCnt **ppOpen   /* Return the openCnt structure here */
299  ){
300    int rc;
301    struct lockKey key1;
302    struct openKey key2;
303    struct stat statbuf;
304    struct lockInfo *pLock;
305    struct openCnt *pOpen;
306    rc = fstat(fd, &statbuf);
307    if( rc!=0 ) return 1;
308    memset(&key1, 0, sizeof(key1));
309    key1.dev = statbuf.st_dev;
310    key1.ino = statbuf.st_ino;
311    key1.pid = getpid();
312    memset(&key2, 0, sizeof(key2));
313    key2.dev = statbuf.st_dev;
314    key2.ino = statbuf.st_ino;
315    pLock = (struct lockInfo*)sqliteHashFind(&lockHash, &key1, sizeof(key1));
316    if( pLock==0 ){
317      struct lockInfo *pOld;
318      pLock = sqliteMallocRaw( sizeof(*pLock) );
319      if( pLock==0 ) return 1;
320      pLock->key = key1;
321      pLock->nRef = 1;
322      pLock->cnt = 0;
323      pOld = sqliteHashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
324      if( pOld!=0 ){
325        assert( pOld==pLock );
326        sqliteFree(pLock);
327        return 1;
328      }
329    }else{
330      pLock->nRef++;
331    }
332    *ppLock = pLock;
333    pOpen = (struct openCnt*)sqliteHashFind(&openHash, &key2, sizeof(key2));
334    if( pOpen==0 ){
335      struct openCnt *pOld;
336      pOpen = sqliteMallocRaw( sizeof(*pOpen) );
337      if( pOpen==0 ){
338        releaseLockInfo(pLock);
339        return 1;
340      }
341      pOpen->key = key2;
342      pOpen->nRef = 1;
343      pOpen->nLock = 0;
344      pOpen->nPending = 0;
345      pOpen->aPending = 0;
346      pOld = sqliteHashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
347      if( pOld!=0 ){
348        assert( pOld==pOpen );
349        sqliteFree(pOpen);
350        releaseLockInfo(pLock);
351        return 1;
352      }
353    }else{
354      pOpen->nRef++;
355    }
356    *ppOpen = pOpen;
357    return 0;
358  }
359  
360  #endif  /** POSIX advisory lock work-around **/
361  
/*
** If we compile with the SQLITE_TEST macro set, then the following block
** of code will give us the ability to simulate a disk I/O error.  This
** is used for testing the I/O recovery logic.
**
** sqlite_io_error_pending is a countdown.  Each SimulateIOError(A) that
** is reached while the counter is non-zero decrements it; the call that
** finds the counter equal to 1 makes the enclosing function return A.
**
** The macro is wrapped in do{...}while(0) so that it behaves as a single
** statement and cannot capture a following "else".
*/
#ifdef SQLITE_TEST
int sqlite_io_error_pending = 0;
static void local_ioerr(void){
  sqlite_io_error_pending = 0;  /* Really just a place to set a breakpoint */
}
#define SimulateIOError(A)  \
   do{ \
     if( sqlite_io_error_pending ){ \
       if( sqlite_io_error_pending-- == 1 ){ local_ioerr(); return A; } \
     } \
   }while(0)
#else
#define SimulateIOError(A)
#endif
378  
/*
** When testing, keep a count of the number of open files.  In a normal
** build OpenCounter() expands to nothing.
*/
#ifndef SQLITE_TEST
# define OpenCounter(X)
#else
int sqlite_open_file_count = 0;
# define OpenCounter(X)  sqlite_open_file_count+=(X)
#endif
388  
389  
390  /*
391  ** Delete the named file
392  */
sqliteOsDelete(const char * zFilename)393  int sqliteOsDelete(const char *zFilename){
394  #if OS_UNIX
395    unlink(zFilename);
396  #endif
397  #if OS_WIN
398    DeleteFile(zFilename);
399  #endif
400  #if OS_MAC
401    unlink(zFilename);
402  #endif
403    return SQLITE_OK;
404  }
405  
/*
** Return TRUE (non-zero) if the named file exists and FALSE (zero)
** otherwise.
*/
int sqliteOsFileExists(const char *zFilename){
#if OS_WIN
  return GetFileAttributes(zFilename) != 0xffffffff;
#endif
#if OS_UNIX || OS_MAC
  return access(zFilename, 0)==0;
#endif
}
420  
421  
#if 0 /* NOT USED */
/*
** Change the name of an existing file.  Returns SQLITE_OK on success
** and SQLITE_ERROR on failure.  This routine is compiled out.
*/
int sqliteOsFileRename(const char *zOldName, const char *zNewName){
#if OS_UNIX
  /* Make the new name first, then drop the old one */
  if( link(zOldName, zNewName)!=0 ) return SQLITE_ERROR;
  unlink(zOldName);
  return SQLITE_OK;
#endif
#if OS_WIN
  if( MoveFile(zOldName, zNewName) ) return SQLITE_OK;
  return SQLITE_ERROR;
#endif
#if OS_MAC
  /**** FIX ME ***/
  return SQLITE_ERROR;
#endif
}
#endif /* NOT USED */
446  
447  /*
448  ** Attempt to open a file for both reading and writing.  If that
449  ** fails, try opening it read-only.  If the file does not exist,
450  ** try to create it.
451  **
452  ** On success, a handle for the open file is written to *id
453  ** and *pReadonly is set to 0 if the file was opened for reading and
454  ** writing or 1 if the file was opened read-only.  The function returns
455  ** SQLITE_OK.
456  **
457  ** On failure, the function returns SQLITE_CANTOPEN and leaves
458  ** *id and *pReadonly unchanged.
459  */
sqliteOsOpenReadWrite(const char * zFilename,OsFile * id,int * pReadonly)460  int sqliteOsOpenReadWrite(
461    const char *zFilename,
462    OsFile *id,
463    int *pReadonly
464  ){
465  #if OS_UNIX
466    int rc;
467    id->dirfd = -1;
468    id->fd = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644);
469    if( id->fd<0 ){
470  #ifdef EISDIR
471      if( errno==EISDIR ){
472        return SQLITE_CANTOPEN;
473      }
474  #endif
475      id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
476      if( id->fd<0 ){
477        return SQLITE_CANTOPEN;
478      }
479      *pReadonly = 1;
480    }else{
481      *pReadonly = 0;
482    }
483    sqliteOsEnterMutex();
484    rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
485    sqliteOsLeaveMutex();
486    if( rc ){
487      close(id->fd);
488      return SQLITE_NOMEM;
489    }
490    id->locked = 0;
491    TRACE3("OPEN    %-3d %s\n", id->fd, zFilename);
492    OpenCounter(+1);
493    return SQLITE_OK;
494  #endif
495  #if OS_WIN
496    HANDLE h = CreateFile(zFilename,
497       GENERIC_READ | GENERIC_WRITE,
498       FILE_SHARE_READ | FILE_SHARE_WRITE,
499       NULL,
500       OPEN_ALWAYS,
501       FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
502       NULL
503    );
504    if( h==INVALID_HANDLE_VALUE ){
505      h = CreateFile(zFilename,
506         GENERIC_READ,
507         FILE_SHARE_READ,
508         NULL,
509         OPEN_ALWAYS,
510         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
511         NULL
512      );
513      if( h==INVALID_HANDLE_VALUE ){
514        return SQLITE_CANTOPEN;
515      }
516      *pReadonly = 1;
517    }else{
518      *pReadonly = 0;
519    }
520    id->h = h;
521    id->locked = 0;
522    OpenCounter(+1);
523    return SQLITE_OK;
524  #endif
525  #if OS_MAC
526    FSSpec fsSpec;
527  # ifdef _LARGE_FILE
528    HFSUniStr255 dfName;
529    FSRef fsRef;
530    if( __path2fss(zFilename, &fsSpec) != noErr ){
531      if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
532        return SQLITE_CANTOPEN;
533    }
534    if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
535      return SQLITE_CANTOPEN;
536    FSGetDataForkName(&dfName);
537    if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
538                   fsRdWrShPerm, &(id->refNum)) != noErr ){
539      if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
540                     fsRdWrPerm, &(id->refNum)) != noErr ){
541        if (FSOpenFork(&fsRef, dfName.length, dfName.unicode,
542                     fsRdPerm, &(id->refNum)) != noErr )
543          return SQLITE_CANTOPEN;
544        else
545          *pReadonly = 1;
546      } else
547        *pReadonly = 0;
548    } else
549      *pReadonly = 0;
550  # else
551    __path2fss(zFilename, &fsSpec);
552    if( !sqliteOsFileExists(zFilename) ){
553      if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
554        return SQLITE_CANTOPEN;
555    }
556    if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNum)) != noErr ){
557      if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr ){
558        if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
559          return SQLITE_CANTOPEN;
560        else
561          *pReadonly = 1;
562      } else
563        *pReadonly = 0;
564    } else
565      *pReadonly = 0;
566  # endif
567    if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
568      id->refNumRF = -1;
569    }
570    id->locked = 0;
571    id->delOnClose = 0;
572    OpenCounter(+1);
573    return SQLITE_OK;
574  #endif
575  }
576  
577  
578  /*
579  ** Attempt to open a new file for exclusive access by this process.
580  ** The file will be opened for both reading and writing.  To avoid
581  ** a potential security problem, we do not allow the file to have
582  ** previously existed.  Nor do we allow the file to be a symbolic
583  ** link.
584  **
585  ** If delFlag is true, then make arrangements to automatically delete
586  ** the file when it is closed.
587  **
588  ** On success, write the file handle into *id and return SQLITE_OK.
589  **
590  ** On failure, return SQLITE_CANTOPEN.
591  */
sqliteOsOpenExclusive(const char * zFilename,OsFile * id,int delFlag)592  int sqliteOsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
593  #if OS_UNIX
594    int rc;
595    if( access(zFilename, 0)==0 ){
596      return SQLITE_CANTOPEN;
597    }
598    id->dirfd = -1;
599    id->fd = open(zFilename,
600                  O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY, 0600);
601    if( id->fd<0 ){
602      return SQLITE_CANTOPEN;
603    }
604    sqliteOsEnterMutex();
605    rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
606    sqliteOsLeaveMutex();
607    if( rc ){
608      close(id->fd);
609      unlink(zFilename);
610      return SQLITE_NOMEM;
611    }
612    id->locked = 0;
613    if( delFlag ){
614      unlink(zFilename);
615    }
616    TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
617    OpenCounter(+1);
618    return SQLITE_OK;
619  #endif
620  #if OS_WIN
621    HANDLE h;
622    int fileflags;
623    if( delFlag ){
624      fileflags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_RANDOM_ACCESS
625                       | FILE_FLAG_DELETE_ON_CLOSE;
626    }else{
627      fileflags = FILE_FLAG_RANDOM_ACCESS;
628    }
629    h = CreateFile(zFilename,
630       GENERIC_READ | GENERIC_WRITE,
631       0,
632       NULL,
633       CREATE_ALWAYS,
634       fileflags,
635       NULL
636    );
637    if( h==INVALID_HANDLE_VALUE ){
638      return SQLITE_CANTOPEN;
639    }
640    id->h = h;
641    id->locked = 0;
642    OpenCounter(+1);
643    return SQLITE_OK;
644  #endif
645  #if OS_MAC
646    FSSpec fsSpec;
647  # ifdef _LARGE_FILE
648    HFSUniStr255 dfName;
649    FSRef fsRef;
650    __path2fss(zFilename, &fsSpec);
651    if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
652      return SQLITE_CANTOPEN;
653    if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
654      return SQLITE_CANTOPEN;
655    FSGetDataForkName(&dfName);
656    if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
657                   fsRdWrPerm, &(id->refNum)) != noErr )
658      return SQLITE_CANTOPEN;
659  # else
660    __path2fss(zFilename, &fsSpec);
661    if( HCreate(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, 'SQLI', cDocumentFile) != noErr )
662      return SQLITE_CANTOPEN;
663    if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrPerm, &(id->refNum)) != noErr )
664      return SQLITE_CANTOPEN;
665  # endif
666    id->refNumRF = -1;
667    id->locked = 0;
668    id->delOnClose = delFlag;
669    if (delFlag)
670      id->pathToDel = sqliteOsFullPathname(zFilename);
671    OpenCounter(+1);
672    return SQLITE_OK;
673  #endif
674  }
675  
676  /*
677  ** Attempt to open a new file for read-only access.
678  **
679  ** On success, write the file handle into *id and return SQLITE_OK.
680  **
681  ** On failure, return SQLITE_CANTOPEN.
682  */
sqliteOsOpenReadOnly(const char * zFilename,OsFile * id)683  int sqliteOsOpenReadOnly(const char *zFilename, OsFile *id){
684  #if OS_UNIX
685    int rc;
686    id->dirfd = -1;
687    id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
688    if( id->fd<0 ){
689      return SQLITE_CANTOPEN;
690    }
691    sqliteOsEnterMutex();
692    rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
693    sqliteOsLeaveMutex();
694    if( rc ){
695      close(id->fd);
696      return SQLITE_NOMEM;
697    }
698    id->locked = 0;
699    TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
700    OpenCounter(+1);
701    return SQLITE_OK;
702  #endif
703  #if OS_WIN
704    HANDLE h = CreateFile(zFilename,
705       GENERIC_READ,
706       0,
707       NULL,
708       OPEN_EXISTING,
709       FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
710       NULL
711    );
712    if( h==INVALID_HANDLE_VALUE ){
713      return SQLITE_CANTOPEN;
714    }
715    id->h = h;
716    id->locked = 0;
717    OpenCounter(+1);
718    return SQLITE_OK;
719  #endif
720  #if OS_MAC
721    FSSpec fsSpec;
722  # ifdef _LARGE_FILE
723    HFSUniStr255 dfName;
724    FSRef fsRef;
725    if( __path2fss(zFilename, &fsSpec) != noErr )
726      return SQLITE_CANTOPEN;
727    if( FSpMakeFSRef(&fsSpec, &fsRef) != noErr )
728      return SQLITE_CANTOPEN;
729    FSGetDataForkName(&dfName);
730    if( FSOpenFork(&fsRef, dfName.length, dfName.unicode,
731                   fsRdPerm, &(id->refNum)) != noErr )
732      return SQLITE_CANTOPEN;
733  # else
734    __path2fss(zFilename, &fsSpec);
735    if( HOpenDF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdPerm, &(id->refNum)) != noErr )
736      return SQLITE_CANTOPEN;
737  # endif
738    if( HOpenRF(fsSpec.vRefNum, fsSpec.parID, fsSpec.name, fsRdWrShPerm, &(id->refNumRF)) != noErr){
739      id->refNumRF = -1;
740    }
741    id->locked = 0;
742    id->delOnClose = 0;
743    OpenCounter(+1);
744    return SQLITE_OK;
745  #endif
746  }
747  
748  /*
749  ** Attempt to open a file descriptor for the directory that contains a
750  ** file.  This file descriptor can be used to fsync() the directory
751  ** in order to make sure the creation of a new file is actually written
752  ** to disk.
753  **
754  ** This routine is only meaningful for Unix.  It is a no-op under
755  ** windows since windows does not support hard links.
756  **
757  ** On success, a handle for a previously open file is at *id is
758  ** updated with the new directory file descriptor and SQLITE_OK is
759  ** returned.
760  **
761  ** On failure, the function returns SQLITE_CANTOPEN and leaves
762  ** *id unchanged.
763  */
sqliteOsOpenDirectory(const char * zDirname,OsFile * id)764  int sqliteOsOpenDirectory(
765    const char *zDirname,
766    OsFile *id
767  ){
768  #if OS_UNIX
769    if( id->fd<0 ){
770      /* Do not open the directory if the corresponding file is not already
771      ** open. */
772      return SQLITE_CANTOPEN;
773    }
774    assert( id->dirfd<0 );
775    id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0644);
776    if( id->dirfd<0 ){
777      return SQLITE_CANTOPEN;
778    }
779    TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
780  #endif
781    return SQLITE_OK;
782  }
783  
/*
** Optional override for the location of temporary files.  When this
** global variable points to a string which is the name of a directory,
** that directory will be used to store temporary files.  It is 0 by
** default.
*/
const char *sqlite_temp_directory = 0;
790  
791  /*
792  ** Create a temporary file name in zBuf.  zBuf must be big enough to
793  ** hold at least SQLITE_TEMPNAME_SIZE characters.
794  */
sqliteOsTempFileName(char * zBuf)795  int sqliteOsTempFileName(char *zBuf){
796  #if OS_UNIX
797    static const char *azDirs[] = {
798       0,
799       "/var/tmp",
800       "/usr/tmp",
801       "/tmp",
802       ".",
803    };
804    static unsigned char zChars[] =
805      "abcdefghijklmnopqrstuvwxyz"
806      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
807      "0123456789";
808    int i, j;
809    struct stat buf;
810    const char *zDir = ".";
811    azDirs[0] = sqlite_temp_directory;
812    for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
813      if( azDirs[i]==0 ) continue;
814      if( stat(azDirs[i], &buf) ) continue;
815      if( !S_ISDIR(buf.st_mode) ) continue;
816      if( access(azDirs[i], 07) ) continue;
817      zDir = azDirs[i];
818      break;
819    }
820    do{
821      sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
822      j = strlen(zBuf);
823      sqliteRandomness(15, &zBuf[j]);
824      for(i=0; i<15; i++, j++){
825        zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
826      }
827      zBuf[j] = 0;
828    }while( access(zBuf,0)==0 );
829  #endif
830  #if OS_WIN
831    static char zChars[] =
832      "abcdefghijklmnopqrstuvwxyz"
833      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
834      "0123456789";
835    int i, j;
836    const char *zDir;
837    char zTempPath[SQLITE_TEMPNAME_SIZE];
838    if( sqlite_temp_directory==0 ){
839      GetTempPath(SQLITE_TEMPNAME_SIZE-30, zTempPath);
840      for(i=strlen(zTempPath); i>0 && zTempPath[i-1]=='\\'; i--){}
841      zTempPath[i] = 0;
842      zDir = zTempPath;
843    }else{
844      zDir = sqlite_temp_directory;
845    }
846    for(;;){
847      sprintf(zBuf, "%s\\"TEMP_FILE_PREFIX, zDir);
848      j = strlen(zBuf);
849      sqliteRandomness(15, &zBuf[j]);
850      for(i=0; i<15; i++, j++){
851        zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
852      }
853      zBuf[j] = 0;
854      if( !sqliteOsFileExists(zBuf) ) break;
855    }
856  #endif
857  #if OS_MAC
858    static char zChars[] =
859      "abcdefghijklmnopqrstuvwxyz"
860      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
861      "0123456789";
862    int i, j;
863    char *zDir;
864    char zTempPath[SQLITE_TEMPNAME_SIZE];
865    char zdirName[32];
866    CInfoPBRec infoRec;
867    Str31 dirName;
868    memset(&infoRec, 0, sizeof(infoRec));
869    memset(zTempPath, 0, SQLITE_TEMPNAME_SIZE);
870    if( sqlite_temp_directory!=0 ){
871      zDir = sqlite_temp_directory;
872    }else if( FindFolder(kOnSystemDisk, kTemporaryFolderType,  kCreateFolder,
873         &(infoRec.dirInfo.ioVRefNum), &(infoRec.dirInfo.ioDrParID)) == noErr ){
874      infoRec.dirInfo.ioNamePtr = dirName;
875      do{
876        infoRec.dirInfo.ioFDirIndex = -1;
877        infoRec.dirInfo.ioDrDirID = infoRec.dirInfo.ioDrParID;
878        if( PBGetCatInfoSync(&infoRec) == noErr ){
879          CopyPascalStringToC(dirName, zdirName);
880          i = strlen(zdirName);
881          memmove(&(zTempPath[i+1]), zTempPath, strlen(zTempPath));
882          strcpy(zTempPath, zdirName);
883          zTempPath[i] = ':';
884        }else{
885          *zTempPath = 0;
886          break;
887        }
888      } while( infoRec.dirInfo.ioDrDirID != fsRtDirID );
889      zDir = zTempPath;
890    }
891    if( zDir[0]==0 ){
892      getcwd(zTempPath, SQLITE_TEMPNAME_SIZE-24);
893      zDir = zTempPath;
894    }
895    for(;;){
896      sprintf(zBuf, "%s"TEMP_FILE_PREFIX, zDir);
897      j = strlen(zBuf);
898      sqliteRandomness(15, &zBuf[j]);
899      for(i=0; i<15; i++, j++){
900        zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
901      }
902      zBuf[j] = 0;
903      if( !sqliteOsFileExists(zBuf) ) break;
904    }
905  #endif
906    return SQLITE_OK;
907  }
908  
909  /*
910  ** Close a file.
911  */
sqliteOsClose(OsFile * id)912  int sqliteOsClose(OsFile *id){
913  #if OS_UNIX
914    sqliteOsUnlock(id);
915    if( id->dirfd>=0 ) close(id->dirfd);
916    id->dirfd = -1;
917    sqliteOsEnterMutex();
918    if( id->pOpen->nLock ){
919      /* If there are outstanding locks, do not actually close the file just
920      ** yet because that would clear those locks.  Instead, add the file
921      ** descriptor to pOpen->aPending.  It will be automatically closed when
922      ** the last lock is cleared.
923      */
924      int *aNew;
925      struct openCnt *pOpen = id->pOpen;
926      pOpen->nPending++;
927      aNew = sqliteRealloc( pOpen->aPending, pOpen->nPending*sizeof(int) );
928      if( aNew==0 ){
929        /* If a malloc fails, just leak the file descriptor */
930      }else{
931        pOpen->aPending = aNew;
932        pOpen->aPending[pOpen->nPending-1] = id->fd;
933      }
934    }else{
935      /* There are no outstanding locks so we can close the file immediately */
936      close(id->fd);
937    }
938    releaseLockInfo(id->pLock);
939    releaseOpenCnt(id->pOpen);
940    sqliteOsLeaveMutex();
941    TRACE2("CLOSE   %-3d\n", id->fd);
942    OpenCounter(-1);
943    return SQLITE_OK;
944  #endif
945  #if OS_WIN
946    CloseHandle(id->h);
947    OpenCounter(-1);
948    return SQLITE_OK;
949  #endif
950  #if OS_MAC
951    if( id->refNumRF!=-1 )
952      FSClose(id->refNumRF);
953  # ifdef _LARGE_FILE
954    FSCloseFork(id->refNum);
955  # else
956    FSClose(id->refNum);
957  # endif
958    if( id->delOnClose ){
959      unlink(id->pathToDel);
960      sqliteFree(id->pathToDel);
961    }
962    OpenCounter(-1);
963    return SQLITE_OK;
964  #endif
965  }
966  
967  /*
968  ** Read data from a file into a buffer.  Return SQLITE_OK if all
969  ** bytes were read successfully and SQLITE_IOERR if anything goes
970  ** wrong.
971  */
sqliteOsRead(OsFile * id,void * pBuf,int amt)972  int sqliteOsRead(OsFile *id, void *pBuf, int amt){
973  #if OS_UNIX
974    int got;
975    SimulateIOError(SQLITE_IOERR);
976    TIMER_START;
977    got = read(id->fd, pBuf, amt);
978    TIMER_END;
979    TRACE4("READ    %-3d %7d %d\n", id->fd, last_page, elapse);
980    SEEK(0);
981    /* if( got<0 ) got = 0; */
982    if( got==amt ){
983      return SQLITE_OK;
984    }else{
985      return SQLITE_IOERR;
986    }
987  #endif
988  #if OS_WIN
989    DWORD got;
990    SimulateIOError(SQLITE_IOERR);
991    TRACE2("READ %d\n", last_page);
992    if( !ReadFile(id->h, pBuf, amt, &got, 0) ){
993      got = 0;
994    }
995    if( got==(DWORD)amt ){
996      return SQLITE_OK;
997    }else{
998      return SQLITE_IOERR;
999    }
1000  #endif
1001  #if OS_MAC
1002    int got;
1003    SimulateIOError(SQLITE_IOERR);
1004    TRACE2("READ %d\n", last_page);
1005  # ifdef _LARGE_FILE
1006    FSReadFork(id->refNum, fsAtMark, 0, (ByteCount)amt, pBuf, (ByteCount*)&got);
1007  # else
1008    got = amt;
1009    FSRead(id->refNum, &got, pBuf);
1010  # endif
1011    if( got==amt ){
1012      return SQLITE_OK;
1013    }else{
1014      return SQLITE_IOERR;
1015    }
1016  #endif
1017  }
1018  
1019  /*
1020  ** Write data from a buffer into a file.  Return SQLITE_OK on success
1021  ** or some other error code on failure.
1022  */
sqliteOsWrite(OsFile * id,const void * pBuf,int amt)1023  int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
1024  #if OS_UNIX
1025    int wrote = 0;
1026    SimulateIOError(SQLITE_IOERR);
1027    TIMER_START;
1028    while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
1029      amt -= wrote;
1030      pBuf = &((char*)pBuf)[wrote];
1031    }
1032    TIMER_END;
1033    TRACE4("WRITE   %-3d %7d %d\n", id->fd, last_page, elapse);
1034    SEEK(0);
1035    if( amt>0 ){
1036      return SQLITE_FULL;
1037    }
1038    return SQLITE_OK;
1039  #endif
1040  #if OS_WIN
1041    int rc;
1042    DWORD wrote;
1043    SimulateIOError(SQLITE_IOERR);
1044    TRACE2("WRITE %d\n", last_page);
1045    while( amt>0 && (rc = WriteFile(id->h, pBuf, amt, &wrote, 0))!=0 && wrote>0 ){
1046      amt -= wrote;
1047      pBuf = &((char*)pBuf)[wrote];
1048    }
1049    if( !rc || amt>(int)wrote ){
1050      return SQLITE_FULL;
1051    }
1052    return SQLITE_OK;
1053  #endif
1054  #if OS_MAC
1055    OSErr oserr;
1056    int wrote = 0;
1057    SimulateIOError(SQLITE_IOERR);
1058    TRACE2("WRITE %d\n", last_page);
1059    while( amt>0 ){
1060  # ifdef _LARGE_FILE
1061      oserr = FSWriteFork(id->refNum, fsAtMark, 0,
1062                          (ByteCount)amt, pBuf, (ByteCount*)&wrote);
1063  # else
1064      wrote = amt;
1065      oserr = FSWrite(id->refNum, &wrote, pBuf);
1066  # endif
1067      if( wrote == 0 || oserr != noErr)
1068        break;
1069      amt -= wrote;
1070      pBuf = &((char*)pBuf)[wrote];
1071    }
1072    if( oserr != noErr || amt>wrote ){
1073      return SQLITE_FULL;
1074    }
1075    return SQLITE_OK;
1076  #endif
1077  }
1078  
1079  /*
1080  ** Move the read/write pointer in a file.
1081  */
sqliteOsSeek(OsFile * id,off_t offset)1082  int sqliteOsSeek(OsFile *id, off_t offset){
1083    SEEK(offset/1024 + 1);
1084  #if OS_UNIX
1085    lseek(id->fd, offset, SEEK_SET);
1086    return SQLITE_OK;
1087  #endif
1088  #if OS_WIN
1089    {
1090      LONG upperBits = offset>>32;
1091      LONG lowerBits = offset & 0xffffffff;
1092      DWORD rc;
1093      rc = SetFilePointer(id->h, lowerBits, &upperBits, FILE_BEGIN);
1094      /* TRACE3("SEEK rc=0x%x upper=0x%x\n", rc, upperBits); */
1095    }
1096    return SQLITE_OK;
1097  #endif
1098  #if OS_MAC
1099    {
1100      off_t curSize;
1101      if( sqliteOsFileSize(id, &curSize) != SQLITE_OK ){
1102        return SQLITE_IOERR;
1103      }
1104      if( offset >= curSize ){
1105        if( sqliteOsTruncate(id, offset+1) != SQLITE_OK ){
1106          return SQLITE_IOERR;
1107        }
1108      }
1109  # ifdef _LARGE_FILE
1110      if( FSSetForkPosition(id->refNum, fsFromStart, offset) != noErr ){
1111  # else
1112      if( SetFPos(id->refNum, fsFromStart, offset) != noErr ){
1113  # endif
1114        return SQLITE_IOERR;
1115      }else{
1116        return SQLITE_OK;
1117      }
1118    }
1119  #endif
1120  }
1121  
1122  /*
1123  ** Make sure all writes to a particular file are committed to disk.
1124  **
1125  ** Under Unix, also make sure that the directory entry for the file
1126  ** has been created by fsync-ing the directory that contains the file.
1127  ** If we do not do this and we encounter a power failure, the directory
1128  ** entry for the journal might not exist after we reboot.  The next
1129  ** SQLite to access the file will not know that the journal exists (because
1130  ** the directory entry for the journal was never created) and the transaction
1131  ** will not roll back - possibly leading to database corruption.
1132  */
1133  int sqliteOsSync(OsFile *id){
1134  #if OS_UNIX
1135    SimulateIOError(SQLITE_IOERR);
1136    TRACE2("SYNC    %-3d\n", id->fd);
1137    if( fsync(id->fd) ){
1138      return SQLITE_IOERR;
1139    }else{
1140      if( id->dirfd>=0 ){
1141        TRACE2("DIRSYNC %-3d\n", id->dirfd);
1142        fsync(id->dirfd);
1143        close(id->dirfd);  /* Only need to sync once, so close the directory */
1144        id->dirfd = -1;    /* when we are done. */
1145      }
1146      return SQLITE_OK;
1147    }
1148  #endif
1149  #if OS_WIN
1150    if( FlushFileBuffers(id->h) ){
1151      return SQLITE_OK;
1152    }else{
1153      return SQLITE_IOERR;
1154    }
1155  #endif
1156  #if OS_MAC
1157  # ifdef _LARGE_FILE
1158    if( FSFlushFork(id->refNum) != noErr ){
1159  # else
1160    ParamBlockRec params;
1161    memset(&params, 0, sizeof(ParamBlockRec));
1162    params.ioParam.ioRefNum = id->refNum;
1163    if( PBFlushFileSync(&params) != noErr ){
1164  # endif
1165      return SQLITE_IOERR;
1166    }else{
1167      return SQLITE_OK;
1168    }
1169  #endif
1170  }
1171  
1172  /*
1173  ** Truncate an open file to a specified size
1174  */
1175  int sqliteOsTruncate(OsFile *id, off_t nByte){
1176    SimulateIOError(SQLITE_IOERR);
1177  #if OS_UNIX
1178    return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
1179  #endif
1180  #if OS_WIN
1181    {
1182      LONG upperBits = nByte>>32;
1183      SetFilePointer(id->h, nByte, &upperBits, FILE_BEGIN);
1184      SetEndOfFile(id->h);
1185    }
1186    return SQLITE_OK;
1187  #endif
1188  #if OS_MAC
1189  # ifdef _LARGE_FILE
1190    if( FSSetForkSize(id->refNum, fsFromStart, nByte) != noErr){
1191  # else
1192    if( SetEOF(id->refNum, nByte) != noErr ){
1193  # endif
1194      return SQLITE_IOERR;
1195    }else{
1196      return SQLITE_OK;
1197    }
1198  #endif
1199  }
1200  
1201  /*
1202  ** Determine the current size of a file in bytes
1203  */
1204  int sqliteOsFileSize(OsFile *id, off_t *pSize){
1205  #if OS_UNIX
1206    struct stat buf;
1207    SimulateIOError(SQLITE_IOERR);
1208    if( fstat(id->fd, &buf)!=0 ){
1209      return SQLITE_IOERR;
1210    }
1211    *pSize = buf.st_size;
1212    return SQLITE_OK;
1213  #endif
1214  #if OS_WIN
1215    DWORD upperBits, lowerBits;
1216    SimulateIOError(SQLITE_IOERR);
1217    lowerBits = GetFileSize(id->h, &upperBits);
1218    *pSize = (((off_t)upperBits)<<32) + lowerBits;
1219    return SQLITE_OK;
1220  #endif
1221  #if OS_MAC
1222  # ifdef _LARGE_FILE
1223    if( FSGetForkSize(id->refNum, pSize) != noErr){
1224  # else
1225    if( GetEOF(id->refNum, pSize) != noErr ){
1226  # endif
1227      return SQLITE_IOERR;
1228    }else{
1229      return SQLITE_OK;
1230    }
1231  #endif
1232  }
1233  
1234  #if OS_WIN
1235  /*
1236  ** Return true (non-zero) if we are running under WinNT, Win2K or WinXP.
1237  ** Return false (zero) for Win95, Win98, or WinME.
1238  **
1239  ** Here is an interesting observation:  Win95, Win98, and WinME lack
1240  ** the LockFileEx() API.  But we can still statically link against that
1241  ** API as long as we don't call it win running Win95/98/ME.  A call to
1242  ** this routine is used to determine if the host is Win95/98/ME or
1243  ** WinNT/2K/XP so that we will know whether or not we can safely call
1244  ** the LockFileEx() API.
1245  */
1246  int isNT(void){
1247    static int osType = 0;   /* 0=unknown 1=win95 2=winNT */
1248    if( osType==0 ){
1249      OSVERSIONINFO sInfo;
1250      sInfo.dwOSVersionInfoSize = sizeof(sInfo);
1251      GetVersionEx(&sInfo);
1252      osType = sInfo.dwPlatformId==VER_PLATFORM_WIN32_NT ? 2 : 1;
1253    }
1254    return osType==2;
1255  }
1256  #endif
1257  
1258  /*
1259  ** Windows file locking notes:  [similar issues apply to MacOS]
1260  **
1261  ** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because
1262  ** those functions are not available.  So we use only LockFile() and
1263  ** UnlockFile().
1264  **
1265  ** LockFile() prevents not just writing but also reading by other processes.
1266  ** (This is a design error on the part of Windows, but there is nothing
1267  ** we can do about that.)  So the region used for locking is at the
1268  ** end of the file where it is unlikely to ever interfere with an
1269  ** actual read attempt.
1270  **
1271  ** A database read lock is obtained by locking a single randomly-chosen
1272  ** byte out of a specific range of bytes. The lock byte is obtained at
1273  ** random so two separate readers can probably access the file at the
1274  ** same time, unless they are unlucky and choose the same lock byte.
1275  ** A database write lock is obtained by locking all bytes in the range.
1276  ** There can only be one writer.
1277  **
1278  ** A lock is obtained on the first byte of the lock range before acquiring
1279  ** either a read lock or a write lock.  This prevents two processes from
** attempting to get a lock at the same time.  The semantics of
1281  ** sqliteOsReadLock() require that if there is already a write lock, that
1282  ** lock is converted into a read lock atomically.  The lock on the first
1283  ** byte allows us to drop the old write lock and get the read lock without
1284  ** another process jumping into the middle and messing us up.  The same
1285  ** argument applies to sqliteOsWriteLock().
1286  **
1287  ** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available,
** which means we can use reader/writer locks.  When reader/writer locks
1289  ** are used, the lock is placed on the same range of bytes that is used
1290  ** for probabilistic locking in Win95/98/ME.  Hence, the locking scheme
1291  ** will support two or more Win95 readers or two or more WinNT readers.
1292  ** But a single Win95 reader will lock out all WinNT readers and a single
1293  ** WinNT reader will lock out all other Win95 readers.
1294  **
1295  ** Note: On MacOS we use the resource fork for locking.
1296  **
1297  ** The following #defines specify the range of bytes used for locking.
1298  ** N_LOCKBYTE is the number of bytes available for doing the locking.
1299  ** The first byte used to hold the lock while the lock is changing does
1300  ** not count toward this number.  FIRST_LOCKBYTE is the address of
1301  ** the first byte in the range of bytes used for locking.
1302  */
/* Number of lock bytes that follow the guard byte at FIRST_LOCKBYTE */
#define N_LOCKBYTE       10239
#if OS_MAC
/* MacOS: the lock range ends at offset 0x000fffff */
# define FIRST_LOCKBYTE   (0x000fffff - N_LOCKBYTE)
#else
/* Other systems: the lock range ends at offset 0xffffffff */
# define FIRST_LOCKBYTE   (0xffffffff - N_LOCKBYTE)
#endif
1309  
/*
** Change the status of the lock on the file "id" to be a readlock.
** If the file was write locked, then this reduces the lock to a read.
** If the file was read locked, then this acquires a new read lock.
**
** Return SQLITE_OK on success and SQLITE_BUSY on failure.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
**
** Per-handle state: on Unix id->locked is 0 or 1.  On Windows and MacOS
** id->locked is 0 when unlocked, the index of the locked byte (1 up to
** N_LOCKBYTE) for a read lock, and -1 for a write lock.
*/
int sqliteOsReadLock(OsFile *id){
#if OS_UNIX
  int rc;
  sqliteOsEnterMutex();
  if( id->pLock->cnt>0 ){
    /* The shared lock record already shows a read lock on this file.  No
    ** system call is needed: just count this handle in, unless it has
    ** been counted already. */
    if( !id->locked ){
      id->pLock->cnt++;
      id->locked = 1;
      id->pOpen->nLock++;
    }
    rc = SQLITE_OK;
  }else if( id->locked || id->pLock->cnt==0 ){
    /* Either nothing is locked yet (cnt==0), or cnt is negative and this
    ** handle is the one holding the lock, which sqliteOsWriteLock() marks
    ** with cnt = -1.  Request a read lock on the file from the OS. */
    struct flock lock;
    int s;
    lock.l_type = F_RDLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = lock.l_len = 0L;   /* start 0, length 0 */
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s!=0 ){
      /* EINVAL is reported as "no large file support", anything else as
      ** a busy file */
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      rc = SQLITE_OK;
      if( !id->locked ){
        id->pOpen->nLock++;
        id->locked = 1;
      }
      id->pLock->cnt = 1;
    }
  }else{
    /* cnt is negative and this handle does not hold the lock */
    rc = SQLITE_BUSY;
  }
  sqliteOsLeaveMutex();
  return rc;
#endif
#if OS_WIN
  int rc;
  if( id->locked>0 ){
    /* This handle already holds a read lock */
    rc = SQLITE_OK;
  }else{
    int lk;
    int res;
    int cnt = 100;
    /* Pick the lock byte for this reader: a random value in the range
    ** 1..N_LOCKBYTE */
    sqliteRandomness(sizeof(lk), &lk);
    lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
    /* Take the guard byte at FIRST_LOCKBYTE, retrying up to 100 times
    ** with a Sleep(1) between attempts */
    while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
      Sleep(1);
    }
    if( res ){
      /* While holding the guard byte: release the whole lock range (this
      ** drops a write lock if one was held), take the read lock, then
      ** release the guard byte again. */
      UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
      if( isNT() ){
        /* NT family: lock the whole range with LockFileEx() */
        OVERLAPPED ovlp;
        ovlp.Offset = FIRST_LOCKBYTE+1;
        ovlp.OffsetHigh = 0;
        ovlp.hEvent = 0;
        res = LockFileEx(id->h, LOCKFILE_FAIL_IMMEDIATELY,
                          0, N_LOCKBYTE, 0, &ovlp);
      }else{
        /* Win95/98/ME: lock only the randomly chosen byte */
        res = LockFile(id->h, FIRST_LOCKBYTE+lk, 0, 1, 0);
      }
      UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);
    }
    /* NOTE(review): if the read lock could not be obtained after the range
    ** was unlocked above, id->locked keeps its previous value - confirm
    ** that callers handle this. */
    if( res ){
      id->locked = lk;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
#if OS_MAC
  int rc;
  /* Locking is done on the resource fork; refNumRF==-1 means there is
  ** none, in which case the request is reported as successful. */
  if( id->locked>0 || id->refNumRF == -1 ){
    rc = SQLITE_OK;
  }else{
    int lk;
    OSErr res;
    int cnt = 5;
    ParamBlockRec params;
    /* Pick the lock byte for this reader: a random value in the range
    ** 1..N_LOCKBYTE */
    sqliteRandomness(sizeof(lk), &lk);
    lk = (lk & 0x7fffffff)%N_LOCKBYTE + 1;
    memset(&params, 0, sizeof(params));
    params.ioParam.ioRefNum = id->refNumRF;
    params.ioParam.ioPosMode = fsFromStart;
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
    params.ioParam.ioReqCount = 1;
    /* Take the guard byte, retrying up to 5 times */
    while( cnt-->0 && (res = PBLockRangeSync(&params))!=noErr ){
      UInt32 finalTicks;
      Delay(1, &finalTicks); /* 1/60 sec */
    }
    if( res == noErr ){
      /* Release the whole lock range (drops a write lock if held) ... */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
      params.ioParam.ioReqCount = N_LOCKBYTE;
      PBUnlockRangeSync(&params);
      /* ... lock the single chosen byte ... */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE+lk;
      params.ioParam.ioReqCount = 1;
      res = PBLockRangeSync(&params);
      /* ... and release the guard byte */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
      params.ioParam.ioReqCount = 1;
      PBUnlockRangeSync(&params);
    }
    if( res == noErr ){
      id->locked = lk;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
}
1430  
/*
** Change the lock status to be an exclusive or write lock.  Return
** SQLITE_OK on success and SQLITE_BUSY on a failure.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
**
** On success the Unix branch sets id->pLock->cnt to -1; the Windows and
** MacOS branches set id->locked to -1.
*/
int sqliteOsWriteLock(OsFile *id){
#if OS_UNIX
  int rc;
  sqliteOsEnterMutex();
  /* A write lock may be requested only when nothing is locked (cnt==0) or
  ** when the single existing read lock belongs to this handle. */
  if( id->pLock->cnt==0 || (id->pLock->cnt==1 && id->locked==1) ){
    struct flock lock;
    int s;
    lock.l_type = F_WRLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = lock.l_len = 0L;   /* start 0, length 0 */
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s!=0 ){
      /* EINVAL is reported as "no large file support", anything else as
      ** a busy file */
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      rc = SQLITE_OK;
      if( !id->locked ){
        id->pOpen->nLock++;
        id->locked = 1;
      }
      id->pLock->cnt = -1;
    }
  }else{
    rc = SQLITE_BUSY;
  }
  sqliteOsLeaveMutex();
  return rc;
#endif
#if OS_WIN
  int rc;
  if( id->locked<0 ){
    /* This handle already holds the write lock */
    rc = SQLITE_OK;
  }else{
    int res;
    int cnt = 100;
    /* Take the guard byte at FIRST_LOCKBYTE, retrying up to 100 times
    ** with a Sleep(1) between attempts */
    while( cnt-->0 && (res = LockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0))==0 ){
      Sleep(1);
    }
    if( res ){
      if( id->locked>0 ){
        /* Give up our own read lock first.  In the NT branch the result
        ** of UnlockFile() is not stored, so res stays non-zero there. */
        if( isNT() ){
          UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
        }else{
          res = UnlockFile(id->h, FIRST_LOCKBYTE + id->locked, 0, 1, 0);
        }
      }
      if( res ){
        /* Lock every byte of the range */
        res = LockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
      }else{
        res = 0;
      }
      UnlockFile(id->h, FIRST_LOCKBYTE, 0, 1, 0);   /* release guard byte */
    }
    if( res ){
      id->locked = -1;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
#if OS_MAC
  int rc;
  /* Locking is done on the resource fork; refNumRF==-1 means there is
  ** none, in which case the request is reported as successful. */
  if( id->locked<0 || id->refNumRF == -1 ){
    rc = SQLITE_OK;
  }else{
    OSErr res;
    int cnt = 5;
    ParamBlockRec params;
    memset(&params, 0, sizeof(params));
    params.ioParam.ioRefNum = id->refNumRF;
    params.ioParam.ioPosMode = fsFromStart;
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
    params.ioParam.ioReqCount = 1;
    /* Take the guard byte, retrying up to 5 times */
    while( cnt-->0 && (res = PBLockRangeSync(&params))!=noErr ){
      UInt32 finalTicks;
      Delay(1, &finalTicks); /* 1/60 sec */
    }
    if( res == noErr ){
      /* Release our own read-lock byte, if we hold one, and then lock
      ** the whole range. */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE + id->locked;
      params.ioParam.ioReqCount = 1;
      if( id->locked==0
            || PBUnlockRangeSync(&params)==noErr ){
        params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
        params.ioParam.ioReqCount = N_LOCKBYTE;
        res = PBLockRangeSync(&params);
      }else{
        /* Any value other than noErr makes the check below report BUSY */
        res = afpRangeNotLocked;
      }
      /* Release the guard byte */
      params.ioParam.ioPosOffset = FIRST_LOCKBYTE;
      params.ioParam.ioReqCount = 1;
      PBUnlockRangeSync(&params);
    }
    if( res == noErr ){
      id->locked = -1;
      rc = SQLITE_OK;
    }else{
      rc = SQLITE_BUSY;
    }
  }
  return rc;
#endif
}
1540  
/*
** Unlock the given file descriptor.  If the file descriptor was
** not previously locked, then this routine is a no-op.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
**
** On Unix, SQLITE_BUSY is returned if the fcntl() unlock call fails for
** any other reason.  The Windows and MacOS branches always return
** SQLITE_OK.
*/
int sqliteOsUnlock(OsFile *id){
#if OS_UNIX
  int rc;
  if( !id->locked ) return SQLITE_OK;
  sqliteOsEnterMutex();
  assert( id->pLock->cnt!=0 );
  if( id->pLock->cnt>1 ){
    /* Other handles still share the read lock: only drop our count */
    id->pLock->cnt--;
    rc = SQLITE_OK;
  }else{
    /* We are the last holder: ask the OS to release the lock */
    struct flock lock;
    int s;
    lock.l_type = F_UNLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = lock.l_len = 0L;   /* start 0, length 0 */
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s!=0 ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      rc = SQLITE_OK;
      id->pLock->cnt = 0;
    }
  }
  if( rc==SQLITE_OK ){
    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    struct openCnt *pOpen = id->pOpen;
    pOpen->nLock--;
    assert( pOpen->nLock>=0 );
    if( pOpen->nLock==0 && pOpen->nPending>0 ){
      int i;
      for(i=0; i<pOpen->nPending; i++){
        close(pOpen->aPending[i]);
      }
      sqliteFree(pOpen->aPending);
      pOpen->nPending = 0;
      pOpen->aPending = 0;
    }
  }
  sqliteOsLeaveMutex();
  /* NOTE(review): the handle is marked unlocked even when rc is not
  ** SQLITE_OK, in which case nLock was not decremented above - confirm
  ** this is intended. */
  id->locked = 0;
  return rc;
#endif
#if OS_WIN
  int rc;
  if( id->locked==0 ){
    rc = SQLITE_OK;
  }else if( isNT() || id->locked<0 ){
    /* A write lock, or any lock on the NT family, covers the whole range */
    UnlockFile(id->h, FIRST_LOCKBYTE+1, 0, N_LOCKBYTE, 0);
    rc = SQLITE_OK;
    id->locked = 0;
  }else{
    /* Win95/98/ME read lock: a single byte whose index is id->locked */
    UnlockFile(id->h, FIRST_LOCKBYTE+id->locked, 0, 1, 0);
    rc = SQLITE_OK;
    id->locked = 0;
  }
  return rc;
#endif
#if OS_MAC
  int rc;
  ParamBlockRec params;
  memset(&params, 0, sizeof(params));
  params.ioParam.ioRefNum = id->refNumRF;
  params.ioParam.ioPosMode = fsFromStart;
  if( id->locked==0 || id->refNumRF == -1 ){
    /* Nothing is locked, or there is no resource fork to lock on */
    rc = SQLITE_OK;
  }else if( id->locked<0 ){
    /* Write lock: release the whole range */
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE+1;
    params.ioParam.ioReqCount = N_LOCKBYTE;
    PBUnlockRangeSync(&params);
    rc = SQLITE_OK;
    id->locked = 0;
  }else{
    /* Read lock: release the single byte whose index is id->locked */
    params.ioParam.ioPosOffset = FIRST_LOCKBYTE+id->locked;
    params.ioParam.ioReqCount = 1;
    PBUnlockRangeSync(&params);
    rc = SQLITE_OK;
    id->locked = 0;
  }
  return rc;
#endif
}
1631  
1632  /*
1633  ** Get information to seed the random number generator.  The seed
1634  ** is written into the buffer zBuf[256].  The calling function must
1635  ** supply a sufficiently large buffer.
1636  */
1637  int sqliteOsRandomSeed(char *zBuf){
1638    /* We have to initialize zBuf to prevent valgrind from reporting
1639    ** errors.  The reports issued by valgrind are incorrect - we would
1640    ** prefer that the randomness be increased by making use of the
1641    ** uninitialized space in zBuf - but valgrind errors tend to worry
1642    ** some users.  Rather than argue, it seems easier just to initialize
1643    ** the whole array and silence valgrind, even if that means less randomness
1644    ** in the random seed.
1645    **
1646    ** When testing, initializing zBuf[] to zero is all we do.  That means
1647    ** that we always use the same random number sequence.* This makes the
1648    ** tests repeatable.
1649    */
1650    memset(zBuf, 0, 256);
1651  #if OS_UNIX && !defined(SQLITE_TEST)
1652    {
1653      int pid;
1654      time((time_t*)zBuf);
1655      pid = getpid();
1656      memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1657    }
1658  #endif
1659  #if OS_WIN && !defined(SQLITE_TEST)
1660    GetSystemTime((LPSYSTEMTIME)zBuf);
1661  #endif
1662  #if OS_MAC
1663    {
1664      int pid;
1665      Microseconds((UnsignedWide*)zBuf);
1666      pid = getpid();
1667      memcpy(&zBuf[sizeof(UnsignedWide)], &pid, sizeof(pid));
1668    }
1669  #endif
1670    return SQLITE_OK;
1671  }
1672  
/*
** Suspend the caller for roughly ms milliseconds and return the number
** of milliseconds that was requested from the operating system.  The
** returned value can exceed ms where the platform only offers a coarser
** sleep: whole seconds on Unix without usleep(), 1/60 second ticks on
** MacOS.
*/
int sqliteOsSleep(int ms){
#if OS_UNIX
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
#else
  /* Only sleep() is available: round up to whole seconds */
  int nSec = (ms+999)/1000;
  sleep(nSec);
  return 1000*nSec;
#endif
#endif
#if OS_WIN
  Sleep(ms);
  return ms;
#endif
#if OS_MAC
  UInt32 endTick;
  UInt32 nTick = (((UInt32)ms+16)*3)/50;  /* 1/60 sec per tick */
  Delay(nTick, &endTick);
  return (int)((nTick*50)/3);
#endif
}
1697  
/*
** Static variables used for thread synchronization
*/
/* Set to 1 by sqliteOsEnterMutex() and back to 0 by sqliteOsLeaveMutex();
** only read by the assert() statements in those two routines. */
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  /* pthreads build: statically initialized mutex */
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
#ifdef SQLITE_W32_THREADS
  /* Win32 build: initialized on first use inside sqliteOsEnterMutex() */
  static CRITICAL_SECTION cs;
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  /* MacOS build: created on first use inside sqliteOsEnterMutex() */
  static MPCriticalRegionID criticalRegion;
#endif
1711  
/*
** The following pair of routines implement mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
**
** When none of the threading macros is defined, this routine only
** maintains the inMutex flag.
*/
void sqliteOsEnterMutex(){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  /* The critical section has no static initializer, so it is set up on
  ** first use.  Only the thread for which InterlockedIncrement() returns 1
  ** performs the initialization; every other thread sleeps and re-checks
  ** isInit until it has been set. */
  static int isInit = 0;
  while( !isInit ){
    static long lock = 0;
    if( InterlockedIncrement(&lock)==1 ){
      InitializeCriticalSection(&cs);
      isInit = 1;
    }else{
      Sleep(1);
    }
  }
  EnterCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  /* notInit: 1 = region not created yet, 2 = creation in progress,
  ** 0 = region ready.
  ** NOTE(review): a thread that observes the value 2 yields only once
  ** before going on to MPEnterCriticalRegion() - confirm that this is
  ** sufficient. */
  static volatile int notInit = 1;
  if( notInit ){
    if( notInit == 2 ) /* as close as you can get to thread safe init */
      MPYield();
    else{
      notInit = 2;
      MPCreateCriticalRegion(&criticalRegion);
      notInit = 0;
    }
  }
  MPEnterCriticalRegion(criticalRegion, kDurationForever);
#endif
  /* The mutex must not be entered again while it is already held */
  assert( !inMutex );
  inMutex = 1;
}
/*
** Leave the mutex entered by sqliteOsEnterMutex().  The inMutex flag is
** cleared before the platform lock is released.
*/
void sqliteOsLeaveMutex(){
  /* Must be paired with a preceding sqliteOsEnterMutex() */
  assert( inMutex );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
#ifdef SQLITE_W32_THREADS
  LeaveCriticalSection(&cs);
#endif
#ifdef SQLITE_MACOS_MULTITASKING
  MPExitCriticalRegion(criticalRegion);
#endif
}
1766  
/*
** Convert the pathname zRelative into a full pathname.
**
** The result is written into memory obtained through sqliteSetString()
** (Unix, MacOS) or sqliteMalloc() (Windows); the caller owns it and must
** free it when it is no longer needed.
**
** Unix:    A name starting with '/' is copied as is; any other name is
**          appended to the current directory with a '/' in between.
** Windows: The conversion is delegated to GetFullPathName().
** MacOS:   A name starting with ':' is appended, without that colon, to
**          the current directory.  Any other name containing a ':' is
**          copied as is.  A name without any ':' is appended to the
**          current directory.
*/
char *sqliteOsFullPathname(const char *zRelative){
#if OS_UNIX
  char *zFull = 0;
  if( zRelative[0]!='/' ){
    char zCwd[5000];
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), "/", zRelative,
                    (char*)0);
  }else{
    sqliteSetString(&zFull, zRelative, (char*)0);
  }
  return zFull;
#endif
#if OS_WIN
  char *zUnused;
  char *zFull;
  int nByte;
  /* The first call only asks for the required buffer size */
  nByte = GetFullPathName(zRelative, 0, 0, &zUnused) + 1;
  zFull = sqliteMalloc( nByte );
  if( zFull!=0 ){
    GetFullPathName(zRelative, nByte, zFull, &zUnused);
  }
  return zFull;
#endif
#if OS_MAC
  char *zFull = 0;
  if( zRelative[0]==':' ){
    char zCwd[_MAX_PATH+1];
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), &(zRelative[1]),
                    (char*)0);
  }else if( strchr(zRelative, ':') ){
    sqliteSetString(&zFull, zRelative, (char*)0);
  }else{
    char zCwd[_MAX_PATH+1];
    sqliteSetString(&zFull, getcwd(zCwd, sizeof(zCwd)), zRelative, (char*)0);
  }
  return zFull;
#endif
}
1812  
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqliteOsCurrentTime().  This is used for testing.
**
** sqliteOsCurrentTime() converts the value with
** sqlite_current_time/86400.0 + 2440587.5, the same formula it applies to
** the time_t value on Unix.  The variable exists only when SQLITE_TEST
** is defined.
*/
#ifdef SQLITE_TEST
int sqlite_current_time = 0;
#endif
1820  
/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found.
**
** A time source is compiled in for Unix and Windows only.  On any other
** configuration *prNow is left untouched and 1 is returned, unless a test
** build supplies the time through sqlite_current_time.
*/
int sqliteOsCurrentTime(double *prNow){
  int rc = 1;              /* Becomes 0 once *prNow has been written */
#if OS_UNIX
  time_t t;
  time(&t);
  /* 86400 seconds per day; 2440587.5 is the Julian Day of the time_t
  ** epoch */
  *prNow = t/86400.0 + 2440587.5;
  rc = 0;
#endif
#if OS_WIN
  FILETIME ft;
  /* FILETIME structure is a 64-bit value representing the number of
     100-nanosecond intervals since January 1, 1601 (= JD 2305813.5).
  */
  double now;
  GetSystemTimeAsFileTime( &ft );
  now = ((double)ft.dwHighDateTime) * 4294967296.0;
  *prNow = (now + ft.dwLowDateTime)/864000000000.0 + 2305813.5;
  rc = 0;
#endif
#ifdef SQLITE_TEST
  /* A non-zero sqlite_current_time overrides the real clock */
  if( sqlite_current_time ){
    *prNow = sqlite_current_time/86400.0 + 2440587.5;
    rc = 0;
  }
#endif
  return rc;
}
1849