#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2
#  as published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

#
# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

'''
Workspace backup

Backup format is:
   backupdir/
      wsname/
         generation#/
            dirstate (handled by CdmUncommittedBackup)
                File containing dirstate nodeid (the tip we expect to be at
                after applying the bundle).

            bundle (handled by CdmCommittedBackup)
                An Hg bundle containing outgoing committed changes.

            nodes (handled by CdmCommittedBackup)
                A text file listing the full (hex) nodeid of all nodes in
                bundle, used by need_backup.

            diff (handled by CdmUncommittedBackup)
                A Git-formatted diff containing uncommitted changes.

            renames (handled by CdmUncommittedBackup)
                A list of renames in the working copy that have to be
                applied separately, rather than by the diff.

            metadata.tar.gz (handled by CdmMetadataBackup)
                $CODEMGR_WS/.hg/hgrc
                $CODEMGR_WS/.hg/localtags
                $CODEMGR_WS/.hg/patches (Mq data)
                $CODEMGR_WS/.hg/cdm

         latest -> generation#
            Newest backup generation.

All files in a given backup generation, with the exception of
dirstate, are optional.
'''
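
#
# A minimal usage sketch (illustrative only; the real callers are the cdm
# Mercurial extension commands, and 'ui' and 'ws' stand for the ui and
# workspace objects those callers already hold):
#
#     bk = CdmBackup(ui, ws, 'myws')
#     if bk.need_backup():
#         bk.backup()
#     ...
#     bk.restore()           # restore the latest generation
#     bk.restore(gen=3)      # or restore a specific generation
#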

import os, pwd, shutil, traceback, tarfile, time
from mercurial import changegroup, patch, node, util
from cStringIO import StringIO


class CdmCommittedBackup(object):
    '''Backup of committed changes'''
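
    #
    # Committed changes are stored as two files in a backup generation:
    # 'bundle', an Hg bundle of the changesets outgoing with respect to
    # the parent workspace, and 'nodes', the hex nodeids of those
    # changesets, which need_backup() uses for a cheap comparison.
    #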

    def __init__(self, backup, ws):
        self.ws = ws
        self.bu = backup
        self.files = ('bundle', 'nodes')

    def _outgoing_nodes(self, parent):
        '''Return a list of all outgoing nodes in hex format'''

        if parent:
            outgoing = self.ws.findoutgoing(parent)
            nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
            return map(node.hex, nodes)
        else:
            return []

    def backup(self):
        '''Backup committed changes'''
        parent = self.ws.parent()

        if not parent:
            self.ws.ui.warn('Workspace has no parent, committed changes will '
                            'not be backed up\n')
            return

        out = self.ws.findoutgoing(parent)
        if not out:
            return

        cg = self.ws.repo.changegroup(out, 'bundle')
        changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')

        outnodes = self._outgoing_nodes(parent)
        if outnodes:
            fp = None
            try:
                try:
                    fp = open(self.bu.backupfile('nodes'), 'w')
                    fp.write('%s\n' % '\n'.join(outnodes))
                except EnvironmentError, e:
                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
            finally:
                if fp and not fp.closed:
                    fp.close()

    def restore(self):
        '''Restore committed changes from backup'''
        bfile = self.bu.backupfile('bundle')

        if os.path.exists(bfile):
            f = None
            try:
                try:
                    f = open(bfile, 'r')
                    bundle = changegroup.readbundle(f, bfile)
                    self.ws.repo.addchangegroup(bundle, 'strip',
                                                'bundle:%s' % bfile)
                except EnvironmentError, e:
                    raise util.Abort("couldn't restore committed changes: %s\n"
                                     "   %s" % (bfile, e))
            finally:
                if f and not f.closed:
                    f.close()

    def need_backup(self):
        '''Compare backup of committed changes to workspace'''

        if os.path.exists(self.bu.backupfile('nodes')):
            f = None
            try:
                try:
                    f = open(self.bu.backupfile('nodes'))
                    bnodes = set([line.rstrip('\r\n')
                                  for line in f.readlines()])
                    f.close()
                except EnvironmentError, e:
                    raise util.Abort("couldn't open backup node list: %s" % e)
            finally:
                if f and not f.closed:
                    f.close()
        else:
            bnodes = set()

        outnodes = set(self._outgoing_nodes(self.ws.parent()))
        if outnodes != bnodes:
            return True

        return False

    def cleanup(self):
        '''Remove backed up committed changes'''

        for fname in self.files:
            if os.path.exists(self.bu.backupfile(fname)):
                os.unlink(self.bu.backupfile(fname))


class CdmUncommittedBackup(object):
    '''Backup of uncommitted changes'''

    def __init__(self, backup, ws):
        self.ws = ws
        self.bu = backup

    def _clobbering_renames(self):
        '''Return a list of pairs of files representing renames/copies
        that clobber already versioned files.  [(oldname newname)...]'''

        #
        # Note that this doesn't handle uncommitted merges
        # as CdmUncommittedBackup itself doesn't.
        #
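        # An illustrative example: after 'hg copy --force foo.c bar.c',
        # where bar.c is already tracked in the parent changeset, the
        # result would include ('foo.c', 'bar.c').  A plain diff cannot
        # express such a copy, so restore() replays it via repo.copy().
        #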
        wctx = self.ws.repo.workingctx()
        parent = wctx.parents()[0]

        ret = []
        for fname in wctx.added() + wctx.modified():
            rn = wctx.filectx(fname).renamed()
            if rn and fname in parent:
                ret.append((rn[0], fname))
        return ret

    def backup(self):
        '''Backup uncommitted changes'''
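        #
        # Three files are written into the current backup generation:
        #   dirstate - hex node of the working copy parent
        #   renames  - clobbering renames/copies, one "source dest" per line
        #   diff     - git-format diff of all uncommitted changes
        #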

        if self.ws.merged():
            raise util.Abort("Unable to backup an uncommitted merge.\n"
                             "Please complete your merge and commit")

        dirstate = node.hex(self.ws.repo.changectx().node())

        fp = None
        try:
            try:
                fp = open(self.bu.backupfile('dirstate'), 'w')
                fp.write(dirstate + '\n')
            except EnvironmentError, e:
                raise util.Abort("couldn't save working copy parent: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

        try:
            try:
                fp = open(self.bu.backupfile('renames'), 'w')
                for cons in self._clobbering_renames():
                    fp.write("%s %s\n" % cons)
            except EnvironmentError, e:
                raise util.Abort("couldn't save clobbering copies: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

        try:
            try:
                fp = open(self.bu.backupfile('diff'), 'w')
                patch.diff(self.ws.repo, fp=fp,
                           opts=patch.diffopts(self.ws.ui, opts={'git': True}))
            except EnvironmentError, e:
                raise util.Abort("couldn't save working copy diff: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

    def _dirstate(self):
        '''Return the working copy node saved at backup time'''
        fp = None
        try:
            try:
                fp = open(self.bu.backupfile('dirstate'))
                dirstate = fp.readline().strip()
                return dirstate
            except EnvironmentError, e:
                raise util.Abort("couldn't read saved parent: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

    def restore(self):
        '''Restore uncommitted changes'''
        diff = self.bu.backupfile('diff')
        dirstate = self._dirstate()

        try:
            self.ws.clean(rev=dirstate)
        except util.Abort, e:
            raise util.Abort("couldn't update to saved node: %s" % e)

        if not os.path.exists(diff):
            return

        #
        # There's a race here whereby if the patch (or part thereof)
        # is applied within the same second as the clean above (such
        # that mtime doesn't change) and if the size of that file
        # does not change, Hg may not see the change.
        #
        # We sleep a full second to avoid this, as sleeping merely
        # until the next second begins would require very close clock
        # synchronization on network filesystems.
        #
        time.sleep(1)

        files = {}
        try:
            try:
                fuzz = patch.patch(diff, self.ws.ui, strip=1,
                                   cwd=self.ws.repo.root, files=files)
                if fuzz:
                    raise util.Abort('working copy diff applied with fuzz')
            except Exception, e:
                raise util.Abort("couldn't apply working copy diff: %s\n"
                                 "   %s" % (diff, e))
        finally:
            patch.updatedir(self.ws.ui, self.ws.repo, files)

        if not os.path.exists(self.bu.backupfile('renames')):
            return

        #
        # We need to re-apply name changes where the new name
        # (rename/copy destination) is an already versioned file, as
        # Hg would otherwise ignore them.
        #
        try:
            fp = open(self.bu.backupfile('renames'))
            for line in fp:
                source, dest = line.strip().split()
                self.ws.repo.copy(source, dest)
        except EnvironmentError, e:
            raise util.Abort('unable to open renames file: %s' % e)
        except ValueError:
            raise util.Abort('corrupt renames file: %s' %
                             self.bu.backupfile('renames'))

    def need_backup(self):
        '''Compare backup of uncommitted changes to workspace'''
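        #
        # A backup is stale if any of the three recorded pieces differs
        # from the current workspace: the saved working copy parent, the
        # saved git-format diff, or the saved clobbering renames.
        #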
        if self._dirstate() != node.hex(self.ws.repo.changectx().node()):
            return True

        curdiff = StringIO()
        diff = self.bu.backupfile('diff')
        fd = None

        patch.diff(self.ws.repo, fp=curdiff,
                   opts=patch.diffopts(self.ws.ui, opts={'git': True}))

        if os.path.exists(diff):
            try:
                try:
                    fd = open(diff)
                    backdiff = fd.read()
                except EnvironmentError, e:
                    raise util.Abort("couldn't open backup diff %s\n"
                                     "   %s" % (diff, e))
            finally:
                if fd and not fd.closed:
                    fd.close()
        else:
            backdiff = ''

        if backdiff != curdiff.getvalue():
            return True

        currrenamed = self._clobbering_renames()
        bakrenamed = None

        if os.path.exists(self.bu.backupfile('renames')):
            try:
                try:
                    fd = open(self.bu.backupfile('renames'))
                    # Use tuples so this can compare equal to the
                    # (source, dest) tuples from _clobbering_renames().
                    bakrenamed = [tuple(line.strip().split(' '))
                                  for line in fd]
                except EnvironmentError, e:
                    raise util.Abort("couldn't open renames file %s: %s\n" %
                                     (self.bu.backupfile('renames'), e))
            finally:
                if fd and not fd.closed:
                    fd.close()

            if currrenamed != bakrenamed:
                return True

        return False

    def cleanup(self):
        '''Remove backed up uncommitted changes'''
        for fname in ('dirstate', 'diff', 'renames'):
            if os.path.exists(self.bu.backupfile(fname)):
                os.unlink(self.bu.backupfile(fname))


class CdmMetadataBackup(object):
    '''Backup of workspace metadata'''
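
    #
    # The metadata files (.hg/hgrc, .hg/localtags, .hg/patches and
    # .hg/cdm) are stored together in a single metadata.tar.gz.  Older
    # backups stored them as individual files; restore() detects that
    # case and dispatches to old_restore() rather than tar_restore().
    #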

    def __init__(self, backup, ws):
        self.bu = backup
        self.ws = ws
        self.files = ('hgrc', 'localtags', 'patches', 'cdm')

    def backup(self):
        '''Backup workspace metadata'''

        tar = None

        try:
            try:
                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
                                   'w:gz')
                tar.errorlevel = 2
            except (EnvironmentError, tarfile.TarError), e:
                raise util.Abort("couldn't open %s for writing: %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

            try:
                for elt in self.files:
                    fpath = self.ws.repo.join(elt)
                    if os.path.exists(fpath):
                        tar.add(fpath, elt)
            except (EnvironmentError, tarfile.TarError), e:
                #
                # tarfile.TarError doesn't include the tar member or file
                # in question, so we have to do so ourselves.
                #
                if isinstance(e, tarfile.TarError):
                    error = "%s: %s" % (elt, e)
                else:
                    error = str(e)

                raise util.Abort("couldn't backup metadata to %s:\n"
                                 "  %s" %
                                 (self.bu.backupfile('metadata.tar.gz'),
                                  error))
        finally:
            if tar and not tar.closed:
                tar.close()

    def old_restore(self):
        '''Restore workspace metadata from a pre-tar backup'''

        for fname in self.files:
            bfile = self.bu.backupfile(fname)
            wfile = self.ws.repo.join(fname)

            if os.path.exists(bfile):
                try:
                    shutil.copy2(bfile, wfile)
                except EnvironmentError, e:
                    raise util.Abort("couldn't restore metadata from %s:\n"
                                     "   %s" % (bfile, e))

    def tar_restore(self):
        '''Restore workspace metadata (from a tar-style backup)'''

        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            tar = None

            try:
                try:
                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
                    tar.errorlevel = 2
                except (EnvironmentError, tarfile.TarError), e:
                    raise util.Abort("couldn't open %s: %s" %
                                     (self.bu.backupfile('metadata.tar.gz'),
                                      e))

                try:
                    for elt in tar:
                        tar.extract(elt, path=self.ws.repo.path)
                except (EnvironmentError, tarfile.TarError), e:
                    # Make sure the member name is in the exception message.
                    if isinstance(e, tarfile.TarError):
                        error = "%s: %s" % (elt.name, e)
                    else:
                        error = str(e)

                    raise util.Abort("couldn't restore metadata from %s:\n"
                                     "   %s" %
                                     (self.bu.backupfile('metadata.tar.gz'),
                                      error))
            finally:
                if tar and not tar.closed:
                    tar.close()

    def restore(self):
        '''Restore workspace metadata'''

        if os.path.exists(self.bu.backupfile('hgrc')):
            self.old_restore()
        else:
            self.tar_restore()

    def need_backup(self):
        '''Compare backed up workspace metadata to workspace'''
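        #
        # A backup is needed if the tarball and the workspace disagree in
        # either direction: a tar member is missing from the workspace or
        # differs in size or mtime, or a metadata file (or a file under a
        # metadata directory) exists in the workspace but not in the tar.
        #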

        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            try:
                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
                tar.errorlevel = 2
            except (EnvironmentError, tarfile.TarError), e:
                raise util.Abort("couldn't open metadata tarball: %s\n"
                                 "   %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

            for elt in tar:
                fpath = self.ws.repo.join(elt.name)
                if not os.path.exists(fpath):
                    return True     # File in tar, not workspace

                if elt.isdir():     # Don't care about directories
                    continue

                if (elt.mtime != os.path.getmtime(fpath) or
                    elt.size != os.path.getsize(fpath)):
                    return True

            tarnames = tar.getnames()
            tar.close()
        else:
            tarnames = []

        for mfile in self.files:
            fpath = self.ws.repo.join(mfile)

            if os.path.isdir(fpath):
                # Directories in tarfile always end with a '/'
                if not mfile.endswith('/'):
                    mfile += '/'

                if mfile not in tarnames:
                    return True

                for root, dirs, files in os.walk(fpath, topdown=True):
                    for elt in files:
                        path = os.path.join(root, elt)

                        rpath = self.ws.repo.path
                        if not rpath.endswith('/'):
                            rpath += '/'

                        path = path.replace(rpath, '', 1)
                        if path not in tarnames:
                            return True # In workspace not tar
            else:
                if os.path.exists(fpath) and mfile not in tarnames:
                    return True

        return False

    def cleanup(self):
        '''Remove backed up workspace metadata'''
        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            os.unlink(self.bu.backupfile('metadata.tar.gz'))


class CdmBackup(object):
    '''A backup of a given workspace'''
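
    #
    # Backups are stored as numbered generations beneath the backup
    # directory, with a 'latest' symlink naming the most recent one
    # (see the module docstring for the full on-disk layout).
    #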

    def __init__(self, ui, ws, name):
        self.ws = ws
        self.ui = ui
        self.backupdir = self._find_backup_dir(name)

        #
        # The order of instances here controls the order in which the
        # various operations are run.
        #
        # There's some inherent dependence, in that on restore we need
        # to restore committed changes prior to uncommitted changes
        # (as the parent revision of any uncommitted changes is quite
        # likely to not exist until committed changes are restored).
        # Metadata restore can happen at any point, but happens last
        # as a matter of convention.
        #
        self.modules = [x(self, ws) for x in [CdmCommittedBackup,
                                              CdmUncommittedBackup,
                                              CdmMetadataBackup]]

        if os.path.exists(os.path.join(self.backupdir, 'latest')):
            generation = os.readlink(os.path.join(self.backupdir, 'latest'))
            self.generation = int(os.path.split(generation)[1])
        else:
            self.generation = 0

    def _find_backup_dir(self, name):
        '''Find the path to an appropriate backup directory based on NAME'''
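        #
        # Resolution, as implemented below: an absolute NAME is used
        # verbatim; otherwise the backup lives under the 'cdm.backupdir'
        # hgrc setting if set, or ~/cdm.backup, giving for example
        # ~/cdm.backup/myws for a workspace named 'myws'.
        #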
        backupdir = None
        backupbase = None

        if os.path.isabs(name):
            return name

        if self.ui.config('cdm', 'backupdir'):
            backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
        else:
            home = None

            try:
                home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                pass                    # Handled anyway

            if not home:
                raise util.Abort('Could not determine your HOME directory to '
                                 'find backup path')

            backupbase = os.path.join(home, 'cdm.backup')

        backupdir = os.path.join(backupbase, name)

        # If backupdir exists, it must be a directory.
        if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
            raise util.Abort('%s exists but is not a directory' % backupdir)

        return backupdir

    def backupfile(self, path):
        '''Return the path of backup file PATH in the current generation'''
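        # e.g. backupfile('bundle') => <backupdir>/<generation>/bundle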
        return os.path.join(self.backupdir, str(self.generation), path)

    def update_latest(self, gen):
        '''Update latest symlink to point to the current generation'''
        linkpath = os.path.join(self.backupdir, 'latest')

        if os.path.lexists(linkpath):
            os.unlink(linkpath)

        os.symlink(str(gen), linkpath)

    def create_gen(self, gen):
        '''Create a new backup generation'''
        try:
            os.makedirs(os.path.join(self.backupdir, str(gen)))
            self.update_latest(gen)
        except EnvironmentError, e:
            raise util.Abort("Couldn't create backup generation %s: %s" %
                             (os.path.join(self.backupdir, str(gen)), e))

    def need_backup(self):
        '''Compare backed up changes to workspace'''
        #
        # If there's no current backup generation, or the last backup was
        # invalid (lacking the dirstate file), we need a backup regardless
        # of anything else.
        #
        if (not self.generation or
            not os.path.exists(self.backupfile('dirstate'))):
            return True

        for x in self.modules:
            if x.need_backup():
                return True

        return False

    def backup(self):
        '''Take a backup of the current workspace'''

        if not os.path.exists(self.backupdir):
            try:
                os.makedirs(self.backupdir)
            except EnvironmentError, e:
                raise util.Abort('Could not create backup directory %s: %s' %
                                 (self.backupdir, e))

        self.generation += 1
        self.create_gen(self.generation)

        #
        # Lock the repo, so the backup can be consistent.  We need the
        # wlock too to make sure the dirstate parent doesn't change
        # underneath us.
        #

        lock = self.ws.repo.lock()
        wlock = self.ws.repo.wlock()

        try:
            for x in self.modules:
                x.backup()
        except Exception, e:
            if isinstance(e, KeyboardInterrupt):
                self.ws.ui.warn("Interrupted\n")
            else:
                self.ws.ui.warn("Error: %s\n" % e)

                #
                # If it's not a 'normal' error, we want to print a stack
                # trace now in case the attempt to remove the partial
                # backup also fails, and raises a second exception.
                #
                if (not isinstance(e, (EnvironmentError, util.Abort))
                    or self.ws.ui.traceback):
                    traceback.print_exc()

            for x in self.modules:
                x.cleanup()

            os.rmdir(os.path.join(self.backupdir, str(self.generation)))
            self.generation -= 1

            if self.generation != 0:
                self.update_latest(self.generation)
            else:
                os.unlink(os.path.join(self.backupdir, 'latest'))

            raise util.Abort('Backup failed')

    def restore(self, gen=None):
        '''Restore workspace from backup

        Restores from backup generation GEN (defaulting to the latest)
        into this backup's workspace.'''

        wlock = self.ws.repo.wlock()
        lock = self.ws.repo.lock()

        if not os.path.exists(self.backupdir):
            raise util.Abort('Backup directory does not exist: %s' %
                             (self.backupdir))

        if gen:
            if not os.path.exists(os.path.join(self.backupdir, str(gen))):
                raise util.Abort('Backup generation does not exist: %s' %
                                 (os.path.join(self.backupdir, str(gen))))
            self.generation = int(gen)

        if not self.generation: # This is ok, 0 is not a valid generation
            raise util.Abort('Backup has no generations: %s' % self.backupdir)

        if not os.path.exists(self.backupfile('dirstate')):
            raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
                             (self.backupdir, self.generation))

        try:
            for x in self.modules:
                x.restore()
        except util.Abort, e:
            raise util.Abort('Error restoring workspace:\n'
                             '%s\n'
                             'Workspace will be partially restored' % e)