xref: /titanic_41/usr/src/tools/onbld/Scm/Backup.py (revision ac823a0f31ed9c8cbe505cee2def262ac92ea6cd)
1#
2#  This program is free software; you can redistribute it and/or modify
3#  it under the terms of the GNU General Public License version 2
4#  as published by the Free Software Foundation.
5#
6#  This program is distributed in the hope that it will be useful,
7#  but WITHOUT ANY WARRANTY; without even the implied warranty of
8#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9#  GNU General Public License for more details.
10#
11#  You should have received a copy of the GNU General Public License
12#  along with this program; if not, write to the Free Software
13#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14#
15
16#
17# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
18# Use is subject to license terms.
19#
20# Copyright 2008, 2010, Richard Lowe
21#
22
23'''
24Workspace backup
25
26Backup format is:
27   backupdir/
28      wsname/
29         generation#/
30            dirstate (handled by CdmUncommittedBackup)
31                File containing dirstate nodeid (the changeset we need
32                to update the workspace to after applying the bundle).
33                This is the node to which the working copy changes
34                (see 'diff', below) will be applied if applicable.
35
36            bundle (handled by CdmCommittedBackup)
37                An Hg bundle containing outgoing committed changes.
38
39            nodes (handled by CdmCommittedBackup)
40                A text file listing the full (hex) nodeid of all nodes in
41                bundle, used by need_backup.
42
43            diff (handled by CdmUncommittedBackup)
44                A Git-formatted diff containing uncommitted changes.
45
46            renames (handled by CdmUncommittedBackup)
47                A list of renames in the working copy that have to be
48                applied manually, rather than by the diff.
49
50            metadata.tar.gz (handled by CdmMetadataBackup)
51                $CODEMGR_WS/.hg/hgrc
52                $CODEMGR_WS/.hg/localtags
53                $CODEMGR_WS/.hg/patches (Mq data)
54
55         latest -> generation#
56            Newest backup generation.
57
58All files in a given backup generation, with the exception of
59dirstate, are optional.
60'''
61
62import os, pwd, shutil, tarfile, time, traceback
63from mercurial import changegroup, error, node, patch, util
64
65
class CdmNodeMissing(util.Abort):
    '''Raised when a changeset required by a restore is not present
    in the destination workspace.

    There are two ways to get here:

      - The bundle contains a changeset whose parent the destination
        workspace lacks, either because the destination is less
        up-to-date than the source or because the two workspaces are
        unrelated.

      - The uncommitted changes must be applied on top of a node the
        workspace still does not have after the bundle is applied
        (for instance one on a branch found in neither the bundle nor
        the destination workspace).'''

    def __init__(self, msg, name):
        '''MSG describes the operation that failed, NAME identifies
        the missing changeset.'''

        #
        # A str exactly 20 characters long is taken to be a binary
        # node id and is abbreviated for display; Mercurial makes the
        # same assumption when it builds a LookupError.  Anything else
        # is shown as given.
        #
        shown = name
        if isinstance(name, str) and len(name) == 20:
            shown = node.short(name)

        text = ("%s: changeset '%s' is missing\n"
                "Your workspace is either not sufficiently up to date,\n"
                "or is unrelated to the workspace from "
                "which the backup was taken.\n" % (msg, shown))

        util.Abort.__init__(self, text)
96
97
98class CdmCommittedBackup(object):
99    '''Backup of committed changes'''
100
101    def __init__(self, backup, ws):
102        self.ws = ws
103        self.bu = backup
104        self.files = ('bundle', 'nodes')
105
106    def _outgoing_nodes(self, parent):
107        '''Return a list of all outgoing nodes in hex format'''
108
109        if parent:
110            outgoing = self.ws.findoutgoing(parent)
111            nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
112            return map(node.hex, nodes)
113        else:
114            return []
115
116    def backup(self):
117        '''Backup committed changes'''
118        parent = self.ws.parent()
119
120        if not parent:
121            self.ws.ui.warn('Workspace has no parent, committed changes will '
122                            'not be backed up\n')
123            return
124
125        out = self.ws.findoutgoing(parent)
126        if not out:
127            return
128
129        cg = self.ws.repo.changegroup(out, 'bundle')
130        changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')
131
132        outnodes = self._outgoing_nodes(parent)
133        if outnodes:
134            fp = None
135            try:
136                try:
137                    fp = open(self.bu.backupfile('nodes'), 'w')
138                    fp.write('%s\n' % '\n'.join(outnodes))
139                except EnvironmentError, e:
140                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
141            finally:
142                if fp and not fp.closed:
143                    fp.close()
144
145    def restore(self):
146        '''Restore committed changes from backup'''
147        bfile = self.bu.backupfile('bundle')
148
149        if os.path.exists(bfile):
150            f = None
151            try:
152                try:
153                    f = open(bfile, 'r')
154                    bundle = changegroup.readbundle(f, bfile)
155                    self.ws.repo.addchangegroup(bundle, 'strip',
156                                                'bundle:%s' % bfile)
157                except EnvironmentError, e:
158                    raise util.Abort("couldn't restore committed changes: %s\n"
159                                     "   %s" % (bfile, e))
160                except error.LookupError, e:
161                    raise CdmNodeMissing("couldn't restore committed changes",
162                                                     e.name)
163            finally:
164                if f and not f.closed:
165                    f.close()
166
167    def need_backup(self):
168        '''Compare backup of committed changes to workspace'''
169
170        if os.path.exists(self.bu.backupfile('nodes')):
171            f = None
172            try:
173                try:
174                    f = open(self.bu.backupfile('nodes'))
175                    bnodes = set([line.rstrip('\r\n')
176                                  for line in f.readlines()])
177                    f.close()
178                except EnvironmentError, e:
179                    raise util.Abort("couldn't open backup node list: %s" % e)
180            finally:
181                if f and not f.closed:
182                    f.close()
183        else:
184            bnodes = set()
185
186        outnodes = set(self._outgoing_nodes(self.ws.parent()))
187        if outnodes != bnodes:
188            return True
189
190        return False
191
192    def cleanup(self):
193        '''Remove backed up committed changes'''
194
195        for fname in self.files:
196            if os.path.exists(self.bu.backupfile(fname)):
197                os.unlink(self.bu.backupfile(fname))
198
199
200class CdmUncommittedBackup(object):
201    '''Backup of uncommitted changes'''
202
203    def __init__(self, backup, ws):
204        self.ws = ws
205        self.bu = backup
206
207    def _clobbering_renames(self):
208        '''Return a list of pairs of files representing renames/copies
209        that clobber already versioned files.  [(oldname newname)...]'''
210
211        #
212        # Note that this doesn't handle uncommitted merges
213        # as CdmUncommittedBackup itself doesn't.
214        #
215        wctx = self.ws.workingctx()
216        parent = wctx.parents()[0]
217
218        ret = []
219        for fname in wctx.added() + wctx.modified():
220            rn = wctx.filectx(fname).renamed()
221            if rn and fname in parent:
222                ret.append((rn[0], fname))
223        return ret
224
225    def backup(self):
226        '''Backup uncommitted changes'''
227
228        if self.ws.merged():
229            raise util.Abort("Unable to backup an uncommitted merge.\n"
230                             "Please complete your merge and commit")
231
232        dirstate = node.hex(self.ws.workingctx().parents()[0].node())
233
234        fp = None
235        try:
236            try:
237                fp = open(self.bu.backupfile('dirstate'), 'w')
238                fp.write(dirstate + '\n')
239            except EnvironmentError, e:
240                raise util.Abort("couldn't save working copy parent: %s" % e)
241        finally:
242            if fp and not fp.closed:
243                fp.close()
244
245        try:
246            try:
247                fp = open(self.bu.backupfile('renames'), 'w')
248                for cons in self._clobbering_renames():
249                    fp.write("%s %s\n" % cons)
250            except EnvironmentError, e:
251                raise util.Abort("couldn't save clobbering copies: %s" % e)
252        finally:
253            if fp and not fp.closed:
254                fp.close()
255
256        try:
257            try:
258                fp = open(self.bu.backupfile('diff'), 'w')
259                opts = patch.diffopts(self.ws.ui, opts={'git': True})
260                fp.write(self.ws.diff(opts=opts))
261            except EnvironmentError, e:
262                raise util.Abort("couldn't save working copy diff: %s" % e)
263        finally:
264            if fp and not fp.closed:
265                fp.close()
266
267    def _dirstate(self):
268        '''Return the desired working copy node from the backup'''
269        fp = None
270        try:
271            try:
272                fp = open(self.bu.backupfile('dirstate'))
273                dirstate = fp.readline().strip()
274                return dirstate
275            except EnvironmentError, e:
276                raise util.Abort("couldn't read saved parent: %s" % e)
277        finally:
278            if fp and not fp.closed:
279                fp.close()
280
281    def restore(self):
282        '''Restore uncommitted changes'''
283        diff = self.bu.backupfile('diff')
284        dirstate = self._dirstate()
285
286        #
287        # Check that the patch's parent changeset exists.
288        #
289        try:
290            n = node.bin(dirstate)
291            self.ws.repo.changelog.lookup(n)
292        except error.LookupError, e:
293            raise CdmNodeMissing("couldn't restore uncommitted changes",
294                                 e.name)
295
296        try:
297            self.ws.clean(rev=dirstate)
298        except util.Abort, e:
299            raise util.Abort("couldn't update to saved node: %s" % e)
300
301        if not os.path.exists(diff):
302            return
303
304        #
305        # There's a race here whereby if the patch (or part thereof)
306        # is applied within the same second as the clean above (such
307        # that mtime doesn't change) and if the size of that file
308        # does not change, Hg may not see the change.
309        #
310        # We sleep a full second to avoid this, as sleeping merely
311        # until the next second begins would require very close clock
312        # synchronization on network filesystems.
313        #
314        time.sleep(1)
315
316        files = {}
317        try:
318            try:
319                fuzz = patch.patch(diff, self.ws.ui, strip=1,
320                                   cwd=self.ws.repo.root, files=files)
321                if fuzz:
322                    raise util.Abort('working copy diff applied with fuzz')
323            except Exception, e:
324                raise util.Abort("couldn't apply working copy diff: %s\n"
325                                 "   %s" % (diff, e))
326        finally:
327            patch.updatedir(self.ws.ui, self.ws.repo, files)
328
329        if not os.path.exists(self.bu.backupfile('renames')):
330            return
331
332        #
333        # We need to re-apply name changes where the new name
334        # (rename/copy destination) is an already versioned file, as
335        # Hg would otherwise ignore them.
336        #
337        try:
338            fp = open(self.bu.backupfile('renames'))
339            for line in fp:
340                source, dest = line.strip().split()
341                self.ws.copy(source, dest)
342        except EnvironmentError, e:
343            raise util.Abort('unable to open renames file: %s' % e)
344        except ValueError:
345            raise util.Abort('corrupt renames file: %s' %
346                             self.bu.backupfile('renames'))
347
348    def need_backup(self):
349        '''Compare backup of uncommitted changes to workspace'''
350        cnode = self.ws.workingctx().parents()[0].node()
351        if self._dirstate() != node.hex(cnode):
352            return True
353
354        opts = patch.diffopts(self.ws.ui, opts={'git': True})
355        curdiff = self.ws.diff(opts=opts)
356
357        diff = self.bu.backupfile('diff')
358        if os.path.exists(diff):
359            try:
360                try:
361                    fd = open(diff)
362                    backdiff = fd.read()
363                except EnvironmentError, e:
364                    raise util.Abort("couldn't open backup diff %s\n"
365                                     "   %s" % (diff, e))
366            finally:
367                if fd and not fd.closed:
368                    fd.close()
369        else:
370            backdiff = ''
371
372        if backdiff != curdiff:
373            return True
374
375
376        currrenamed = self._clobbering_renames()
377        bakrenamed = None
378
379        if os.path.exists(self.bu.backupfile('renames')):
380            try:
381                try:
382                    fd = open(self.bu.backupfile('renames'))
383                    bakrenamed = [line.strip().split(' ') for line in fd]
384                except EnvironmentError, e:
385                    raise util.Abort("couldn't open renames file %s: %s\n" %
386                                     (self.bu.backupfile('renames'), e))
387            finally:
388                if fd and not fd.closed:
389                    fd.close()
390
391            if currrenamed != bakrenamed:
392                return True
393
394        return False
395
396    def cleanup(self):
397        '''Remove backed up uncommitted changes'''
398        for fname in ('dirstate', 'diff', 'renames'):
399            if os.path.exists(self.bu.backupfile(fname)):
400                os.unlink(self.bu.backupfile(fname))
401
402
403class CdmMetadataBackup(object):
404    '''Backup of workspace metadata'''
405
406    def __init__(self, backup, ws):
407        self.bu = backup
408        self.ws = ws
409        self.files = ('hgrc', 'localtags', 'patches', 'cdm')
410
411    def backup(self):
412        '''Backup workspace metadata'''
413
414        tar = None
415
416        try:
417            try:
418                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
419                                   'w:gz')
420                tar.errorlevel = 2
421            except (EnvironmentError, tarfile.TarError), e:
422                raise util.Abort("couldn't open %s for writing: %s" %
423                                 (self.bu.backupfile('metadata.tar.gz'), e))
424
425            try:
426                for elt in self.files:
427                    fpath = self.ws.repo.join(elt)
428                    if os.path.exists(fpath):
429                        tar.add(fpath, elt)
430            except (EnvironmentError, tarfile.TarError), e:
431                #
432                # tarfile.TarError doesn't include the tar member or file
433                # in question, so we have to do so ourselves.
434                #
435                if isinstance(e, tarfile.TarError):
436                    errstr = "%s: %s" % (elt, e)
437                else:
438                    errstr = str(e)
439
440                raise util.Abort("couldn't backup metadata to %s:\n"
441                                 "  %s" %
442                                 (self.bu.backupfile('metadata.tar.gz'),
443                                  errstr))
444        finally:
445            if tar and not tar.closed:
446                tar.close()
447
448    def old_restore(self):
449        '''Restore workspace metadata from an pre-tar backup'''
450
451        for fname in self.files:
452            bfile = self.bu.backupfile(fname)
453            wfile = self.ws.repo.join(fname)
454
455            if os.path.exists(bfile):
456                try:
457                    shutil.copy2(bfile, wfile)
458                except EnvironmentError, e:
459                    raise util.Abort("couldn't restore metadata from %s:\n"
460                                     "   %s" % (bfile, e))
461
462    def tar_restore(self):
463        '''Restore workspace metadata (from a tar-style backup)'''
464
465        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
466            tar = None
467
468            try:
469                try:
470                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
471                    tar.errorlevel = 2
472                except (EnvironmentError, tarfile.TarError), e:
473                    raise util.Abort("couldn't open %s: %s" %
474                                 (self.bu.backupfile('metadata.tar.gz'), e))
475
476                try:
477                    for elt in tar:
478                        tar.extract(elt, path=self.ws.repo.path)
479                except (EnvironmentError, tarfile.TarError), e:
480                    # Make sure the member name is in the exception message.
481                    if isinstance(e, tarfile.TarError):
482                        errstr = "%s: %s" % (elt.name, e)
483                    else:
484                        errstr = str(e)
485
486                    raise util.Abort("couldn't restore metadata from %s:\n"
487                                     "   %s" %
488                                     (self.bu.backupfile('metadata.tar.gz'),
489                                      errstr))
490            finally:
491                if tar and not tar.closed:
492                    tar.close()
493
494    def restore(self):
495        '''Restore workspace metadata'''
496
497        if os.path.exists(self.bu.backupfile('hgrc')):
498            self.old_restore()
499        else:
500            self.tar_restore()
501
502    def need_backup(self):
503        '''Compare backed up workspace metadata to workspace'''
504
505        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
506            try:
507                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
508                tar.errorlevel = 2
509            except (EnvironmentError, tarfile.TarError), e:
510                raise util.Abort("couldn't open metadata tarball: %s\n"
511                                 "   %s" %
512                                 (self.bu.backupfile('metadata.tar.gz'), e))
513
514            for elt in tar:
515                fpath = self.ws.repo.join(elt.name)
516                if not os.path.exists(fpath):
517                    return True     # File in tar, not workspace
518
519                if elt.isdir():     # Don't care about directories
520                    continue
521
522                #
523                # The filesystem can give us mtime with fractional seconds
524                # (as a float), whereas tar files only keep it to the second.
525                #
526                # Always compare to the integer (second-granularity) mtime.
527                #
528                if (elt.mtime != int(os.path.getmtime(fpath)) or
529                    elt.size != os.path.getsize(fpath)):
530                    return True
531
532            tarnames = tar.getnames()
533            tar.close()
534        else:
535            tarnames = []
536
537        for mfile in self.files:
538            fpath = self.ws.repo.join(mfile)
539
540            if os.path.isdir(fpath):
541                # Directories in tarfile always end with a '/'
542                if not mfile.endswith('/'):
543                    mfile += '/'
544
545                if mfile not in tarnames:
546                    return True
547
548                for root, dirs, files in os.walk(fpath, topdown=True):
549                    for elt in files:
550                        path = os.path.join(root, elt)
551
552                        rpath = self.ws.repo.path
553                        if not rpath.endswith('/'):
554                            rpath += '/'
555
556                        path = path.replace(rpath, '', 1)
557                        if path not in tarnames:
558                            return True # In workspace not tar
559            else:
560                if os.path.exists(fpath) and mfile not in tarnames:
561                    return True
562
563        return False
564
565    def cleanup(self):
566        '''Remove backed up workspace metadata'''
567        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
568            os.unlink(self.bu.backupfile('metadata.tar.gz'))
569
570
571class CdmBackup(object):
572    '''A backup of a given workspace'''
573
574    def __init__(self, ui, ws, name):
575        self.ws = ws
576        self.ui = ui
577        self.backupdir = self._find_backup_dir(name)
578
579        #
580        # The order of instances here controls the order the various operations
581        # are run.
582        #
583        # There's some inherent dependence, in that on restore we need
584        # to restore committed changes prior to uncommitted changes
585        # (as the parent revision of any uncommitted changes is quite
586        # likely to not exist until committed changes are restored).
587        # Metadata restore can happen at any point, but happens last
588        # as a matter of convention.
589        #
590        self.modules = [x(self, ws) for x in [CdmCommittedBackup,
591                                              CdmUncommittedBackup,
592                                              CdmMetadataBackup]]
593
594
595        if os.path.exists(os.path.join(self.backupdir, 'latest')):
596            generation = os.readlink(os.path.join(self.backupdir, 'latest'))
597            self.generation = int(os.path.split(generation)[1])
598        else:
599            self.generation = 0
600
601    def _find_backup_dir(self, name):
602        '''Find the path to an appropriate backup directory based on NAME'''
603        backupdir = None
604        backupbase = None
605
606        if os.path.isabs(name):
607            return name
608
609        if self.ui.config('cdm', 'backupdir'):
610            backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
611        else:
612            home = None
613
614            try:
615                home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
616            except KeyError:
617                pass                    # Handled anyway
618
619            if not home:
620                raise util.Abort('Could not determine your HOME directory to '
621                                 'find backup path')
622
623            backupbase = os.path.join(home, 'cdm.backup')
624
625        backupdir = os.path.join(backupbase, name)
626
627        # If backupdir exists, it must be a directory.
628        if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
629            raise util.Abort('%s exists but is not a directory' % backupdir)
630
631        return backupdir
632
633    def backupfile(self, path):
634        '''return full path to backup file FILE at GEN'''
635        return os.path.join(self.backupdir, str(self.generation), path)
636
637    def update_latest(self, gen):
638        '''Update latest symlink to point to the current generation'''
639        linkpath = os.path.join(self.backupdir, 'latest')
640
641        if os.path.lexists(linkpath):
642            os.unlink(linkpath)
643
644        os.symlink(str(gen), linkpath)
645
646    def create_gen(self, gen):
647        '''Create a new backup generation'''
648        try:
649            os.makedirs(os.path.join(self.backupdir, str(gen)))
650            self.update_latest(gen)
651        except EnvironmentError, e:
652            raise util.Abort("Couldn't create backup generation %s: %s" %
653                             (os.path.join(self.backupdir, str(gen)), e))
654
655    def need_backup(self):
656        '''Compare backed up changes to workspace'''
657        #
658        # If there's no current backup generation, or the last backup was
659        # invalid (lacking the dirstate file), we need a backup regardless
660        # of anything else.
661        #
662        if (not self.generation or
663            not os.path.exists(self.backupfile('dirstate'))):
664            return True
665
666        for x in self.modules:
667            if x.need_backup():
668                return True
669
670        return False
671
672    def backup(self):
673        '''Take a backup of the current workspace
674
675        Calling code is expected to hold both the working copy lock
676        and repository lock.'''
677
678        if not os.path.exists(self.backupdir):
679            try:
680                os.makedirs(self.backupdir)
681            except EnvironmentError, e:
682                raise util.Abort('Could not create backup directory %s: %s' %
683                                 (self.backupdir, e))
684
685        self.generation += 1
686        self.create_gen(self.generation)
687
688        try:
689            for x in self.modules:
690                x.backup()
691        except Exception, e:
692            if isinstance(e, KeyboardInterrupt):
693                self.ws.ui.warn("Interrupted\n")
694            else:
695                self.ws.ui.warn("Error: %s\n" % e)
696                show_traceback = self.ws.ui.configbool('ui', 'traceback',
697                                                       False)
698
699                #
700                # If it's not a 'normal' error, we want to print a stack
701                # trace now in case the attempt to remove the partial
702                # backup also fails, and raises a second exception.
703                #
704                if (not isinstance(e, (EnvironmentError, util.Abort))
705                    or show_traceback):
706                    traceback.print_exc()
707
708            for x in self.modules:
709                x.cleanup()
710
711            os.rmdir(os.path.join(self.backupdir, str(self.generation)))
712            self.generation -= 1
713
714            if self.generation != 0:
715                self.update_latest(self.generation)
716            else:
717                os.unlink(os.path.join(self.backupdir, 'latest'))
718
719            raise util.Abort('Backup failed')
720
721    def restore(self, gen=None):
722        '''Restore workspace from backup
723
724        Restores from backup generation GEN (defaulting to the latest)
725        into workspace WS.
726
727        Calling code is expected to hold both the working copy lock
728        and repository lock of the destination workspace.'''
729
730        if not os.path.exists(self.backupdir):
731            raise util.Abort('Backup directory does not exist: %s' %
732                             (self.backupdir))
733
734        if gen:
735            if not os.path.exists(os.path.join(self.backupdir, str(gen))):
736                raise util.Abort('Backup generation does not exist: %s' %
737                                 (os.path.join(self.backupdir, str(gen))))
738            self.generation = int(gen)
739
740        if not self.generation: # This is ok, 0 is not a valid generation
741            raise util.Abort('Backup has no generations: %s' % self.backupdir)
742
743        if not os.path.exists(self.backupfile('dirstate')):
744            raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
745                             (self.backupdir, self.generation))
746
747        try:
748            for x in self.modules:
749                x.restore()
750        except util.Abort, e:
751            raise util.Abort('Error restoring workspace:\n'
752                             '%s\n'
753                             'Workspace may be partially restored' % e)
754