xref: /titanic_50/usr/src/tools/onbld/Scm/Backup.py (revision 8a3c961b6b8e22607c570d092514b791eb1519e9)
1#
2#  This program is free software; you can redistribute it and/or modify
3#  it under the terms of the GNU General Public License version 2
4#  as published by the Free Software Foundation.
5#
6#  This program is distributed in the hope that it will be useful,
7#  but WITHOUT ANY WARRANTY; without even the implied warranty of
8#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9#  GNU General Public License for more details.
10#
11#  You should have received a copy of the GNU General Public License
12#  along with this program; if not, write to the Free Software
13#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14#
15
16#
17# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
18# Use is subject to license terms.
19#
20
21'''
22Workspace backup
23
24Backup format is:
25   backupdir/
26      wsname/
27         generation#/
28            dirstate (handled by CdmUncommittedBackup)
29                File containing dirstate nodeid (the changeset we need
30                to update the workspace to after applying the bundle).
31                This is the node to which the working copy changes
32                (see 'diff', below) will be applied if applicable.
33
34            bundle (handled by CdmCommittedBackup)
35                An Hg bundle containing outgoing committed changes.
36
37            nodes (handled by CdmCommittedBackup)
38                A text file listing the full (hex) nodeid of all nodes in
39                bundle, used by need_backup.
40
41            diff (handled by CdmUncommittedBackup)
42                A Git-formatted diff containing uncommitted changes.
43
44            renames (handled by CdmUncommittedBackup)
45                A list of renames in the working copy that have to be
46                applied manually, rather than by the diff.
47
48            metadata.tar.gz (handled by CdmMetadataBackup)
49                $CODEMGR_WS/.hg/hgrc
50                $CODEMGR_WS/.hg/localtags
51                $CODEMGR_WS/.hg/patches (Mq data)
52
53         latest -> generation#
54            Newest backup generation.
55
56All files in a given backup generation, with the exception of
57dirstate, are optional.
58'''
59
60import os, pwd, shutil, traceback, tarfile, time
61from mercurial import changegroup, patch, node, util, revlog
62
63
class CdmNodeMissing(util.Abort):
    '''Abort raised when the destination workspace lacks a needed node.

    Two situations lead here:

      - the bundle holds a changeset whose parent is absent from the
        destination workspace, either because that workspace is less
        up-to-date than the source or because the two workspaces are
        unrelated.

      - the uncommitted changes have to be applied on top of a node
        the workspace still does not have once the bundle has been
        applied (a branch present in neither the bundle nor the
        destination workspace, for instance).'''

    def __init__(self, msg, name):
        '''MSG prefixes the error text, NAME identifies the missing
        changeset.'''

        #
        # A 20 character string is taken to be a binary node and is
        # abbreviated for display (Mercurial makes the same assumption
        # when it builds a LookupError).  Anything else is shown as-is.
        #
        looks_like_node = isinstance(name, str) and len(name) == 20

        shown = name
        if looks_like_node:
            shown = node.short(name)

        text = ("%s: changeset '%s' is missing\n"
                "Your workspace is either not sufficiently up to date,\n"
                "or is unrelated to the workspace from which the backup "
                "was taken.\n") % (msg, shown)

        util.Abort.__init__(self, text)
94
95
96class CdmCommittedBackup(object):
97    '''Backup of committed changes'''
98
99    def __init__(self, backup, ws):
100        self.ws = ws
101        self.bu = backup
102        self.files = ('bundle', 'nodes')
103
104    def _outgoing_nodes(self, parent):
105        '''Return a list of all outgoing nodes in hex format'''
106
107        if parent:
108            outgoing = self.ws.findoutgoing(parent)
109            nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
110            return map(node.hex, nodes)
111        else:
112            return []
113
114    def backup(self):
115        '''Backup committed changes'''
116        parent = self.ws.parent()
117
118        if not parent:
119            self.ws.ui.warn('Workspace has no parent, committed changes will '
120                            'not be backed up\n')
121            return
122
123        out = self.ws.findoutgoing(parent)
124        if not out:
125            return
126
127        cg = self.ws.repo.changegroup(out, 'bundle')
128        changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')
129
130        outnodes = self._outgoing_nodes(parent)
131        if outnodes:
132            fp = None
133            try:
134                try:
135                    fp = open(self.bu.backupfile('nodes'), 'w')
136                    fp.write('%s\n' % '\n'.join(outnodes))
137                except EnvironmentError, e:
138                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
139            finally:
140                if fp and not fp.closed:
141                    fp.close()
142
143    def restore(self):
144        '''Restore committed changes from backup'''
145        bfile = self.bu.backupfile('bundle')
146
147        if os.path.exists(bfile):
148            f = None
149            try:
150                try:
151                    f = open(bfile, 'r')
152                    bundle = changegroup.readbundle(f, bfile)
153                    self.ws.repo.addchangegroup(bundle, 'strip',
154                                                'bundle:%s' % bfile)
155                except EnvironmentError, e:
156                    raise util.Abort("couldn't restore committed changes: %s\n"
157                                     "   %s" % (bfile, e))
158                except revlog.LookupError, e:
159                    raise CdmNodeMissing("couldn't restore committed changes",
160                                                     e.name)
161            finally:
162                if f and not f.closed:
163                    f.close()
164
165    def need_backup(self):
166        '''Compare backup of committed changes to workspace'''
167
168        if os.path.exists(self.bu.backupfile('nodes')):
169            f = None
170            try:
171                try:
172                    f = open(self.bu.backupfile('nodes'))
173                    bnodes = set([line.rstrip('\r\n')
174                                  for line in f.readlines()])
175                    f.close()
176                except EnvironmentError, e:
177                    raise util.Abort("couldn't open backup node list: %s" % e)
178            finally:
179                if f and not f.closed:
180                    f.close()
181        else:
182            bnodes = set()
183
184        outnodes = set(self._outgoing_nodes(self.ws.parent()))
185        if outnodes != bnodes:
186            return True
187
188        return False
189
190    def cleanup(self):
191        '''Remove backed up committed changes'''
192
193        for fname in self.files:
194            if os.path.exists(self.bu.backupfile(fname)):
195                os.unlink(self.bu.backupfile(fname))
196
197
198class CdmUncommittedBackup(object):
199    '''Backup of uncommitted changes'''
200
201    def __init__(self, backup, ws):
202        self.ws = ws
203        self.bu = backup
204
205    def _clobbering_renames(self):
206        '''Return a list of pairs of files representing renames/copies
207        that clobber already versioned files.  [(oldname newname)...]'''
208
209        #
210        # Note that this doesn't handle uncommitted merges
211        # as CdmUncommittedBackup itself doesn't.
212        #
213        wctx = self.ws.workingctx()
214        parent = wctx.parents()[0]
215
216        ret = []
217        for fname in wctx.added() + wctx.modified():
218            rn = wctx.filectx(fname).renamed()
219            if rn and fname in parent:
220                ret.append((rn[0], fname))
221        return ret
222
223    def backup(self):
224        '''Backup uncommitted changes'''
225
226        if self.ws.merged():
227            raise util.Abort("Unable to backup an uncommitted merge.\n"
228                             "Please complete your merge and commit")
229
230        dirstate = node.hex(self.ws.workingctx().parents()[0].node())
231
232        fp = None
233        try:
234            try:
235                fp = open(self.bu.backupfile('dirstate'), 'w')
236                fp.write(dirstate + '\n')
237            except EnvironmentError, e:
238                raise util.Abort("couldn't save working copy parent: %s" % e)
239        finally:
240            if fp and not fp.closed:
241                fp.close()
242
243        try:
244            try:
245                fp = open(self.bu.backupfile('renames'), 'w')
246                for cons in self._clobbering_renames():
247                    fp.write("%s %s\n" % cons)
248            except EnvironmentError, e:
249                raise util.Abort("couldn't save clobbering copies: %s" % e)
250        finally:
251            if fp and not fp.closed:
252                fp.close()
253
254        try:
255            try:
256                fp = open(self.bu.backupfile('diff'), 'w')
257                opts = patch.diffopts(self.ws.ui, opts={'git': True})
258                fp.write(self.ws.diff(opts=opts))
259            except EnvironmentError, e:
260                raise util.Abort("couldn't save working copy diff: %s" % e)
261        finally:
262            if fp and not fp.closed:
263                fp.close()
264
265    def _dirstate(self):
266        '''Return the desired working copy node from the backup'''
267        fp = None
268        try:
269            try:
270                fp = open(self.bu.backupfile('dirstate'))
271                dirstate = fp.readline().strip()
272                return dirstate
273            except EnvironmentError, e:
274                raise util.Abort("couldn't read saved parent: %s" % e)
275        finally:
276            if fp and not fp.closed:
277                fp.close()
278
279    def restore(self):
280        '''Restore uncommitted changes'''
281        diff = self.bu.backupfile('diff')
282        dirstate = self._dirstate()
283
284        #
285        # Check that the patch's parent changeset exists.
286        #
287        try:
288            n = node.bin(dirstate)
289            self.ws.repo.changelog.lookup(n)
290        except revlog.LookupError, e:
291            raise CdmNodeMissing("couldn't restore uncommitted changes",
292                                 e.name)
293
294        try:
295            self.ws.clean(rev=dirstate)
296        except util.Abort, e:
297            raise util.Abort("couldn't update to saved node: %s" % e)
298
299        if not os.path.exists(diff):
300            return
301
302        #
303        # There's a race here whereby if the patch (or part thereof)
304        # is applied within the same second as the clean above (such
305        # that mtime doesn't change) and if the size of that file
306        # does not change, Hg may not see the change.
307        #
308        # We sleep a full second to avoid this, as sleeping merely
309        # until the next second begins would require very close clock
310        # synchronization on network filesystems.
311        #
312        time.sleep(1)
313
314        files = {}
315        try:
316            try:
317                fuzz = patch.patch(diff, self.ws.ui, strip=1,
318                                   cwd=self.ws.repo.root, files=files)
319                if fuzz:
320                    raise util.Abort('working copy diff applied with fuzz')
321            except Exception, e:
322                raise util.Abort("couldn't apply working copy diff: %s\n"
323                                 "   %s" % (diff, e))
324        finally:
325            patch.updatedir(self.ws.ui, self.ws.repo, files)
326
327        if not os.path.exists(self.bu.backupfile('renames')):
328            return
329
330        #
331        # We need to re-apply name changes where the new name
332        # (rename/copy destination) is an already versioned file, as
333        # Hg would otherwise ignore them.
334        #
335        try:
336            fp = open(self.bu.backupfile('renames'))
337            for line in fp:
338                source, dest = line.strip().split()
339                self.ws.repo.copy(source, dest)
340        except EnvironmentError, e:
341            raise util.Abort('unable to open renames file: %s' % e)
342        except ValueError:
343            raise util.Abort('corrupt renames file: %s' %
344                             self.bu.backupfile('renames'))
345
346    def need_backup(self):
347        '''Compare backup of uncommitted changes to workspace'''
348        cnode = self.ws.workingctx().parents()[0].node()
349        if self._dirstate() != node.hex(cnode):
350            return True
351
352        opts = patch.diffopts(self.ws.ui, opts={'git': True})
353        curdiff = self.ws.diff(opts=opts)
354
355        diff = self.bu.backupfile('diff')
356        if os.path.exists(diff):
357            try:
358                try:
359                    fd = open(diff)
360                    backdiff = fd.read()
361                except EnvironmentError, e:
362                    raise util.Abort("couldn't open backup diff %s\n"
363                                     "   %s" % (diff, e))
364            finally:
365                if fd and not fd.closed:
366                    fd.close()
367        else:
368            backdiff = ''
369
370        if backdiff != curdiff:
371            return True
372
373
374        currrenamed = self._clobbering_renames()
375        bakrenamed = None
376
377        if os.path.exists(self.bu.backupfile('renames')):
378            try:
379                try:
380                    fd = open(self.bu.backupfile('renames'))
381                    bakrenamed = [line.strip().split(' ') for line in fd]
382                except EnvironmentError, e:
383                    raise util.Abort("couldn't open renames file %s: %s\n" %
384                                     (self.bu.backupfile('renames'), e))
385            finally:
386                if fd and not fd.closed:
387                    fd.close()
388
389            if currrenamed != bakrenamed:
390                return True
391
392        return False
393
394    def cleanup(self):
395        '''Remove backed up uncommitted changes'''
396        for fname in ('dirstate', 'diff', 'renames'):
397            if os.path.exists(self.bu.backupfile(fname)):
398                os.unlink(self.bu.backupfile(fname))
399
400
401class CdmMetadataBackup(object):
402    '''Backup of workspace metadata'''
403
404    def __init__(self, backup, ws):
405        self.bu = backup
406        self.ws = ws
407        self.files = ('hgrc', 'localtags', 'patches', 'cdm')
408
409    def backup(self):
410        '''Backup workspace metadata'''
411
412        tar = None
413
414        try:
415            try:
416                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
417                                   'w:gz')
418                tar.errorlevel = 2
419            except (EnvironmentError, tarfile.TarError), e:
420                raise util.Abort("couldn't open %s for writing: %s" %
421                                 (self.bu.backupfile('metadata.tar.gz'), e))
422
423            try:
424                for elt in self.files:
425                    fpath = self.ws.repo.join(elt)
426                    if os.path.exists(fpath):
427                        tar.add(fpath, elt)
428            except (EnvironmentError, tarfile.TarError), e:
429                #
430                # tarfile.TarError doesn't include the tar member or file
431                # in question, so we have to do so ourselves.
432                #
433                if isinstance(e, tarfile.TarError):
434                    error = "%s: %s" % (elt, e)
435                else:
436                    error = str(e)
437
438                raise util.Abort("couldn't backup metadata to %s:\n"
439                                 "  %s" %
440                                 (self.bu.backupfile('metadata.tar.gz'),
441                                  error))
442        finally:
443            if tar and not tar.closed:
444                tar.close()
445
446    def old_restore(self):
447        '''Restore workspace metadata from an pre-tar backup'''
448
449        for fname in self.files:
450            bfile = self.bu.backupfile(fname)
451            wfile = self.ws.repo.join(fname)
452
453            if os.path.exists(bfile):
454                try:
455                    shutil.copy2(bfile, wfile)
456                except EnvironmentError, e:
457                    raise util.Abort("couldn't restore metadata from %s:\n"
458                                     "   %s" % (bfile, e))
459
460    def tar_restore(self):
461        '''Restore workspace metadata (from a tar-style backup)'''
462
463        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
464            tar = None
465
466            try:
467                try:
468                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
469                    tar.errorlevel = 2
470                except (EnvironmentError, tarfile.TarError), e:
471                    raise util.Abort("couldn't open %s: %s" %
472                                 (self.bu.backupfile('metadata.tar.gz'), e))
473
474                try:
475                    for elt in tar:
476                        tar.extract(elt, path=self.ws.repo.path)
477                except (EnvironmentError, tarfile.TarError), e:
478                    # Make sure the member name is in the exception message.
479                    if isinstance(e, tarfile.TarError):
480                        error = "%s: %s" % (elt.name, e)
481                    else:
482                        error = str(e)
483
484                    raise util.Abort("couldn't restore metadata from %s:\n"
485                                     "   %s" %
486                                     (self.bu.backupfile('metadata.tar.gz'),
487                                      error))
488            finally:
489                if tar and not tar.closed:
490                    tar.close()
491
492    def restore(self):
493        '''Restore workspace metadata'''
494
495        if os.path.exists(self.bu.backupfile('hgrc')):
496            self.old_restore()
497        else:
498            self.tar_restore()
499
500    def need_backup(self):
501        '''Compare backed up workspace metadata to workspace'''
502
503        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
504            try:
505                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
506                tar.errorlevel = 2
507            except (EnvironmentError, tarfile.TarError), e:
508                raise util.Abort("couldn't open metadata tarball: %s\n"
509                                 "   %s" %
510                                 (self.bu.backupfile('metadata.tar.gz'), e))
511
512            for elt in tar:
513                fpath = self.ws.repo.join(elt.name)
514                if not os.path.exists(fpath):
515                    return True     # File in tar, not workspace
516
517                if elt.isdir():     # Don't care about directories
518                    continue
519
520                #
521                # The filesystem can give us mtime with fractional seconds
522                # (as a float), whereas tar files only keep it to the second.
523                #
524                # Always compare to the integer (second-granularity) mtime.
525                #
526                if (elt.mtime != int(os.path.getmtime(fpath)) or
527                    elt.size != os.path.getsize(fpath)):
528                    return True
529
530            tarnames = tar.getnames()
531            tar.close()
532        else:
533            tarnames = []
534
535        for mfile in self.files:
536            fpath = self.ws.repo.join(mfile)
537
538            if os.path.isdir(fpath):
539                # Directories in tarfile always end with a '/'
540                if not mfile.endswith('/'):
541                    mfile += '/'
542
543                if mfile not in tarnames:
544                    return True
545
546                for root, dirs, files in os.walk(fpath, topdown=True):
547                    for elt in files:
548                        path = os.path.join(root, elt)
549
550                        rpath = self.ws.repo.path
551                        if not rpath.endswith('/'):
552                            rpath += '/'
553
554                        path = path.replace(rpath, '', 1)
555                        if path not in tarnames:
556                            return True # In workspace not tar
557            else:
558                if os.path.exists(fpath) and mfile not in tarnames:
559                    return True
560
561        return False
562
563    def cleanup(self):
564        '''Remove backed up workspace metadata'''
565        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
566            os.unlink(self.bu.backupfile('metadata.tar.gz'))
567
568
569class CdmBackup(object):
570    '''A backup of a given workspace'''
571
572    def __init__(self, ui, ws, name):
573        self.ws = ws
574        self.ui = ui
575        self.backupdir = self._find_backup_dir(name)
576
577        #
578        # The order of instances here controls the order the various operations
579        # are run.
580        #
581        # There's some inherent dependence, in that on restore we need
582        # to restore committed changes prior to uncommitted changes
583        # (as the parent revision of any uncommitted changes is quite
584        # likely to not exist until committed changes are restored).
585        # Metadata restore can happen at any point, but happens last
586        # as a matter of convention.
587        #
588        self.modules = [x(self, ws) for x in [CdmCommittedBackup,
589                                              CdmUncommittedBackup,
590                                              CdmMetadataBackup]]
591
592
593        if os.path.exists(os.path.join(self.backupdir, 'latest')):
594            generation = os.readlink(os.path.join(self.backupdir, 'latest'))
595            self.generation = int(os.path.split(generation)[1])
596        else:
597            self.generation = 0
598
599    def _find_backup_dir(self, name):
600        '''Find the path to an appropriate backup directory based on NAME'''
601        backupdir = None
602        backupbase = None
603
604        if os.path.isabs(name):
605            return name
606
607        if self.ui.config('cdm', 'backupdir'):
608            backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
609        else:
610            home = None
611
612            try:
613                home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
614            except KeyError:
615                pass                    # Handled anyway
616
617            if not home:
618                raise util.Abort('Could not determine your HOME directory to '
619                                 'find backup path')
620
621            backupbase = os.path.join(home, 'cdm.backup')
622
623        backupdir = os.path.join(backupbase, name)
624
625        # If backupdir exists, it must be a directory.
626        if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
627            raise util.Abort('%s exists but is not a directory' % backupdir)
628
629        return backupdir
630
631    def backupfile(self, path):
632        '''return full path to backup file FILE at GEN'''
633        return os.path.join(self.backupdir, str(self.generation), path)
634
635    def update_latest(self, gen):
636        '''Update latest symlink to point to the current generation'''
637        linkpath = os.path.join(self.backupdir, 'latest')
638
639        if os.path.lexists(linkpath):
640            os.unlink(linkpath)
641
642        os.symlink(str(gen), linkpath)
643
644    def create_gen(self, gen):
645        '''Create a new backup generation'''
646        try:
647            os.makedirs(os.path.join(self.backupdir, str(gen)))
648            self.update_latest(gen)
649        except EnvironmentError, e:
650            raise util.Abort("Couldn't create backup generation %s: %s" %
651                             (os.path.join(self.backupdir, str(gen)), e))
652
653    def need_backup(self):
654        '''Compare backed up changes to workspace'''
655        #
656        # If there's no current backup generation, or the last backup was
657        # invalid (lacking the dirstate file), we need a backup regardless
658        # of anything else.
659        #
660        if (not self.generation or
661            not os.path.exists(self.backupfile('dirstate'))):
662            return True
663
664        for x in self.modules:
665            if x.need_backup():
666                return True
667
668        return False
669
670    def backup(self):
671        '''Take a backup of the current workspace'''
672
673        if not os.path.exists(self.backupdir):
674            try:
675                os.makedirs(self.backupdir)
676            except EnvironmentError, e:
677                raise util.Abort('Could not create backup directory %s: %s' %
678                                 (self.backupdir, e))
679
680        self.generation += 1
681        self.create_gen(self.generation)
682
683        #
684        # Lock the repo, so the backup can be consistent.  We need the
685        # wlock too to make sure the dirstate parent doesn't change
686        # underneath us.
687        #
688
689        lock = self.ws.repo.lock()
690        wlock = self.ws.repo.lock()
691
692        try:
693            for x in self.modules:
694                x.backup()
695        except Exception, e:
696            if isinstance(e, KeyboardInterrupt):
697                self.ws.ui.warn("Interrupted\n")
698            else:
699                self.ws.ui.warn("Error: %s\n" % e)
700
701                #
702                # If it's not a 'normal' error, we want to print a stack
703                # trace now in case the attempt to remove the partial
704                # backup also fails, and raises a second exception.
705                #
706                if (not isinstance(e, (EnvironmentError, util.Abort))
707                    or self.ws.ui.traceback):
708                    traceback.print_exc()
709
710            for x in self.modules:
711                x.cleanup()
712
713            os.rmdir(os.path.join(self.backupdir, str(self.generation)))
714            self.generation -= 1
715
716            if self.generation != 0:
717                self.update_latest(self.generation)
718            else:
719                os.unlink(os.path.join(self.backupdir, 'latest'))
720
721            raise util.Abort('Backup failed')
722
723    def restore(self, gen=None):
724        '''Restore workspace from backup
725
726        Restores from backup generation GEN (defaulting to the latest)
727        into workspace WS.'''
728
729        wlock = self.ws.repo.wlock()
730        lock = self.ws.repo.lock()
731
732        if not os.path.exists(self.backupdir):
733            raise util.Abort('Backup directory does not exist: %s' %
734                             (self.backupdir))
735
736        if gen:
737            if not os.path.exists(os.path.join(self.backupdir, str(gen))):
738                raise util.Abort('Backup generation does not exist: %s' %
739                                 (os.path.join(self.backupdir, str(gen))))
740            self.generation = int(gen)
741
742        if not self.generation: # This is ok, 0 is not a valid generation
743            raise util.Abort('Backup has no generations: %s' % self.backupdir)
744
745        if not os.path.exists(self.backupfile('dirstate')):
746            raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
747                             (self.backupdir, self.generation))
748
749        try:
750            for x in self.modules:
751                x.restore()
752        except util.Abort, e:
753            raise util.Abort('Error restoring workspace:\n'
754                             '%s\n'
755                             'Workspace may be partially restored' % e)
756