xref: /titanic_50/usr/src/tools/onbld/Scm/Backup.py (revision 2e02daeede04af58a9d4f18f8dfed1fda3ececa1)
1#
2#  This program is free software; you can redistribute it and/or modify
3#  it under the terms of the GNU General Public License version 2
4#  as published by the Free Software Foundation.
5#
6#  This program is distributed in the hope that it will be useful,
7#  but WITHOUT ANY WARRANTY; without even the implied warranty of
8#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9#  GNU General Public License for more details.
10#
11#  You should have received a copy of the GNU General Public License
12#  along with this program; if not, write to the Free Software
13#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14#
15
16#
17# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
18# Use is subject to license terms.
19#
20
21'''
22Workspace backup
23
24Backup format is:
25   backupdir/
26      wsname/
27         generation#/
28            dirstate (handled by CdmUncommittedBackup)
29                File containing dirstate nodeid (the changeset we need
30                to update the workspace to after applying the bundle).
31                This is the node to which the working copy changes
32                (see 'diff', below) will be applied if applicable.
33
34            bundle (handled by CdmCommittedBackup)
35                An Hg bundle containing outgoing committed changes.
36
37            nodes (handled by CdmCommittedBackup)
38                A text file listing the full (hex) nodeid of all nodes in
39                bundle, used by need_backup.
40
41            diff (handled by CdmUncommittedBackup)
42                A Git-formatted diff containing uncommitted changes.
43
44            renames (handled by CdmUncommittedBackup)
45                A list of renames in the working copy that have to be
46                applied manually, rather than by the diff.
47
48            metadata.tar.gz (handled by CdmMetadataBackup)
49                $CODEMGR_WS/.hg/hgrc
50                $CODEMGR_WS/.hg/localtags
51                $CODEMGR_WS/.hg/patches (Mq data)
52
53         latest -> generation#
54            Newest backup generation.
55
56All files in a given backup generation, with the exception of
57dirstate, are optional.
58'''
59
60import os, pwd, shutil, traceback, tarfile, time
61from mercurial import changegroup, patch, node, util, revlog
62from cStringIO import StringIO
63
64
class CdmNodeMissing(util.Abort):
    '''A changeset needed by the restore is absent from the destination.

    Situations in which this is raised:

      - the bundle holds a changeset whose parent the destination
        workspace lacks, either because the destination is less
        up-to-date than the source or because the two workspaces
        are unrelated.

      - the uncommitted changes must be applied on top of a node
        the workspace still does not have once the bundle has been
        applied (for instance, a node on a branch found in neither
        the bundle nor the destination workspace).'''

    def __init__(self, msg, name):
        '''Build the abort message from MSG and the missing NAME.

        MSG is a short description of the operation that failed,
        NAME identifies the missing changeset.'''

        #
        # A 20 character str is taken to be a binary nodeid and is
        # shown in short form; anything else is shown verbatim.
        # (Mercurial makes the same assumption when it creates a
        # LookupError.)
        #
        shown = name
        if isinstance(name, str) and len(name) == 20:
            shown = node.short(name)

        text = ("%s: changeset '%s' is missing\n"
                "Your workspace is either not sufficiently up to date,\n"
                "or is unrelated to the workspace from "
                "which the backup was taken.\n" % (msg, shown))

        util.Abort.__init__(self, text)
95
96
97class CdmCommittedBackup(object):
98    '''Backup of committed changes'''
99
100    def __init__(self, backup, ws):
101        self.ws = ws
102        self.bu = backup
103        self.files = ('bundle', 'nodes')
104
105    def _outgoing_nodes(self, parent):
106        '''Return a list of all outgoing nodes in hex format'''
107
108        if parent:
109            outgoing = self.ws.findoutgoing(parent)
110            nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
111            return map(node.hex, nodes)
112        else:
113            return []
114
115    def backup(self):
116        '''Backup committed changes'''
117        parent = self.ws.parent()
118
119        if not parent:
120            self.ws.ui.warn('Workspace has no parent, committed changes will '
121                            'not be backed up\n')
122            return
123
124        out = self.ws.findoutgoing(parent)
125        if not out:
126            return
127
128        cg = self.ws.repo.changegroup(out, 'bundle')
129        changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')
130
131        outnodes = self._outgoing_nodes(parent)
132        if outnodes:
133            fp = None
134            try:
135                try:
136                    fp = open(self.bu.backupfile('nodes'), 'w')
137                    fp.write('%s\n' % '\n'.join(outnodes))
138                except EnvironmentError, e:
139                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
140            finally:
141                if fp and not fp.closed:
142                    fp.close()
143
144    def restore(self):
145        '''Restore committed changes from backup'''
146        bfile = self.bu.backupfile('bundle')
147
148        if os.path.exists(bfile):
149            f = None
150            try:
151                try:
152                    f = open(bfile, 'r')
153                    bundle = changegroup.readbundle(f, bfile)
154                    self.ws.repo.addchangegroup(bundle, 'strip',
155                                                'bundle:%s' % bfile)
156                except EnvironmentError, e:
157                    raise util.Abort("couldn't restore committed changes: %s\n"
158                                     "   %s" % (bfile, e))
159                except revlog.LookupError, e:
160                    raise CdmNodeMissing("couldn't restore committed changes",
161                                                     e.name)
162            finally:
163                if f and not f.closed:
164                    f.close()
165
166    def need_backup(self):
167        '''Compare backup of committed changes to workspace'''
168
169        if os.path.exists(self.bu.backupfile('nodes')):
170            f = None
171            try:
172                try:
173                    f = open(self.bu.backupfile('nodes'))
174                    bnodes = set([line.rstrip('\r\n')
175                                  for line in f.readlines()])
176                    f.close()
177                except EnvironmentError, e:
178                    raise util.Abort("couldn't open backup node list: %s" % e)
179            finally:
180                if f and not f.closed:
181                    f.close()
182        else:
183            bnodes = set()
184
185        outnodes = set(self._outgoing_nodes(self.ws.parent()))
186        if outnodes != bnodes:
187            return True
188
189        return False
190
191    def cleanup(self):
192        '''Remove backed up committed changes'''
193
194        for fname in self.files:
195            if os.path.exists(self.bu.backupfile(fname)):
196                os.unlink(self.bu.backupfile(fname))
197
198
199class CdmUncommittedBackup(object):
200    '''Backup of uncommitted changes'''
201
202    def __init__(self, backup, ws):
203        self.ws = ws
204        self.bu = backup
205
206    def _clobbering_renames(self):
207        '''Return a list of pairs of files representing renames/copies
208        that clobber already versioned files.  [(oldname newname)...]'''
209
210        #
211        # Note that this doesn't handle uncommitted merges
212        # as CdmUncommittedBackup itself doesn't.
213        #
214        wctx = self.ws.repo.workingctx()
215        parent = wctx.parents()[0]
216
217        ret = []
218        for fname in wctx.added() + wctx.modified():
219            rn = wctx.filectx(fname).renamed()
220            if rn and fname in parent:
221                ret.append((rn[0], fname))
222        return ret
223
224    def backup(self):
225        '''Backup uncommitted changes'''
226
227        if self.ws.merged():
228            raise util.Abort("Unable to backup an uncommitted merge.\n"
229                             "Please complete your merge and commit")
230
231        dirstate = node.hex(self.ws.repo.changectx().node())
232
233        fp = None
234        try:
235            try:
236                fp = open(self.bu.backupfile('dirstate'), 'w')
237                fp.write(dirstate + '\n')
238            except EnvironmentError, e:
239                raise util.Abort("couldn't save working copy parent: %s" % e)
240        finally:
241            if fp and not fp.closed:
242                fp.close()
243
244        try:
245            try:
246                fp = open(self.bu.backupfile('renames'), 'w')
247                for cons in self._clobbering_renames():
248                    fp.write("%s %s\n" % cons)
249            except EnvironmentError, e:
250                raise util.Abort("couldn't save clobbering copies: %s" % e)
251        finally:
252            if fp and not fp.closed:
253                fp.close()
254
255        try:
256            try:
257                fp = open(self.bu.backupfile('diff'), 'w')
258                patch.diff(self.ws.repo, fp=fp,
259                           opts=patch.diffopts(self.ws.ui, opts={'git': True}))
260            except EnvironmentError, e:
261                raise util.Abort("couldn't save working copy diff: %s" % e)
262        finally:
263            if fp and not fp.closed:
264                fp.close()
265
266    def _dirstate(self):
267        '''Return the current working copy node'''
268        fp = None
269        try:
270            try:
271                fp = open(self.bu.backupfile('dirstate'))
272                dirstate = fp.readline().strip()
273                return dirstate
274            except EnvironmentError, e:
275                raise util.Abort("couldn't read saved parent: %s" % e)
276        finally:
277            if fp and not fp.closed:
278                fp.close()
279
280    def restore(self):
281        '''Restore uncommitted changes'''
282        diff = self.bu.backupfile('diff')
283        dirstate = self._dirstate()
284
285        try:
286            self.ws.clean(rev=dirstate)
287        except revlog.LookupError, e:
288            raise CdmNodeMissing("couldn't restore uncommitted changes",
289                                             e.name)
290        except util.Abort, e:
291            raise util.Abort("couldn't update to saved node: %s" % e)
292
293        if not os.path.exists(diff):
294            return
295
296        #
297        # There's a race here whereby if the patch (or part thereof)
298        # is applied within the same second as the clean above (such
299        # that mtime doesn't change) and if the size of that file
300        # does not change, Hg may not see the change.
301        #
302        # We sleep a full second to avoid this, as sleeping merely
303        # until the next second begins would require very close clock
304        # synchronization on network filesystems.
305        #
306        time.sleep(1)
307
308        files = {}
309        try:
310            try:
311                fuzz = patch.patch(diff, self.ws.ui, strip=1,
312                                   cwd=self.ws.repo.root, files=files)
313                if fuzz:
314                    raise util.Abort('working copy diff applied with fuzz')
315            except Exception, e:
316                raise util.Abort("couldn't apply working copy diff: %s\n"
317                                 "   %s" % (diff, e))
318        finally:
319            patch.updatedir(self.ws.ui, self.ws.repo, files)
320
321        if not os.path.exists(self.bu.backupfile('renames')):
322            return
323
324        #
325        # We need to re-apply name changes where the new name
326        # (rename/copy destination) is an already versioned file, as
327        # Hg would otherwise ignore them.
328        #
329        try:
330            fp = open(self.bu.backupfile('renames'))
331            for line in fp:
332                source, dest = line.strip().split()
333                self.ws.repo.copy(source, dest)
334        except EnvironmentError, e:
335            raise util.Abort('unable to open renames file: %s' % e)
336        except ValueError:
337            raise util.Abort('corrupt renames file: %s' %
338                             self.bu.backupfile('renames'))
339
340    def need_backup(self):
341        '''Compare backup of uncommitted changes to workspace'''
342        if self._dirstate() != node.hex(self.ws.repo.changectx().node()):
343            return True
344
345        curdiff = StringIO()
346        diff = self.bu.backupfile('diff')
347        fd = None
348
349        patch.diff(self.ws.repo, fp=curdiff,
350                   opts=patch.diffopts(self.ws.ui, opts={'git': True}))
351
352        if os.path.exists(diff):
353            try:
354                try:
355                    fd = open(diff)
356                    backdiff = fd.read()
357                except EnvironmentError, e:
358                    raise util.Abort("couldn't open backup diff %s\n"
359                                     "   %s" % (diff, e))
360            finally:
361                if fd and not fd.closed:
362                    fd.close()
363        else:
364            backdiff = ''
365
366        if backdiff != curdiff.getvalue():
367            return True
368
369
370        currrenamed = self._clobbering_renames()
371        bakrenamed = None
372
373        if os.path.exists(self.bu.backupfile('renames')):
374            try:
375                try:
376                    fd = open(self.bu.backupfile('renames'))
377                    bakrenamed = [line.strip().split(' ') for line in fd]
378                except EnvironmentError, e:
379                    raise util.Abort("couldn't open renames file %s: %s\n" %
380                                     (self.bu.backupfile('renames'), e))
381            finally:
382                if fd and not fd.closed:
383                    fd.close()
384
385            if currrenamed != bakrenamed:
386                return True
387
388        return False
389
390    def cleanup(self):
391        '''Remove backed up uncommitted changes'''
392        for fname in ('dirstate', 'diff', 'renames'):
393            if os.path.exists(self.bu.backupfile(fname)):
394                os.unlink(self.bu.backupfile(fname))
395
396
397class CdmMetadataBackup(object):
398    '''Backup of workspace metadata'''
399
400    def __init__(self, backup, ws):
401        self.bu = backup
402        self.ws = ws
403        self.files = ('hgrc', 'localtags', 'patches', 'cdm')
404
405    def backup(self):
406        '''Backup workspace metadata'''
407
408        tar = None
409
410        try:
411            try:
412                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
413                                   'w:gz')
414                tar.errorlevel = 2
415            except (EnvironmentError, tarfile.TarError), e:
416                raise util.Abort("couldn't open %s for writing: %s" %
417                                 (self.bu.backupfile('metadata.tar.gz'), e))
418
419            try:
420                for elt in self.files:
421                    fpath = self.ws.repo.join(elt)
422                    if os.path.exists(fpath):
423                        tar.add(fpath, elt)
424            except (EnvironmentError, tarfile.TarError), e:
425                #
426                # tarfile.TarError doesn't include the tar member or file
427                # in question, so we have to do so ourselves.
428                #
429                if isinstance(e, tarfile.TarError):
430                    error = "%s: %s" % (elt, e)
431                else:
432                    error = str(e)
433
434                raise util.Abort("couldn't backup metadata to %s:\n"
435                                 "  %s" %
436                                 (self.bu.backupfile('metadata.tar.gz'),
437                                  error))
438        finally:
439            if tar and not tar.closed:
440                tar.close()
441
442    def old_restore(self):
443        '''Restore workspace metadata from an pre-tar backup'''
444
445        for fname in self.files:
446            bfile = self.bu.backupfile(fname)
447            wfile = self.ws.repo.join(fname)
448
449            if os.path.exists(bfile):
450                try:
451                    shutil.copy2(bfile, wfile)
452                except EnvironmentError, e:
453                    raise util.Abort("couldn't restore metadata from %s:\n"
454                                     "   %s" % (bfile, e))
455
456    def tar_restore(self):
457        '''Restore workspace metadata (from a tar-style backup)'''
458
459        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
460            tar = None
461
462            try:
463                try:
464                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
465                    tar.errorlevel = 2
466                except (EnvironmentError, tarfile.TarError), e:
467                    raise util.Abort("couldn't open %s: %s" %
468                                 (self.bu.backupfile('metadata.tar.gz'), e))
469
470                try:
471                    for elt in tar:
472                        tar.extract(elt, path=self.ws.repo.path)
473                except (EnvironmentError, tarfile.TarError), e:
474                    # Make sure the member name is in the exception message.
475                    if isinstance(e, tarfile.TarError):
476                        error = "%s: %s" % (elt.name, e)
477                    else:
478                        error = str(e)
479
480                    raise util.Abort("couldn't restore metadata from %s:\n"
481                                     "   %s" %
482                                     (self.bu.backupfile('metadata.tar.gz'),
483                                      error))
484            finally:
485                if tar and not tar.closed:
486                    tar.close()
487
488    def restore(self):
489        '''Restore workspace metadata'''
490
491        if os.path.exists(self.bu.backupfile('hgrc')):
492            self.old_restore()
493        else:
494            self.tar_restore()
495
496    def need_backup(self):
497        '''Compare backed up workspace metadata to workspace'''
498
499        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
500            try:
501                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
502                tar.errorlevel = 2
503            except (EnvironmentError, tarfile.TarError), e:
504                raise util.Abort("couldn't open metadata tarball: %s\n"
505                                 "   %s" %
506                                 (self.bu.backupfile('metadata.tar.gz'), e))
507
508            for elt in tar:
509                fpath = self.ws.repo.join(elt.name)
510                if not os.path.exists(fpath):
511                    return True     # File in tar, not workspace
512
513                if elt.isdir():     # Don't care about directories
514                    continue
515
516                if (elt.mtime != os.path.getmtime(fpath) or
517                    elt.size != os.path.getsize(fpath)):
518                    return True
519
520            tarnames = tar.getnames()
521            tar.close()
522        else:
523            tarnames = []
524
525        for mfile in self.files:
526            fpath = self.ws.repo.join(mfile)
527
528            if os.path.isdir(fpath):
529                # Directories in tarfile always end with a '/'
530                if not mfile.endswith('/'):
531                    mfile += '/'
532
533                if mfile not in tarnames:
534                    return True
535
536                for root, dirs, files in os.walk(fpath, topdown=True):
537                    for elt in files:
538                        path = os.path.join(root, elt)
539
540                        rpath = self.ws.repo.path
541                        if not rpath.endswith('/'):
542                            rpath += '/'
543
544                        path = path.replace(rpath, '', 1)
545                        if path not in tarnames:
546                            return True # In workspace not tar
547            else:
548                if os.path.exists(fpath) and mfile not in tarnames:
549                    return True
550
551        return False
552
553    def cleanup(self):
554        '''Remove backed up workspace metadata'''
555        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
556            os.unlink(self.bu.backupfile('metadata.tar.gz'))
557
558
559class CdmBackup(object):
560    '''A backup of a given workspace'''
561
562    def __init__(self, ui, ws, name):
563        self.ws = ws
564        self.ui = ui
565        self.backupdir = self._find_backup_dir(name)
566
567        #
568        # The order of instances here controls the order the various operations
569        # are run.
570        #
571        # There's some inherent dependence, in that on restore we need
572        # to restore committed changes prior to uncommitted changes
573        # (as the parent revision of any uncommitted changes is quite
574        # likely to not exist until committed changes are restored).
575        # Metadata restore can happen at any point, but happens last
576        # as a matter of convention.
577        #
578        self.modules = [x(self, ws) for x in [CdmCommittedBackup,
579                                              CdmUncommittedBackup,
580                                              CdmMetadataBackup]]
581
582
583        if os.path.exists(os.path.join(self.backupdir, 'latest')):
584            generation = os.readlink(os.path.join(self.backupdir, 'latest'))
585            self.generation = int(os.path.split(generation)[1])
586        else:
587            self.generation = 0
588
589    def _find_backup_dir(self, name):
590        '''Find the path to an appropriate backup directory based on NAME'''
591        backupdir = None
592        backupbase = None
593
594        if os.path.isabs(name):
595            return name
596
597        if self.ui.config('cdm', 'backupdir'):
598            backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
599        else:
600            home = None
601
602            try:
603                home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
604            except KeyError:
605                pass                    # Handled anyway
606
607            if not home:
608                raise util.Abort('Could not determine your HOME directory to '
609                                 'find backup path')
610
611            backupbase = os.path.join(home, 'cdm.backup')
612
613        backupdir = os.path.join(backupbase, name)
614
615        # If backupdir exists, it must be a directory.
616        if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
617            raise util.Abort('%s exists but is not a directory' % backupdir)
618
619        return backupdir
620
621    def backupfile(self, path):
622        '''return full path to backup file FILE at GEN'''
623        return os.path.join(self.backupdir, str(self.generation), path)
624
625    def update_latest(self, gen):
626        '''Update latest symlink to point to the current generation'''
627        linkpath = os.path.join(self.backupdir, 'latest')
628
629        if os.path.lexists(linkpath):
630            os.unlink(linkpath)
631
632        os.symlink(str(gen), linkpath)
633
634    def create_gen(self, gen):
635        '''Create a new backup generation'''
636        try:
637            os.makedirs(os.path.join(self.backupdir, str(gen)))
638            self.update_latest(gen)
639        except EnvironmentError, e:
640            raise util.Abort("Couldn't create backup generation %s: %s" %
641                             (os.path.join(self.backupdir, str(gen)), e))
642
643    def need_backup(self):
644        '''Compare backed up changes to workspace'''
645        #
646        # If there's no current backup generation, or the last backup was
647        # invalid (lacking the dirstate file), we need a backup regardless
648        # of anything else.
649        #
650        if (not self.generation or
651            not os.path.exists(self.backupfile('dirstate'))):
652            return True
653
654        for x in self.modules:
655            if x.need_backup():
656                return True
657
658        return False
659
660    def backup(self):
661        '''Take a backup of the current workspace'''
662
663        if not os.path.exists(self.backupdir):
664            try:
665                os.makedirs(self.backupdir)
666            except EnvironmentError, e:
667                raise util.Abort('Could not create backup directory %s: %s' %
668                                 (self.backupdir, e))
669
670        self.generation += 1
671        self.create_gen(self.generation)
672
673        #
674        # Lock the repo, so the backup can be consistent.  We need the
675        # wlock too to make sure the dirstate parent doesn't change
676        # underneath us.
677        #
678
679        lock = self.ws.repo.lock()
680        wlock = self.ws.repo.lock()
681
682        try:
683            for x in self.modules:
684                x.backup()
685        except Exception, e:
686            if isinstance(e, KeyboardInterrupt):
687                self.ws.ui.warn("Interrupted\n")
688            else:
689                self.ws.ui.warn("Error: %s\n" % e)
690
691                #
692                # If it's not a 'normal' error, we want to print a stack
693                # trace now in case the attempt to remove the partial
694                # backup also fails, and raises a second exception.
695                #
696                if (not isinstance(e, (EnvironmentError, util.Abort))
697                    or self.ws.ui.traceback):
698                    traceback.print_exc()
699
700            for x in self.modules:
701                x.cleanup()
702
703            os.rmdir(os.path.join(self.backupdir, str(self.generation)))
704            self.generation -= 1
705
706            if self.generation != 0:
707                self.update_latest(self.generation)
708            else:
709                os.unlink(os.path.join(self.backupdir, 'latest'))
710
711            raise util.Abort('Backup failed')
712
713    def restore(self, gen=None):
714        '''Restore workspace from backup
715
716        Restores from backup generation GEN (defaulting to the latest)
717        into workspace WS.'''
718
719        wlock = self.ws.repo.wlock()
720        lock = self.ws.repo.lock()
721
722        if not os.path.exists(self.backupdir):
723            raise util.Abort('Backup directory does not exist: %s' %
724                             (self.backupdir))
725
726        if gen:
727            if not os.path.exists(os.path.join(self.backupdir, str(gen))):
728                raise util.Abort('Backup generation does not exist: %s' %
729                                 (os.path.join(self.backupdir, str(gen))))
730            self.generation = int(gen)
731
732        if not self.generation: # This is ok, 0 is not a valid generation
733            raise util.Abort('Backup has no generations: %s' % self.backupdir)
734
735        if not os.path.exists(self.backupfile('dirstate')):
736            raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
737                             (self.backupdir, self.generation))
738
739        try:
740            for x in self.modules:
741                x.restore()
742        except util.Abort, e:
743            raise util.Abort('Error restoring workspace:\n'
744                             '%s\n'
745                             'Workspace may be partially restored' % e)
746