#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2
#  as published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

#
# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

'''
Workspace backup

Backup format is:
   backupdir/
      wsname/
         generation#/
            dirstate (handled by CdmUncommittedBackup)
                File containing dirstate nodeid (the changeset we need
                to update the workspace to after applying the bundle).
                This is the node to which the working copy changes
                (see 'diff', below) will be applied if applicable.

            bundle (handled by CdmCommittedBackup)
                An Hg bundle containing outgoing committed changes.

            nodes (handled by CdmCommittedBackup)
                A text file listing the full (hex) nodeid of all nodes in
                bundle, used by need_backup.

            diff (handled by CdmUncommittedBackup)
                A Git-formatted diff containing uncommitted changes.

            renames (handled by CdmUncommittedBackup)
                A list of renames in the working copy that have to be
                applied manually, rather than by the diff.

            metadata.tar.gz (handled by CdmMetadataBackup)
                $CODEMGR_WS/.hg/hgrc
                $CODEMGR_WS/.hg/localtags
                $CODEMGR_WS/.hg/patches (Mq data)

         latest -> generation#
            Newest backup generation.

All files in a given backup generation, with the exception of
dirstate, are optional.
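
For example, a workspace backed up twice into the default backup
location (~/cdm.backup/, used when the cdm.backupdir setting is not
configured; the workspace name 'myws' is purely illustrative) might
look like:

   ~/cdm.backup/
      myws/
         1/
            dirstate bundle nodes diff renames metadata.tar.gz
         2/
            dirstate diff metadata.tar.gz
         latest -> 2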
'''

import os, pwd, shutil, tarfile, time, traceback
from mercurial import changegroup, node, patch, util

from onbld.Scm.WorkSpace import HgLookupError
import onbld.Scm.Version as Version


class CdmNodeMissing(util.Abort):
    '''a required node is not present in the destination workspace.

    This may occur both in the case where the bundle contains a
    changeset which is a child of a node not present in the
    destination workspace (because the destination workspace is not as
    up-to-date as the source), or because the source and destination
    workspace are not related.

    It may also happen in cases where the uncommitted changes need to
    be applied onto a node that the workspace does not possess even
    after application of the bundle (on a branch not present
    in the bundle or destination workspace, for instance)'''

    def __init__(self, msg, name):
        #
        # If e.name is a string 20 characters long, it is
        # assumed to be a node.  (Mercurial makes this
        # same assumption, when creating a LookupError)
        #
        if isinstance(name, str) and len(name) == 20:
            n = node.short(name)
        else:
            n = name

        util.Abort.__init__(self, "%s: changeset '%s' is missing\n"
                            "Your workspace is either not "
                            "sufficiently up to date,\n"
                            "or is unrelated to the workspace from "
                            "which the backup was taken.\n" % (msg, n))


class CdmCommittedBackup(object):
    '''Backup of committed changes'''

    def __init__(self, backup, ws):
        self.ws = ws
        self.bu = backup
        self.files = ('bundle', 'nodes')

    def _outgoing_nodes(self, parent):
        '''Return a list of all outgoing nodes in hex format'''

        if parent:
            outgoing = self.ws.findoutgoing(parent)
            nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
            return map(node.hex, nodes)
        else:
            return []

    def backup(self):
        '''Backup committed changes'''
        parent = self.ws.parent()

        if not parent:
            self.ws.ui.warn('Workspace has no parent, committed changes will '
                            'not be backed up\n')
            return

        out = self.ws.findoutgoing(parent)
        if not out:
            return

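        #
        # Bundle up every outgoing changeset.  'HG10BZ' selects the
        # bzip2-compressed variant of the HG10 bundle format.
        #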
        cg = self.ws.repo.changegroup(out, 'bundle')
        changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')

        outnodes = self._outgoing_nodes(parent)
        if outnodes:
            fp = None
            try:
                try:
                    fp = open(self.bu.backupfile('nodes'), 'w')
                    fp.write('%s\n' % '\n'.join(outnodes))
                except EnvironmentError, e:
                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
            finally:
                if fp and not fp.closed:
                    fp.close()

    def restore(self):
        '''Restore committed changes from backup'''
        bfile = self.bu.backupfile('bundle')

        if os.path.exists(bfile):
            f = None
            try:
                try:
                    f = open(bfile, 'r')
                    bundle = changegroup.readbundle(f, bfile)
                    self.ws.repo.addchangegroup(bundle, 'strip',
                                                'bundle:%s' % bfile)
                except EnvironmentError, e:
                    raise util.Abort("couldn't restore committed changes: %s\n"
                                     "   %s" % (bfile, e))
                except HgLookupError, e:
                    raise CdmNodeMissing("couldn't restore committed changes",
                                                     e.name)
            finally:
                if f and not f.closed:
                    f.close()

    def need_backup(self):
        '''Compare backup of committed changes to workspace'''

        if os.path.exists(self.bu.backupfile('nodes')):
            f = None
            try:
                try:
                    f = open(self.bu.backupfile('nodes'))
                    bnodes = set([line.rstrip('\r\n')
                                  for line in f.readlines()])
                    f.close()
                except EnvironmentError, e:
                    raise util.Abort("couldn't open backup node list: %s" % e)
            finally:
                if f and not f.closed:
                    f.close()
        else:
            bnodes = set()

        outnodes = set(self._outgoing_nodes(self.ws.parent()))
        if outnodes != bnodes:
            return True

        return False

    def cleanup(self):
        '''Remove backed up committed changes'''

        for fname in self.files:
            if os.path.exists(self.bu.backupfile(fname)):
                os.unlink(self.bu.backupfile(fname))


class CdmUncommittedBackup(object):
    '''Backup of uncommitted changes'''

    def __init__(self, backup, ws):
        self.ws = ws
        self.bu = backup

    def _clobbering_renames(self):
        '''Return a list of pairs of files representing renames/copies
        that clobber already versioned files.  [(oldname newname)...]'''

        #
        # Note that this doesn't handle uncommitted merges
        # as CdmUncommittedBackup itself doesn't.
        #
        wctx = self.ws.workingctx()
        parent = wctx.parents()[0]

        ret = []
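        #
        # Only renames/copies whose destination is already versioned in
        # the parent matter here; re-applying the saved diff would not
        # record the copy for those files (see restore(), below).
        #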
        for fname in wctx.added() + wctx.modified():
            rn = wctx.filectx(fname).renamed()
            if rn and fname in parent:
                ret.append((rn[0], fname))
        return ret

    def backup(self):
        '''Backup uncommitted changes'''

        if self.ws.merged():
            raise util.Abort("Unable to backup an uncommitted merge.\n"
                             "Please complete your merge and commit")

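        #
        # Record the node of the working copy's (sole) parent; restore()
        # updates the workspace to this node before re-applying the diff.
        #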
        dirstate = node.hex(self.ws.workingctx().parents()[0].node())

        fp = None
        try:
            try:
                fp = open(self.bu.backupfile('dirstate'), 'w')
                fp.write(dirstate + '\n')
            except EnvironmentError, e:
                raise util.Abort("couldn't save working copy parent: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

        try:
            try:
                fp = open(self.bu.backupfile('renames'), 'w')
                for cons in self._clobbering_renames():
                    fp.write("%s %s\n" % cons)
            except EnvironmentError, e:
                raise util.Abort("couldn't save clobbering copies: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

        try:
            try:
                fp = open(self.bu.backupfile('diff'), 'w')
                opts = patch.diffopts(self.ws.ui, opts={'git': True})
                fp.write(self.ws.diff(opts=opts))
            except EnvironmentError, e:
                raise util.Abort("couldn't save working copy diff: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

    def _dirstate(self):
        '''Return the desired working copy node from the backup'''
        fp = None
        try:
            try:
                fp = open(self.bu.backupfile('dirstate'))
                dirstate = fp.readline().strip()
                return dirstate
            except EnvironmentError, e:
                raise util.Abort("couldn't read saved parent: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

    def restore(self):
        '''Restore uncommitted changes'''
        diff = self.bu.backupfile('diff')
        dirstate = self._dirstate()

        #
        # Check that the patch's parent changeset exists.
        #
        try:
            n = node.bin(dirstate)
            self.ws.repo.changelog.lookup(n)
        except HgLookupError, e:
            raise CdmNodeMissing("couldn't restore uncommitted changes",
                                 e.name)

        try:
            self.ws.clean(rev=dirstate)
        except util.Abort, e:
            raise util.Abort("couldn't update to saved node: %s" % e)

        if not os.path.exists(diff):
            return

        #
        # There's a race here whereby if the patch (or part thereof)
        # is applied within the same second as the clean above (such
        # that mtime doesn't change) and if the size of that file
        # does not change, Hg may not see the change.
        #
        # We sleep a full second to avoid this, as sleeping merely
        # until the next second begins would require very close clock
        # synchronization on network filesystems.
        #
        time.sleep(1)

        files = {}
        try:
            try:
                fuzz = patch.patch(diff, self.ws.ui, strip=1,
                                   cwd=self.ws.repo.root, files=files)
                if fuzz:
                    raise util.Abort('working copy diff applied with fuzz')
            except Exception, e:
                raise util.Abort("couldn't apply working copy diff: %s\n"
                                 "   %s" % (diff, e))
        finally:
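            #
            # Record any adds, removes and copies made by the (possibly
            # partially applied) diff, even if we are about to abort.
            #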
            patch.updatedir(self.ws.ui, self.ws.repo, files)

        if not os.path.exists(self.bu.backupfile('renames')):
            return

        #
        # We need to re-apply name changes where the new name
        # (rename/copy destination) is an already versioned file, as
        # Hg would otherwise ignore them.
        #
        try:
            fp = open(self.bu.backupfile('renames'))
            for line in fp:
                source, dest = line.strip().split()
                self.ws.repo.copy(source, dest)
        except EnvironmentError, e:
            raise util.Abort('unable to open renames file: %s' % e)
        except ValueError:
            raise util.Abort('corrupt renames file: %s' %
                             self.bu.backupfile('renames'))

    def need_backup(self):
        '''Compare backup of uncommitted changes to workspace'''
        cnode = self.ws.workingctx().parents()[0].node()
        if self._dirstate() != node.hex(cnode):
            return True

        opts = patch.diffopts(self.ws.ui, opts={'git': True})
        curdiff = self.ws.diff(opts=opts)

        diff = self.bu.backupfile('diff')
        if os.path.exists(diff):
            fd = None
            try:
                try:
                    fd = open(diff)
                    backdiff = fd.read()
                except EnvironmentError, e:
                    raise util.Abort("couldn't open backup diff %s\n"
                                     "   %s" % (diff, e))
            finally:
                if fd and not fd.closed:
                    fd.close()
        else:
            backdiff = ''

        if backdiff != curdiff:
            return True


        currrenamed = self._clobbering_renames()
        bakrenamed = None

        if os.path.exists(self.bu.backupfile('renames')):
            fd = None
            try:
                try:
                    fd = open(self.bu.backupfile('renames'))
                    bakrenamed = [tuple(line.strip().split(' '))
                                  for line in fd]
                except EnvironmentError, e:
                    raise util.Abort("couldn't open renames file %s: %s\n" %
                                     (self.bu.backupfile('renames'), e))
            finally:
                if fd and not fd.closed:
                    fd.close()

            if currrenamed != bakrenamed:
                return True

        return False

    def cleanup(self):
        '''Remove backed up uncommitted changes'''
        for fname in ('dirstate', 'diff', 'renames'):
            if os.path.exists(self.bu.backupfile(fname)):
                os.unlink(self.bu.backupfile(fname))


class CdmMetadataBackup(object):
    '''Backup of workspace metadata'''

    def __init__(self, backup, ws):
        self.bu = backup
        self.ws = ws
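        #
        # Metadata to preserve, as paths relative to the repository's
        # .hg directory.
        #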
        self.files = ('hgrc', 'localtags', 'patches', 'cdm')

    def backup(self):
        '''Backup workspace metadata'''

        tar = None

        try:
            try:
                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
                                   'w:gz')
                tar.errorlevel = 2
            except (EnvironmentError, tarfile.TarError), e:
                raise util.Abort("couldn't open %s for writing: %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

            try:
                for elt in self.files:
                    fpath = self.ws.repo.join(elt)
                    if os.path.exists(fpath):
                        tar.add(fpath, elt)
            except (EnvironmentError, tarfile.TarError), e:
                #
                # tarfile.TarError doesn't include the tar member or file
                # in question, so we have to do so ourselves.
                #
                if isinstance(e, tarfile.TarError):
                    error = "%s: %s" % (elt, e)
                else:
                    error = str(e)

                raise util.Abort("couldn't backup metadata to %s:\n"
                                 "  %s" %
                                 (self.bu.backupfile('metadata.tar.gz'),
                                  error))
        finally:
            if tar and not tar.closed:
                tar.close()

    def old_restore(self):
        '''Restore workspace metadata from a pre-tar backup'''

        for fname in self.files:
            bfile = self.bu.backupfile(fname)
            wfile = self.ws.repo.join(fname)

            if os.path.exists(bfile):
                try:
                    shutil.copy2(bfile, wfile)
                except EnvironmentError, e:
                    raise util.Abort("couldn't restore metadata from %s:\n"
                                     "   %s" % (bfile, e))

    def tar_restore(self):
        '''Restore workspace metadata (from a tar-style backup)'''

        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            tar = None

            try:
                try:
                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
                    tar.errorlevel = 2
                except (EnvironmentError, tarfile.TarError), e:
                    raise util.Abort("couldn't open %s: %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

                try:
                    for elt in tar:
                        tar.extract(elt, path=self.ws.repo.path)
                except (EnvironmentError, tarfile.TarError), e:
                    # Make sure the member name is in the exception message.
                    if isinstance(e, tarfile.TarError):
                        error = "%s: %s" % (elt.name, e)
                    else:
                        error = str(e)

                    raise util.Abort("couldn't restore metadata from %s:\n"
                                     "   %s" %
                                     (self.bu.backupfile('metadata.tar.gz'),
                                      error))
            finally:
                if tar and not tar.closed:
                    tar.close()

    def restore(self):
        '''Restore workspace metadata'''

        if os.path.exists(self.bu.backupfile('hgrc')):
            self.old_restore()
        else:
            self.tar_restore()

    def need_backup(self):
        '''Compare backed up workspace metadata to workspace'''

        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            try:
                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
                tar.errorlevel = 2
            except (EnvironmentError, tarfile.TarError), e:
                raise util.Abort("couldn't open metadata tarball: %s\n"
                                 "   %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

            for elt in tar:
                fpath = self.ws.repo.join(elt.name)
                if not os.path.exists(fpath):
                    return True     # File in tar, not workspace

                if elt.isdir():     # Don't care about directories
                    continue

                #
                # The filesystem can give us mtime with fractional seconds
                # (as a float), whereas tar files only keep it to the second.
                #
                # Always compare to the integer (second-granularity) mtime.
                #
                if (elt.mtime != int(os.path.getmtime(fpath)) or
                    elt.size != os.path.getsize(fpath)):
                    return True

            tarnames = tar.getnames()
            tar.close()
        else:
            tarnames = []

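        #
        # Check the reverse direction: metadata present in the workspace
        # but missing from the tarball also requires a new backup.
        #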
        for mfile in self.files:
            fpath = self.ws.repo.join(mfile)

            if os.path.isdir(fpath):
                # Directories in tarfile always end with a '/'
                if not mfile.endswith('/'):
                    mfile += '/'

                if mfile not in tarnames:
                    return True

                for root, dirs, files in os.walk(fpath, topdown=True):
                    for elt in files:
                        path = os.path.join(root, elt)

                        rpath = self.ws.repo.path
                        if not rpath.endswith('/'):
                            rpath += '/'

                        path = path.replace(rpath, '', 1)
                        if path not in tarnames:
                            return True # In workspace not tar
            else:
                if os.path.exists(fpath) and mfile not in tarnames:
                    return True

        return False

    def cleanup(self):
        '''Remove backed up workspace metadata'''
        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            os.unlink(self.bu.backupfile('metadata.tar.gz'))


class CdmBackup(object):
    '''A backup of a given workspace'''

    def __init__(self, ui, ws, name):
        self.ws = ws
        self.ui = ui
        self.backupdir = self._find_backup_dir(name)

        #
        # The order of instances here controls the order the various operations
        # are run.
        #
        # There's some inherent dependence, in that on restore we need
        # to restore committed changes prior to uncommitted changes
        # (as the parent revision of any uncommitted changes is quite
        # likely to not exist until committed changes are restored).
        # Metadata restore can happen at any point, but happens last
        # as a matter of convention.
        #
        self.modules = [x(self, ws) for x in [CdmCommittedBackup,
                                              CdmUncommittedBackup,
                                              CdmMetadataBackup]]


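        #
        # 'latest' is a symlink to the most recent generation directory;
        # its basename is the current generation number (0 if no backup
        # has been taken yet).
        #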
        if os.path.exists(os.path.join(self.backupdir, 'latest')):
            generation = os.readlink(os.path.join(self.backupdir, 'latest'))
            self.generation = int(os.path.split(generation)[1])
        else:
            self.generation = 0

    def _find_backup_dir(self, name):
        '''Find the path to an appropriate backup directory based on NAME'''
        backupdir = None
        backupbase = None

        if os.path.isabs(name):
            return name

        if self.ui.config('cdm', 'backupdir'):
            backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
        else:
            home = None

            try:
                home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                pass                    # Handled anyway

            if not home:
                raise util.Abort('Could not determine your HOME directory to '
                                 'find backup path')

            backupbase = os.path.join(home, 'cdm.backup')

        backupdir = os.path.join(backupbase, name)

        # If backupdir exists, it must be a directory.
        if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
            raise util.Abort('%s exists but is not a directory' % backupdir)

        return backupdir

    def backupfile(self, path):
        '''Return the full path to PATH within the current backup generation'''
        return os.path.join(self.backupdir, str(self.generation), path)

    def update_latest(self, gen):
        '''Update latest symlink to point to the current generation'''
        linkpath = os.path.join(self.backupdir, 'latest')

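        # lexists(), so that a dangling 'latest' symlink is still removed.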
        if os.path.lexists(linkpath):
            os.unlink(linkpath)

        os.symlink(str(gen), linkpath)

    def create_gen(self, gen):
        '''Create a new backup generation'''
        try:
            os.makedirs(os.path.join(self.backupdir, str(gen)))
            self.update_latest(gen)
        except EnvironmentError, e:
            raise util.Abort("Couldn't create backup generation %s: %s" %
                             (os.path.join(self.backupdir, str(gen)), e))

    def need_backup(self):
        '''Compare backed up changes to workspace'''
        #
        # If there's no current backup generation, or the last backup was
        # invalid (lacking the dirstate file), we need a backup regardless
        # of anything else.
        #
        if (not self.generation or
            not os.path.exists(self.backupfile('dirstate'))):
            return True

        for x in self.modules:
            if x.need_backup():
                return True

        return False

    def backup(self):
        '''Take a backup of the current workspace

        Calling code is expected to hold both the working copy lock
        and repository lock.'''

        if not os.path.exists(self.backupdir):
            try:
                os.makedirs(self.backupdir)
            except EnvironmentError, e:
                raise util.Abort('Could not create backup directory %s: %s' %
                                 (self.backupdir, e))

        self.generation += 1
        self.create_gen(self.generation)

        try:
            for x in self.modules:
                x.backup()
        except Exception, e:
            if isinstance(e, KeyboardInterrupt):
                self.ws.ui.warn("Interrupted\n")
            else:
                self.ws.ui.warn("Error: %s\n" % e)
                if Version.at_least("1.3.0"):
                    show_traceback = self.ws.ui.configbool('ui', 'traceback',
                                                   False)
                else:
                    show_traceback = self.ws.ui.traceback

                #
                # If it's not a 'normal' error, we want to print a stack
                # trace now in case the attempt to remove the partial
                # backup also fails, and raises a second exception.
                #
                if (not isinstance(e, (EnvironmentError, util.Abort))
                    or show_traceback):
                    traceback.print_exc()

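            #
            # Roll the partial backup back: remove whatever each module
            # wrote, delete the now-empty generation directory, and point
            # 'latest' back at the previous generation (or remove it if
            # this was the first).
            #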
            for x in self.modules:
                x.cleanup()

            os.rmdir(os.path.join(self.backupdir, str(self.generation)))
            self.generation -= 1

            if self.generation != 0:
                self.update_latest(self.generation)
            else:
                os.unlink(os.path.join(self.backupdir, 'latest'))

            raise util.Abort('Backup failed')

    def restore(self, gen=None):
        '''Restore workspace from backup

        Restores from backup generation GEN (defaulting to the latest)
        into workspace WS.

        Calling code is expected to hold both the working copy lock
        and repository lock of the destination workspace.'''

        if not os.path.exists(self.backupdir):
            raise util.Abort('Backup directory does not exist: %s' %
                             (self.backupdir))

        if gen:
            if not os.path.exists(os.path.join(self.backupdir, str(gen))):
                raise util.Abort('Backup generation does not exist: %s' %
                                 (os.path.join(self.backupdir, str(gen))))
            self.generation = int(gen)

        if not self.generation: # This is ok, 0 is not a valid generation
            raise util.Abort('Backup has no generations: %s' % self.backupdir)

        if not os.path.exists(self.backupfile('dirstate')):
            raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
                             (self.backupdir, self.generation))

        try:
            for x in self.modules:
                x.restore()
        except util.Abort, e:
            raise util.Abort('Error restoring workspace:\n'
                             '%s\n'
                             'Workspace may be partially restored' % e)
758