xref: /titanic_50/usr/src/tools/onbld/Scm/Backup.py (revision 1dc8bc23152a02d4586ec1fd8612f7e8f57ceb42)
1#
2#  This program is free software; you can redistribute it and/or modify
3#  it under the terms of the GNU General Public License version 2
4#  as published by the Free Software Foundation.
5#
6#  This program is distributed in the hope that it will be useful,
7#  but WITHOUT ANY WARRANTY; without even the implied warranty of
8#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9#  GNU General Public License for more details.
10#
11#  You should have received a copy of the GNU General Public License
12#  along with this program; if not, write to the Free Software
13#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14#
15
16#
17# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
18# Use is subject to license terms.
19#
20# ident	"%Z%%M%	%I%	%E% SMI"
21#
22
23'''
24Workspace backup
25
26Backup format is:
27   backupdir/
28      wsname/
29         generation#/
30            dirstate (handled by CdmUncommittedBackup)
31                File containing dirstate nodeid (the tip we expect to be at
32                after applying the bundle).
33
34            bundle (handled by CdmCommittedBackup)
35                An Hg bundle containing outgoing committed changes.
36
37            nodes (handled by CdmCommittedBackup)
38                A text file listing the full (hex) nodeid of all nodes in
39                bundle, used by need_backup.
40
41            diff (handled by CdmUncommittedBackup)
42                A Git-formatted diff containing uncommitted changes.
43
44            renames (handled by CdmUncommittedBackup)
45                A list of renames in the working copy that have to be
46                applied manually, rather than by the diff.
47
48            metadata.tar.gz (handled by CdmMetadataBackup)
49                $CODEMGR_WS/.hg/hgrc
50                $CODEMGR_WS/.hg/localtags
51                $CODEMGR_WS/.hg/patches (Mq data)
52
53         latest -> generation#
54            Newest backup generation.
55
56All files in a given backup generation, with the exception of
57dirstate, are optional.
58'''
59
60import os, pwd, shutil, traceback, tarfile, time
61from mercurial import changegroup, patch, node, util
62from cStringIO import StringIO
63
64
65class CdmCommittedBackup(object):
66    '''Backup of committed changes'''
67
68    def __init__(self, backup, ws):
69        self.ws = ws
70        self.bu = backup
71        self.files = ('bundle', 'nodes')
72
73    def _outgoing_nodes(self, parent):
74        '''Return a list of all outgoing nodes in hex format'''
75
76        if parent:
77            outgoing = self.ws.findoutgoing(parent)
78            nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
79            return map(node.hex, nodes)
80        else:
81            return []
82
83    def backup(self):
84        '''Backup committed changes'''
85        parent = self.ws.parent()
86
87        if not parent:
88            self.ws.ui.warn('Workspace has no parent, committed changes will '
89                            'not be backed up\n')
90            return
91
92        out = self.ws.findoutgoing(parent)
93        if not out:
94            return
95
96        cg = self.ws.repo.changegroup(out, 'bundle')
97        changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')
98
99        outnodes = self._outgoing_nodes(parent)
100        if outnodes:
101            fp = None
102            try:
103                try:
104                    fp = open(self.bu.backupfile('nodes'), 'w')
105                    fp.write('%s\n' % '\n'.join(outnodes))
106                except EnvironmentError, e:
107                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
108            finally:
109                if fp and not fp.closed:
110                    fp.close()
111
112    def restore(self):
113        '''Restore committed changes from backup'''
114        bfile = self.bu.backupfile('bundle')
115
116        if os.path.exists(bfile):
117            f = None
118            try:
119                try:
120                    f = open(bfile, 'r')
121                    bundle = changegroup.readbundle(f, bfile)
122                    self.ws.repo.addchangegroup(bundle, 'strip',
123                                                'bundle:%s' % bfile)
124                except EnvironmentError, e:
125                    raise util.Abort("couldn't restore committed changes: %s\n"
126                                     "   %s" % (bfile, e))
127            finally:
128                if f and not f.closed:
129                    f.close()
130
131    def need_backup(self):
132        '''Compare backup of committed changes to workspace'''
133
134        if os.path.exists(self.bu.backupfile('nodes')):
135            f = None
136            try:
137                try:
138                    f = open(self.bu.backupfile('nodes'))
139                    bnodes = set([line.rstrip('\r\n')
140                                  for line in f.readlines()])
141                    f.close()
142                except EnvironmentError, e:
143                    raise util.Abort("couldn't open backup node list: %s" % e)
144            finally:
145                if f and not f.closed:
146                    f.close()
147        else:
148            bnodes = set()
149
150        outnodes = set(self._outgoing_nodes(self.ws.parent()))
151        if outnodes != bnodes:
152            return True
153
154        return False
155
156    def cleanup(self):
157        '''Remove backed up committed changes'''
158
159        for fname in self.files:
160            if os.path.exists(self.bu.backupfile(fname)):
161                os.unlink(self.bu.backupfile(fname))
162
163
164class CdmUncommittedBackup(object):
165    '''Backup of uncommitted changes'''
166
167    def __init__(self, backup, ws):
168        self.ws = ws
169        self.bu = backup
170
171    def _clobbering_renames(self):
172        '''Return a list of pairs of files representing renames/copies
173        that clobber already versioned files.  [(oldname newname)...]'''
174
175        #
176        # Note that this doesn't handle uncommitted merges
177        # as CdmUncommittedBackup itself doesn't.
178        #
179        wctx = self.ws.repo.workingctx()
180        parent = wctx.parents()[0]
181
182        ret = []
183        for fname in wctx.added() + wctx.modified():
184            rn = wctx.filectx(fname).renamed()
185            if rn and fname in parent:
186                ret.append((rn[0], fname))
187        return ret
188
189    def backup(self):
190        '''Backup uncommitted changes'''
191
192        if self.ws.merged():
193            raise util.Abort("Unable to backup an uncommitted merge.\n"
194                             "Please complete your merge and commit")
195
196        dirstate = node.hex(self.ws.repo.changectx().node())
197
198        fp = None
199        try:
200            try:
201                fp = open(self.bu.backupfile('dirstate'), 'w')
202                fp.write(dirstate + '\n')
203            except EnvironmentError, e:
204                raise util.Abort("couldn't save working copy parent: %s" % e)
205        finally:
206            if fp and not fp.closed:
207                fp.close()
208
209        try:
210            try:
211                fp = open(self.bu.backupfile('renames'), 'w')
212                for cons in self._clobbering_renames():
213                    fp.write("%s %s\n" % cons)
214            except EnvironmentError, e:
215                raise util.Abort("couldn't save clobbering copies: %s" % e)
216        finally:
217            if fp and not fp.closed:
218                fp.close()
219
220        try:
221            try:
222                fp = open(self.bu.backupfile('diff'), 'w')
223                patch.diff(self.ws.repo, fp=fp,
224                           opts=patch.diffopts(self.ws.ui, opts={'git': True}))
225            except EnvironmentError, e:
226                raise util.Abort("couldn't save working copy diff: %s" % e)
227        finally:
228            if fp and not fp.closed:
229                fp.close()
230
231    def _dirstate(self):
232        '''Return the current working copy node'''
233        fp = None
234        try:
235            try:
236                fp = open(self.bu.backupfile('dirstate'))
237                dirstate = fp.readline().strip()
238                return dirstate
239            except EnvironmentError, e:
240                raise util.Abort("couldn't read saved parent: %s" % e)
241        finally:
242            if fp and not fp.closed:
243                fp.close()
244
245    def restore(self):
246        '''Restore uncommitted changes'''
247        diff = self.bu.backupfile('diff')
248        dirstate = self._dirstate()
249
250        try:
251            self.ws.clean(rev=dirstate)
252        except util.Abort, e:
253            raise util.Abort("couldn't update to saved node: %s" % e)
254
255        if not os.path.exists(diff):
256            return
257
258        #
259        # There's a race here whereby if the patch (or part thereof)
260        # is applied within the same second as the clean above (such
261        # that mtime doesn't change) and if the size of that file
262        # does not change, Hg may not see the change.
263        #
264        # We sleep a full second to avoid this, as sleeping merely
265        # until the next second begins would require very close clock
266        # synchronization on network filesystems.
267        #
268        time.sleep(1)
269
270        files = {}
271        try:
272            try:
273                fuzz = patch.patch(diff, self.ws.ui, strip=1,
274                                   cwd=self.ws.repo.root, files=files)
275                if fuzz:
276                    raise util.Abort('working copy diff applied with fuzz')
277            except Exception, e:
278                raise util.Abort("couldn't apply working copy diff: %s\n"
279                                 "   %s" % (diff, e))
280        finally:
281            patch.updatedir(self.ws.ui, self.ws.repo, files)
282
283        if not os.path.exists(self.bu.backupfile('renames')):
284            return
285
286        #
287        # We need to re-apply name changes where the new name
288        # (rename/copy destination) is an already versioned file, as
289        # Hg would otherwise ignore them.
290        #
291        try:
292            fp = open(self.bu.backupfile('renames'))
293            for line in fp:
294                source, dest = line.strip().split()
295                self.ws.repo.copy(source, dest)
296        except EnvironmentError, e:
297            raise util.Abort('unable to open renames file: %s' % e)
298        except ValueError:
299            raise util.Abort('corrupt renames file: %s' %
300                             self.bu.backupfile('renames'))
301
302    def need_backup(self):
303        '''Compare backup of uncommitted changes to workspace'''
304        if self._dirstate() != node.hex(self.ws.repo.changectx().node()):
305            return True
306
307        curdiff = StringIO()
308        diff = self.bu.backupfile('diff')
309        fd = None
310
311        patch.diff(self.ws.repo, fp=curdiff,
312                   opts=patch.diffopts(self.ws.ui, opts={'git': True}))
313
314        if os.path.exists(diff):
315            try:
316                try:
317                    fd = open(diff)
318                    backdiff = fd.read()
319                except EnvironmentError, e:
320                    raise util.Abort("couldn't open backup diff %s\n"
321                                     "   %s" % (diff, e))
322            finally:
323                if fd and not fd.closed:
324                    fd.close()
325        else:
326            backdiff = ''
327
328        if backdiff != curdiff.getvalue():
329            return True
330
331
332        currrenamed = self._clobbering_renames()
333        bakrenamed = None
334
335        if os.path.exists(self.bu.backupfile('renames')):
336            try:
337                try:
338                    fd = open(self.bu.backupfile('renames'))
339                    bakrenamed = [line.strip().split(' ') for line in fd]
340                except EnvironmentError, e:
341                    raise util.Abort("couldn't open renames file %s: %s\n" %
342                                     (self.bu.backupfile('renames'), e))
343            finally:
344                if fd and not fd.closed:
345                    fd.close()
346
347            if currrenamed != bakrenamed:
348                return True
349
350        return False
351
352    def cleanup(self):
353        '''Remove backed up uncommitted changes'''
354        for fname in ('dirstate', 'diff', 'renames'):
355            if os.path.exists(self.bu.backupfile(fname)):
356                os.unlink(self.bu.backupfile(fname))
357
358
359class CdmMetadataBackup(object):
360    '''Backup of workspace metadata'''
361
362    def __init__(self, backup, ws):
363        self.bu = backup
364        self.ws = ws
365        self.files = ('hgrc', 'localtags', 'patches')
366
367    def backup(self):
368        '''Backup workspace metadata'''
369
370        tar = None
371
372        try:
373            try:
374                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
375                                   'w:gz')
376                tar.errorlevel = 2
377            except (EnvironmentError, tarfile.TarError), e:
378                raise util.Abort("couldn't open %s for writing: %s" %
379                                 (self.bu.backupfile('metadata.tar.gz'), e))
380
381            try:
382                for elt in self.files:
383                    fpath = self.ws.repo.join(elt)
384                    if os.path.exists(fpath):
385                        tar.add(fpath, elt)
386            except (EnvironmentError, tarfile.TarError), e:
387                #
388                # tarfile.TarError doesn't include the tar member or file
389                # in question, so we have to do so ourselves.
390                #
391                if isinstance(e, tarfile.TarError):
392                    error = "%s: %s" % (elt, e)
393                else:
394                    error = str(e)
395
396                raise util.Abort("couldn't backup metadata to %s:\n"
397                                 "  %s" %
398                                 (self.bu.backupfile('metadata.tar.gz'),
399                                  error))
400        finally:
401            if tar and not tar.closed:
402                tar.close()
403
404    def old_restore(self):
405        '''Restore workspace metadata from an pre-tar backup'''
406
407        for fname in self.files:
408            bfile = self.bu.backupfile(fname)
409            wfile = self.ws.repo.join(fname)
410
411            if os.path.exists(bfile):
412                try:
413                    shutil.copy2(bfile, wfile)
414                except EnvironmentError, e:
415                    raise util.Abort("couldn't restore metadata from %s:\n"
416                                     "   %s" % (bfile, e))
417
418    def tar_restore(self):
419        '''Restore workspace metadata (from a tar-style backup)'''
420
421        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
422            tar = None
423
424            try:
425                try:
426                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
427                    tar.errorlevel = 2
428                except (EnvironmentError, tarfile.TarError), e:
429                    raise util.Abort("couldn't open %s: %s" %
430                                 (self.bu.backupfile('metadata.tar.gz'), e))
431
432                try:
433                    for elt in tar:
434                        tar.extract(elt, path=self.ws.repo.path)
435                except (EnvironmentError, tarfile.TarError), e:
436                    # Make sure the member name is in the exception message.
437                    if isinstance(e, tarfile.TarError):
438                        error = "%s: %s" % (elt.name, e)
439                    else:
440                        error = str(e)
441
442                    raise util.Abort("couldn't restore metadata from %s:\n"
443                                     "   %s" %
444                                     (self.bu.backupfile('metadata.tar.gz'),
445                                      error))
446            finally:
447                if tar and not tar.closed:
448                    tar.close()
449
450    def restore(self):
451        '''Restore workspace metadata'''
452
453        if os.path.exists(self.bu.backupfile('hgrc')):
454            self.old_restore()
455        else:
456            self.tar_restore()
457
458    def need_backup(self):
459        '''Compare backed up workspace metadata to workspace'''
460
461        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
462            try:
463                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
464                tar.errorlevel = 2
465            except (EnvironmentError, tarfile.TarError), e:
466                raise util.Abort("couldn't open metadata tarball: %s\n"
467                                 "   %s" %
468                                 (self.bu.backupfile('metadata.tar.gz'), e))
469
470            for elt in tar:
471                fpath = self.ws.repo.join(elt.name)
472                if not os.path.exists(fpath):
473                    return True     # File in tar, not workspace
474
475                if elt.isdir():     # Don't care about directories
476                    continue
477
478                if (elt.mtime != os.path.getmtime(fpath) or
479                    elt.size != os.path.getsize(fpath)):
480                    return True
481
482            tarnames = tar.getnames()
483            tar.close()
484        else:
485            tarnames = []
486
487        for mfile in self.files:
488            fpath = self.ws.repo.join(mfile)
489
490            if os.path.isdir(fpath):
491                # Directories in tarfile always end with a '/'
492                if not mfile.endswith('/'):
493                    mfile += '/'
494
495                if mfile not in tarnames:
496                    return True
497
498                for root, dirs, files in os.walk(fpath, topdown=True):
499                    for elt in files:
500                        path = os.path.join(root, elt)
501
502                        rpath = self.ws.repo.path
503                        if not rpath.endswith('/'):
504                            rpath += '/'
505
506                        path = path.replace(rpath, '', 1)
507                        if path not in tarnames:
508                            return True # In workspace not tar
509            else:
510                if os.path.exists(fpath) and mfile not in tarnames:
511                    return True
512
513        return False
514
515    def cleanup(self):
516        '''Remove backed up workspace metadata'''
517        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
518            os.unlink(self.bu.backupfile('metadata.tar.gz'))
519
520
521class CdmBackup(object):
522    '''A backup of a given workspace'''
523
524    def __init__(self, ui, ws, name):
525        self.ws = ws
526        self.ui = ui
527        self.backupdir = self._find_backup_dir(name)
528
529        #
530        # The order of instances here controls the order the various operations
531        # are run.
532        #
533        # There's some inherent dependence, in that on restore we need
534        # to restore committed changes prior to uncommitted changes
535        # (as the parent revision of any uncommitted changes is quite
536        # likely to not exist until committed changes are restored).
537        # Metadata restore can happen at any point, but happens last
538        # as a matter of convention.
539        #
540        self.modules = [x(self, ws) for x in [CdmCommittedBackup,
541                                              CdmUncommittedBackup,
542                                              CdmMetadataBackup]]
543
544
545        if os.path.exists(os.path.join(self.backupdir, 'latest')):
546            generation = os.readlink(os.path.join(self.backupdir, 'latest'))
547            self.generation = int(os.path.split(generation)[1])
548        else:
549            self.generation = 0
550
551    def _find_backup_dir(self, name):
552        '''Find the path to an appropriate backup directory based on NAME'''
553        backupdir = None
554        backupbase = None
555
556        if os.path.isabs(name):
557            return name
558
559        if self.ui.config('cdm', 'backupdir'):
560            backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
561        else:
562            home = None
563
564            try:
565                home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
566            except KeyError:
567                pass                    # Handled anyway
568
569            if not home:
570                raise util.Abort('Could not determine your HOME directory to '
571                                 'find backup path')
572
573            backupbase = os.path.join(home, 'cdm.backup')
574
575        backupdir = os.path.join(backupbase, name)
576
577        # If backupdir exists, it must be a directory.
578        if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
579            raise util.Abort('%s exists but is not a directory' % backupdir)
580
581        return backupdir
582
583    def backupfile(self, path):
584        '''return full path to backup file FILE at GEN'''
585        return os.path.join(self.backupdir, str(self.generation), path)
586
587    def update_latest(self, gen):
588        '''Update latest symlink to point to the current generation'''
589        linkpath = os.path.join(self.backupdir, 'latest')
590
591        if os.path.lexists(linkpath):
592            os.unlink(linkpath)
593
594        os.symlink(str(gen), linkpath)
595
596    def create_gen(self, gen):
597        '''Create a new backup generation'''
598        try:
599            os.makedirs(os.path.join(self.backupdir, str(gen)))
600            self.update_latest(gen)
601        except EnvironmentError, e:
602            raise util.Abort("Couldn't create backup generation %s: %s" %
603                             (os.path.join(self.backupdir, str(gen)), e))
604
605    def need_backup(self):
606        '''Compare backed up changes to workspace'''
607        #
608        # If there's no current backup generation, or the last backup was
609        # invalid (lacking the dirstate file), we need a backup regardless
610        # of anything else.
611        #
612        if (not self.generation or
613            not os.path.exists(self.backupfile('dirstate'))):
614            return True
615
616        for x in self.modules:
617            if x.need_backup():
618                return True
619
620        return False
621
622    def backup(self):
623        '''Take a backup of the current workspace'''
624
625        if not os.path.exists(self.backupdir):
626            try:
627                os.makedirs(self.backupdir)
628            except EnvironmentError, e:
629                raise util.Abort('Could not create backup directory %s: %s' %
630                                 (self.backupdir, e))
631
632        self.generation += 1
633        self.create_gen(self.generation)
634
635        #
636        # Lock the repo, so the backup can be consistent.  We need the
637        # wlock too to make sure the dirstate parent doesn't change
638        # underneath us.
639        #
640
641        lock = self.ws.repo.lock()
642        wlock = self.ws.repo.lock()
643
644        try:
645            for x in self.modules:
646                x.backup()
647        except Exception, e:
648            if isinstance(e, KeyboardInterrupt):
649                self.ws.ui.warn("Interrupted\n")
650            else:
651                self.ws.ui.warn("Error: %s\n" % e)
652
653                #
654                # If it's not a 'normal' error, we want to print a stack
655                # trace now in case the attempt to remove the partial
656                # backup also fails, and raises a second exception.
657                #
658                if (not isinstance(e, (EnvironmentError, util.Abort))
659                    or self.ws.ui.traceback):
660                    traceback.print_exc()
661
662            for x in self.modules:
663                x.cleanup()
664
665            os.rmdir(os.path.join(self.backupdir, str(self.generation)))
666            self.generation -= 1
667
668            if self.generation != 0:
669                self.update_latest(self.generation)
670            else:
671                os.unlink(os.path.join(self.backupdir, 'latest'))
672
673            raise util.Abort('Backup failed')
674
675    def restore(self, gen=None):
676        '''Restore workspace from backup
677
678        Restores from backup generation GEN (defaulting to the latest)
679        into workspace WS.'''
680
681        wlock = self.ws.repo.wlock()
682        lock = self.ws.repo.lock()
683
684        if not os.path.exists(self.backupdir):
685            raise util.Abort('Backup directory does not exist: %s' %
686                             (self.backupdir))
687
688        if gen:
689            if not os.path.exists(os.path.join(self.backupdir, str(gen))):
690                raise util.Abort('Backup generation does not exist: %s' %
691                                 (os.path.join(self.backupdir, str(gen))))
692            self.generation = int(gen)
693
694        if not self.generation: # This is ok, 0 is not a valid generation
695            raise util.Abort('Backup has no generations: %s' % self.backupdir)
696
697        if not os.path.exists(self.backupfile('dirstate')):
698            raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
699                             (self.backupdir, self.generation))
700
701        try:
702            for x in self.modules:
703                x.restore()
704        except util.Abort, e:
705            raise util.Abort('Error restoring workspace:\n'
706                             '%s\n'
707                             'Workspace will be partially restored' % e)
708