xref: /titanic_44/usr/src/tools/onbld/Scm/WorkSpace.py (revision 3c112a2b34403220c06c3e2fcac403358cfba168)
1#
2#  This program is free software; you can redistribute it and/or modify
3#  it under the terms of the GNU General Public License version 2
4#  as published by the Free Software Foundation.
5#
6#  This program is distributed in the hope that it will be useful,
7#  but WITHOUT ANY WARRANTY; without even the implied warranty of
8#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9#  GNU General Public License for more details.
10#
11#  You should have received a copy of the GNU General Public License
12#  along with this program; if not, write to the Free Software
13#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14#
15
16#
17# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
18# Use is subject to license terms.
19#
20
21#
22# Theory:
23#
24# Workspaces have a non-binding parent/child relationship.
25# All important operations apply to the changes between the two.
26#
27# However, for the sake of remote operation, the 'parent' of a
28# workspace is not seen as a literal entity, instead the figurative
29# parent contains the last changeset common to both parent and child,
30# as such the 'parent tip' is actually nothing of the sort, but instead a
31# convenient imitation.
32#
33# Any change made to a workspace is a change to a file therein, such
34# changes can be represented briefly as whether the file was
35# modified/added/removed as compared to the parent workspace, whether
36# the file has a different name in the parent and if so, whether it
37# was renamed or merely copied.  Each changed file has an
38# associated ActiveEntry.
39#
40# The ActiveList being a list ActiveEntrys can thus present the entire
41# change in workspace state between a parent and its child, and is the
42# important bit here (in that if it is incorrect, everything else will
43# be as incorrect, or more)
44#
45
46import cStringIO
47import os
48from mercurial import cmdutil, context, hg, node, patch, repair, util
49from hgext import mq
50
51from onbld.Scm import Version
52
53#
54# Mercurial >= 1.2 has its exception types in a mercurial.error
55# module, prior versions had them in their associated modules.
56#
57if Version.at_least("1.2"):
58    from mercurial import error
59    HgRepoError = error.RepoError
60    HgLookupError = error.LookupError
61else:
62    from mercurial import repo, revlog
63    HgRepoError = repo.RepoError
64    HgLookupError = revlog.LookupError
65
66
67class ActiveEntry(object):
68    '''Representation of the changes made to a single file.
69
70    MODIFIED   - Contents changed, but no other changes were made
71    ADDED      - File is newly created
72    REMOVED    - File is being removed
73
74    Copies are represented by an Entry whose .parentname is non-nil
75
76    Truly copied files have non-nil .parentname and .renamed = False
77    Renames have non-nil .parentname and .renamed = True
78
79    Do not access any of this information directly, do so via the
80
81    .is_<change>() methods.'''
82
83    MODIFIED = 1
84    ADDED = 2
85    REMOVED = 3
86
87    def __init__(self, name):
88        self.name = name
89        self.change = None
90        self.parentname = None
91        # As opposed to copied (or neither)
92        self.renamed = False
93        self.comments = []
94
95    #
96    # ActiveEntrys sort by the name of the file they represent.
97    #
98    def __cmp__(self, other):
99        return cmp(self.name, other.name)
100
101    def is_added(self):
102        return self.change == self.ADDED
103
104    def is_modified(self):
105        return self.change == self.MODIFIED
106
107    def is_removed(self):
108        return self.change == self.REMOVED
109
110    def is_renamed(self):
111        return self.parentname and self.renamed
112
113    def is_copied(self):
114        return self.parentname and not self.renamed
115
116
117class ActiveList(object):
118    '''Complete representation of workspace change.
119
120    In practice, a container for ActiveEntrys, and methods to build them,
121    update them, and deal with them en masse.'''
122
123    def __init__(self, ws, parenttip, revs=None):
124        self._active = {}
125        self.ws = ws
126
127        self.revs = revs
128
129        self.base = None
130        self.parenttip = parenttip
131
132        #
133        # If we couldn't find a parenttip, the two repositories must
134        # be unrelated (Hg catches most of this, but this case is valid for it
135        # but invalid for us)
136        #
137        if self.parenttip == None:
138            raise util.Abort('repository is unrelated')
139        self.localtip = None
140
141        if revs:
142            self.base = revs[0]
143            self.localtip = revs[-1]
144
145        self._comments = []
146
147        self._build(revs)
148
149    def _build(self, revs):
150        if not revs:
151            return
152
153        status = self.ws.status(self.parenttip.node(), self.localtip.node())
154
155        files = []
156        for ctype in status.values():
157            files.extend(ctype)
158
159        #
160        # When a file is renamed, two operations actually occur.
161        # A file copy from source to dest and a removal of source.
162        #
163        # These are represented as two distinct entries in the
164        # changectx and status (one on the dest file for the
165        # copy, one on the source file for the remove).
166        #
167        # Since these are unconnected in both the context and
168        # status we can only make the association by explicitly
169        # looking for it.
170        #
171        # We deal with this thusly:
172        #
173        # We maintain a dict dest -> source of all copies
174        # (updating dest as appropriate, but leaving source alone).
175        #
176        # After all other processing, we mark as renamed any pair
177        # where source is on the removed list.
178        #
179        copies = {}
180
181        #
182        # Walk revs looking for renames and adding files that
183        # are in both change context and status to the active
184        # list.
185        #
186        for ctx in revs:
187            desc = ctx.description().splitlines()
188
189            self._comments.extend(desc)
190
191            for fname in ctx.files():
192                #
193                # We store comments per-entry as well, for the sake of
194                # webrev and similar.  We store twice to avoid the problems
195                # of uniquifying comments for the general list (and possibly
196                # destroying multi-line entities in the process).
197                #
198                if fname not in self:
199                    self._addentry(fname)
200                self[fname].comments.extend(desc)
201
202                try:
203                    fctx = ctx.filectx(fname)
204                except HgLookupError:
205                    continue
206
207                #
208                # NB: .renamed() is a misnomer, this actually checks
209                #     for copies.
210                #
211                rn = fctx.renamed()
212                if rn:
213                    #
214                    # If the source file is a known copy we know its
215                    # ancestry leads us to the parent.
216                    # Otherwise make sure the source file is known to
217                    # be in the parent, we need not care otherwise.
218                    #
219                    # We detect cycles at a later point.  There is no
220                    # reason to continuously handle them.
221                    #
222                    if rn[0] in copies:
223                        copies[fname] = copies[rn[0]]
224                    elif rn[0] in self.parenttip.manifest():
225                        copies[fname] = rn[0]
226
227        #
228        # Walk the copy list marking as copied any non-cyclic pair
229        # where the destination file is still present in the local
230        # tip (to avoid ephemeral changes)
231        #
232        # Where source is removed, mark as renamed, and remove the
233        # AL entry for the source file
234        #
235        for fname, oldname in copies.iteritems():
236            if fname == oldname or fname not in self.localtip.manifest():
237                continue
238
239            self[fname].parentname = oldname
240
241            if oldname in status['removed']:
242                self[fname].renamed = True
243                if oldname in self:
244                    del self[oldname]
245
246        #
247        # Walk the active list setting the change type for each active
248        # file.
249        #
250        # In the case of modified files that are not renames or
251        # copies, we do a content comparison, and drop entries that
252        # are not actually modified.
253        #
254        # We walk a copy of the AL such that we can drop entries
255        # within the loop.
256        #
257        for entry in self._active.values():
258            if entry.name not in files:
259                del self[entry.name]
260                continue
261
262            if entry.name in status['added']:
263                entry.change = ActiveEntry.ADDED
264            elif entry.name in status['removed']:
265                entry.change = ActiveEntry.REMOVED
266            elif entry.name in status['modified']:
267                entry.change = ActiveEntry.MODIFIED
268
269            #
270            # There are cases during a merge where a file will be in
271            # the status return as modified, but in reality be an
272            # addition (ie, not in the parenttip).
273            #
274            # We need to check whether the file is actually present
275            # in the parenttip, and set it as an add, if not.
276            #
277            if entry.name not in self.parenttip.manifest():
278                entry.change = ActiveEntry.ADDED
279            elif entry.is_modified() and not entry.parentname:
280                if not self.filecmp(entry):
281                    del self[entry.name]
282                    continue
283
284            assert entry.change
285
286    def __contains__(self, fname):
287        return fname in self._active
288
289    def __getitem__(self, key):
290        return self._active[key]
291
292    def __setitem__(self, key, value):
293        self._active[key] = value
294
295    def __delitem__(self, key):
296        del self._active[key]
297
298    def __iter__(self):
299        for entry in self._active.values():
300            yield entry
301
302    def _addentry(self, fname):
303        if fname not in self:
304            self[fname] = ActiveEntry(fname)
305
306    def files(self):
307        '''Return the list of pathnames of all files touched by this
308        ActiveList
309
310        Where files have been renamed, this will include both their
311        current name and the name which they had in the parent tip.
312        '''
313
314        ret = self._active.keys()
315        ret.extend([x.parentname for x in self
316                    if x.is_renamed() and x.parentname not in ret])
317        return ret
318
319    def comments(self):
320        return self._comments
321
322    def bases(self):
323        '''Return the list of changesets that are roots of the ActiveList.
324
325        This is the set of active changesets where neither parent
326        changeset is itself active.'''
327
328        revset = set(self.revs)
329        return filter(lambda ctx: not [p for p in ctx.parents() if p in revset],
330                      self.revs)
331
332    def tags(self):
333        '''Find tags that refer to a changeset in the ActiveList,
334        returning a list of 3-tuples (tag, node, is_local) for each.
335
336        We return all instances of a tag that refer to such a node,
337        not just that which takes precedence.'''
338
339        def colliding_tags(iterable, nodes, local):
340            for nd, name in [line.rstrip().split(' ', 1) for line in iterable]:
341                if nd in nodes:
342                    yield (name, self.ws.repo.lookup(nd), local)
343
344        tags = []
345        nodes = set(node.hex(ctx.node()) for ctx in self.revs)
346
347        if os.path.exists(self.ws.repo.join('localtags')):
348            fh = self.ws.repo.opener('localtags')
349            tags.extend(colliding_tags(fh, nodes, True))
350            fh.close()
351
352        # We want to use the tags file from the localtip
353        if '.hgtags' in self.localtip:
354            data = self.localtip.filectx('.hgtags').data().splitlines()
355            tags.extend(colliding_tags(data, nodes, False))
356
357        return tags
358
359    def prune_tags(self, data):
360        '''Return a copy of data, which should correspond to the
361        contents of a Mercurial tags file, with any tags that refer to
362        changesets which are components of the ActiveList removed.'''
363
364        nodes = set(node.hex(ctx.node()) for ctx in self.revs)
365        return [t for t in data if t.split(' ', 1)[0] not in nodes]
366
367    def filecmp(self, entry):
368        '''Compare two revisions of two files
369
370        Return True if file changed, False otherwise.
371
372        The fast path compares file metadata, slow path is a
373        real comparison of file content.'''
374
375        parentfile = self.parenttip.filectx(entry.parentname or entry.name)
376        localfile = self.localtip.filectx(entry.name)
377
378        #
379        # NB: Keep these ordered such as to make every attempt
380        #     to short-circuit the more time consuming checks.
381        #
382        if parentfile.size() != localfile.size():
383            return True
384
385        if parentfile.flags() != localfile.flags():
386            return True
387
388        if parentfile.cmp(localfile.data()):
389            return True
390
391    def context(self, message, user):
392        '''Return a Mercurial context object representing the entire
393        ActiveList as one change.'''
394        return activectx(self, message, user)
395
396
397class activectx(context.memctx):
398    '''Represent an ActiveList as a Mercurial context object.
399
400    Part of the  WorkSpace.squishdeltas implementation.'''
401
402    def __init__(self, active, message, user):
403        '''Build an activectx object.
404
405          active  - The ActiveList object used as the source for all data.
406          message - Changeset description
407          user    - Committing user'''
408
409        def filectxfn(repository, ctx, fname):
410            fctx = active.localtip.filectx(fname)
411            data = fctx.data()
412
413            #
414            # .hgtags is a special case, tags referring to active list
415            # component changesets should be elided.
416            #
417            if fname == '.hgtags':
418                data = '\n'.join(active.prune_tags(data.splitlines()))
419
420            return context.memfilectx(fname, data, 'l' in fctx.flags(),
421                                      'x' in fctx.flags(),
422                                      active[fname].parentname)
423
424        self.__active = active
425        parents = (active.parenttip.node(), node.nullid)
426        extra = {'branch': active.localtip.branch()}
427        context.memctx.__init__(self, active.ws.repo, parents, message,
428                                active.files(), filectxfn, user=user,
429                                extra=extra)
430
431    def modified(self):
432        return [entry.name for entry in self.__active if entry.is_modified()]
433
434    def added(self):
435        return [entry.name for entry in self.__active if entry.is_added()]
436
437    def removed(self):
438        ret = set(entry.name for entry in self.__active if entry.is_removed())
439        ret.update(set(x.parentname for x in self.__active if x.is_renamed()))
440        return list(ret)
441
442    def files(self):
443        return self.__active.files()
444
445
446class WorkSpace(object):
447
448    def __init__(self, repository):
449        self.repo = repository
450        self.ui = self.repo.ui
451        self.name = self.repo.root
452
453        self.activecache = {}
454
455    def parent(self, spec=None):
456        '''Return the canonical workspace parent, either SPEC (which
457        will be expanded) if provided or the default parent
458        otherwise.'''
459
460        if spec:
461            return self.ui.expandpath(spec)
462
463        p = self.ui.expandpath('default')
464        if p == 'default':
465            return None
466        else:
467            return p
468
469    def _localtip(self, outgoing, wctx):
470        '''Return the most representative changeset to act as the
471        localtip.
472
473        If the working directory is modified (has file changes, is a
474        merge, or has switched branches), this will be a workingctx.
475
476        If the working directory is unmodified, this will be the most
477        recent (highest revision number) local (outgoing) head on the
478        current branch, if no heads are determined to be outgoing, it
479        will be the most recent head on the current branch.
480        '''
481
482        #
483        # A modified working copy is seen as a proto-branch, and thus
484        # our only option as the local tip.
485        #
486        if (wctx.files() or len(wctx.parents()) > 1 or
487            wctx.branch() != wctx.parents()[0].branch()):
488            return wctx
489
490        heads = self.repo.heads(start=wctx.parents()[0].node())
491        headctxs = [self.repo.changectx(n) for n in heads]
492        localctxs = [c for c in headctxs if c.node() in outgoing]
493
494        ltip = sorted(localctxs or headctxs, key=lambda x: x.rev())[-1]
495
496        if len(heads) > 1:
497            self.ui.warn('The current branch has more than one head, '
498                         'using %s\n' % ltip.rev())
499
500        return ltip
501
502    def _parenttip(self, heads, outgoing):
503        '''Return the highest-numbered, non-outgoing changeset that is
504        an ancestor of a changeset in heads.
505
506        This is intended to find the most recent changeset on a given
507        branch that is shared between a parent and child workspace,
508        such that it can act as a stand-in for the parent workspace.
509        '''
510
511        def tipmost_shared(head, outnodes):
512            '''Return the tipmost node on the same branch as head that is not
513            in outnodes.
514
515            We walk from head to the bottom of the workspace (revision
516            0) collecting nodes not in outnodes during the add phase
517            and return the first node we see in the iter phase that
518            was previously collected.
519
520            If no node is found (all revisions >= 0 are outgoing), the
521            only possible parenttip is the null node (node.nullid)
522            which is returned explicitly.
523
524            See the docstring of mercurial.cmdutil.walkchangerevs()
525            for the phased approach to the iterator returned.  The
526            important part to note is that the 'add' phase gathers
527            nodes, which the 'iter' phase then iterates through.'''
528
529            opts = {'rev': ['%s:0' % head.rev()],
530                    'follow': True}
531            get = util.cachefunc(lambda r: self.repo.changectx(r).changeset())
532            changeiter = cmdutil.walkchangerevs(self.repo.ui, self.repo, [],
533                                                get, opts)[0]
534            seen = []
535            for st, rev, fns in changeiter:
536                n = self.repo.changelog.node(rev)
537                if st == 'add':
538                    if n not in outnodes:
539                        seen.append(n)
540                elif st == 'iter':
541                    if n in seen:
542                        return rev
543            return self.repo.changelog.rev(node.nullid)
544
545        nodes = set(outgoing)
546        ptips = map(lambda x: tipmost_shared(x, nodes), heads)
547        return self.repo.changectx(sorted(ptips)[-1])
548
549    def status(self, base='.', head=None):
550        '''Translate from the hg 6-tuple status format to a hash keyed
551        on change-type'''
552
553        states = ['modified', 'added', 'removed', 'deleted', 'unknown',
554              'ignored']
555
556        chngs = self.repo.status(base, head)
557        return dict(zip(states, chngs))
558
559    def findoutgoing(self, parent):
560        '''Return the base set of outgoing nodes.
561
562        A caching wrapper around mercurial.localrepo.findoutgoing().
563        Complains (to the user), if the parent workspace is
564        non-existent or inaccessible'''
565
566        self.ui.pushbuffer()
567        try:
568            try:
569                ui = self.ui
570                if hasattr(cmdutil, 'remoteui'):
571                    ui = cmdutil.remoteui(ui, {})
572                pws = hg.repository(ui, parent)
573                return self.repo.findoutgoing(pws)
574            except HgRepoError:
575                self.ui.warn("Warning: Parent workspace '%s' is not "
576                             "accessible\n"
577                             "active list will be incomplete\n\n" % parent)
578                return []
579        finally:
580            self.ui.popbuffer()
581    findoutgoing = util.cachefunc(findoutgoing)
582
583    def modified(self):
584        '''Return a list of files modified in the workspace'''
585        wctx = self.workingctx()
586        return sorted(wctx.files() + wctx.deleted()) or None
587
588    def merged(self):
589        '''Return boolean indicating whether the workspace has an uncommitted
590        merge'''
591        wctx = self.workingctx()
592        return len(wctx.parents()) > 1
593
594    def branched(self):
595        '''Return boolean indicating whether the workspace has an
596        uncommitted named branch'''
597
598        wctx = self.workingctx()
599        return wctx.branch() != wctx.parents()[0].branch()
600
601    def active(self, parent=None):
602        '''Return an ActiveList describing changes between workspace
603        and parent workspace (including uncommitted changes).
604        If workspace has no parent ActiveList will still describe any
605        uncommitted changes'''
606
607        parent = self.parent(parent)
608        if parent in self.activecache:
609            return self.activecache[parent]
610
611        if parent:
612            outgoing = self.findoutgoing(parent)
613            outnodes = self.repo.changelog.nodesbetween(outgoing)[0]
614        else:
615            outgoing = []       # No parent, no outgoing nodes
616            outnodes = []
617
618        localtip = self._localtip(outnodes, self.workingctx())
619
620        if localtip.rev() is None:
621            heads = localtip.parents()
622        else:
623            heads = [localtip]
624
625        ctxs = [self.repo.changectx(n) for n in
626                self.repo.changelog.nodesbetween(outgoing,
627                                                 [h.node() for h in heads])[0]]
628
629        if localtip.rev() is None:
630            ctxs.append(localtip)
631
632        act = ActiveList(self, self._parenttip(heads, outnodes), ctxs)
633
634        self.activecache[parent] = act
635        return act
636
637    def pdiff(self, pats, opts, parent=None):
638        'Return diffs relative to PARENT, as best as we can make out'
639
640        parent = self.parent(parent)
641        act = self.active(parent)
642
643        #
644        # act.localtip maybe nil, in the case of uncommitted local
645        # changes.
646        #
647        if not act.revs:
648            return
649
650        matchfunc = cmdutil.match(self.repo, pats, opts)
651        opts = patch.diffopts(self.ui, opts)
652
653        return self.diff(act.parenttip.node(), act.localtip.node(),
654                         match=matchfunc, opts=opts)
655
656    def squishdeltas(self, active, message, user=None):
657        '''Create a single conglomerate changeset based on a given
658        active list.  Removes the original changesets comprising the
659        given active list, and any tags pointing to them.
660
661        Operation:
662
663          - Commit an activectx object representing the specified
664            active list,
665
666          - Remove any local tags pointing to changesets in the
667            specified active list.
668
669          - Remove the changesets comprising the specified active
670            list.
671
672          - Remove any metadata that may refer to changesets that were
673            removed.
674
675        Calling code is expected to hold both the working copy lock
676        and repository lock of the destination workspace
677        '''
678
679        def strip_local_tags(active):
680            '''Remove any local tags referring to the specified nodes.'''
681
682            if os.path.exists(self.repo.join('localtags')):
683                fh = None
684                try:
685                    fh = self.repo.opener('localtags')
686                    tags = active.prune_tags(fh)
687                    fh.close()
688
689                    fh = self.repo.opener('localtags', 'w', atomictemp=True)
690                    fh.writelines(tags)
691                    fh.rename()
692                finally:
693                    if fh and not fh.closed:
694                        fh.close()
695
696        if active.files():
697            for entry in active:
698                #
699                # Work around Mercurial issue #1666, if the source
700                # file of a rename exists in the working copy
701                # Mercurial will complain, and remove the file.
702                #
703                # We preemptively remove the file to avoid the
704                # complaint (the user was asked about this in
705                # cdm_recommit)
706                #
707                if entry.is_renamed():
708                    path = self.repo.wjoin(entry.parentname)
709                    if os.path.exists(path):
710                        os.unlink(path)
711
712            self.repo.commitctx(active.context(message, user))
713            wsstate = "recommitted"
714            destination = self.repo.changelog.tip()
715        else:
716            #
717            # If all we're doing is stripping the old nodes, we want to
718            # update the working copy such that we're not at a revision
719            # that's about to go away.
720            #
721            wsstate = "tip"
722            destination = active.parenttip.node()
723
724        self.clean(destination)
725
726        #
727        # Tags were elided by the activectx object.  Local tags,
728        # however, must be removed manually.
729        #
730        try:
731            strip_local_tags(active)
732        except EnvironmentError, e:
733            raise util.Abort('Could not recommit tags: %s\n' % e)
734
735        # Silence all the strip and update fun
736        self.ui.pushbuffer()
737
738        #
739        # Remove the active lists component changesets by stripping
740        # the base of any active branch (of which there may be
741        # several)
742        #
743        try:
744            try:
745                for base in active.bases():
746                    #
747                    # Any cached information about the repository is
748                    # likely to be invalid during the strip.  The
749                    # caching of branch tags is especially
750                    # problematic.
751                    #
752                    self.repo.invalidate()
753                    repair.strip(self.ui, self.repo, base.node(), backup=False)
754            except:
755                #
756                # If this fails, it may leave us in a surprising place in
757                # the history.
758                #
759                # We want to warn the user that something went wrong,
760                # and what will happen next, re-raise the exception, and
761                # bring the working copy back into a consistent state
762                # (which the finally block will do)
763                #
764                self.ui.warn("stripping failed, your workspace will have "
765                             "superfluous heads.\n"
766                             "your workspace has been updated to the "
767                             "%s changeset.\n" % wsstate)
768                raise               # Re-raise the exception
769        finally:
770            self.clean()
771            self.repo.dirstate.write() # Flush the dirstate
772            self.repo.invalidate()     # Invalidate caches
773
774            #
775            # We need to remove Hg's undo information (used for rollback),
776            # since it refers to data that will probably not exist after
777            # the strip.
778            #
779            if os.path.exists(self.repo.sjoin('undo')):
780                try:
781                    os.unlink(self.repo.sjoin('undo'))
782                except EnvironmentError, e:
783                    raise util.Abort('failed to remove undo data: %s\n' % e)
784
785            self.ui.popbuffer()
786
787    def filepath(self, path):
788        'Return the full path to a workspace file.'
789        return self.repo.pathto(path)
790
791    def clean(self, rev=None):
792        '''Bring workspace up to REV (or tip) forcefully (discarding in
793        progress changes)'''
794
795        if rev != None:
796            rev = self.repo.lookup(rev)
797        else:
798            rev = self.repo.changelog.tip()
799
800        hg.clean(self.repo, rev, show_stats=False)
801
802    def mq_applied(self):
803        '''True if the workspace has Mq patches applied'''
804        q = mq.queue(self.ui, self.repo.join(''))
805        return q.applied
806
807    def workingctx(self):
808        return self.repo.changectx(None)
809
810    def diff(self, node1=None, node2=None, match=None, opts=None):
811        ret = cStringIO.StringIO()
812        try:
813            for chunk in patch.diff(self.repo, node1, node2, match=match,
814                                    opts=opts):
815                ret.write(chunk)
816        finally:
817            # Workaround Hg bug 1651
818            if not Version.at_least("1.3"):
819                self.repo.dirstate.invalidate()
820
821        return ret.getvalue()
822