xref: /freebsd/contrib/lib9p/pytest/protocol.py (revision f3087bef11543b42e0d69b708f367097a4118d24)
1#! /usr/bin/env python
2
3"""
4Protocol definitions for python based lib9p server/client.
5
6The sub-namespace td has type definitions (qid, stat) and values
7that are "#define" constants in C code (e.g., DMDIR, QTFILE, etc).
8This also contains the byte values for protocol codes like Tversion,
9Rversion, Rerror, and so on.
10
11    >>> td.Tversion
12    100
13    >>> td.Rlerror
14    7
15
16The qid and stat types are PFOD classes and generate instances that
17are a cross between namedtuple and OrderedDictionary (see pfod.py
18for details):
19
20    >>> td.qid(type=td.QTFILE, path=2, version=1)
21    qid(type=0, version=1, path=2)
22
23The td.stat() type output is pretty long, since it has all the
24dotu-specific members (used only when packing for dotu/dotl and
25set only when unpacking those), so here's just one field:
26
27    >>> td.stat(*(15 * [0])).mode
28    0
29    >>> import pprint; pprint.pprint(td.stat()._fields)
30    ('type',
31     'dev',
32     'qid',
33     'mode',
34     'atime',
35     'mtime',
36     'length',
37     'name',
38     'uid',
39     'gid',
40     'muid',
41     'extension',
42     'n_uid',
43     'n_gid',
44     'n_muid')
45
46Stat objects sent across the protocol must first be encoded into
47wirestat objects, which are basically size-counted pre-sequenced
48stat objects.  The pre-sequencing uses:
49
50    >>> td.stat_seq
51    Sequencer('stat')
52
53For parsing bytes returned in a Tread on a directory, td.wirestat_seq
54is the sequencer.  However, most users should rely on the packers and
55unpackers in each protocol (see {pack,unpack}_wirestat below).
56
57    >>> td.wirestat_seq
58    Sequencer('wirestat')
59
There is a dictionary fcall_names that maps from byte value
to protocol code.  Names map to themselves as well:
62
63    >>> fcall_names[101]
64    'Rversion'
65    >>> fcall_names['Tversion']
66    'Tversion'
67
68The sub-namespace rrd has request (Tversion, Topen, etc) and
69response (Rversion, Ropen, etc) data definitions.  Each of these
70is a PFOD class:
71
72    >>> rrd.Tversion(1000, 'hello', tag=0)
73    Tversion(tag=0, msize=1000, version='hello')
74
75The function p9_version() looks up the instance of each supported
76protocol, or raises a KeyError when given an invalid protocol.
77The names may be spelled in any mixture of cases.
78
79The names plain, dotu, and dotl are predefined as the three
80supported protocols:
81
82    >>> p9_version('invalid')
83    Traceback (most recent call last):
84        ...
85    KeyError: 'invalid'
86    >>> p9_version('9p2000') == plain
87    True
88    >>> p9_version('9P2000') == plain
89    True
90    >>> p9_version('9P2000.u') == dotu
91    True
92    >>> p9_version('9p2000.L') == dotl
93    True
94
95Protocol instances have a pack() method that encodes a set of
96arguments into a packet.  To know what to encode, pack() must
97receive an fcall value and a dictionary containing argument
98values, or something equivalent.  The required argument values
99depend on the fcall.  For instance, a Tversion fcall needs three
100arguments: the version name, the tag, and the msize (these of
101course are the pre-filled fields in a Tversion PFOD instance).
102
103    >>> args = {'version': '!', 'tag': 1, 'msize': 1000}
104    >>> pkt = dotu.pack(fcall='Tversion', args=args)
105    >>> len(pkt)
106    14
107
108The length of string '!' is 1, and the packet (or wire) format of
109a Tversion request is:
110
111   size[4] fcall[1] tag[2] msize[4] version[s]
112
113which corresponds to a struct's IBHIH (for the fixed size parts)
followed by 1 B (for the string).  The overall packet is 14 bytes
long, so we have size=14, fcall=100, tag=1, msize=1000, and the
version string is length=1, value=33 (ord('!')).
117
118    >>> import struct
119    >>> struct.unpack('<IBHIHB', pkt)
120    (14, 100, 1, 1000, 1, 33)
121
122Of course, this packed a completely bogus "version" string, but
123that's what we told it to do.  Protocol instances remember their
124version, so we can get it right by omitting the version from the
125arguments:
126
127    >>> dotu.version
128    '9P2000.u'
129    >>> args = {'tag': 99, 'msize': 1000}
130    >>> pkt = dotu.pack(fcall='Tversion', args=args)
131    >>> len(pkt)
132    21
133
134The fcall can be supplied numerically:
135
136    >>> pkt2 = dotu.pack(fcall=td.Tversion, args=args)
137    >>> pkt == pkt2
138    True
139
140Instead of providing an fcall you can provide an instance of
141the appropriate PFOD.  In this case pack() finds the type from
142the PFOD instance.  As usual, the version parameter is filled in
143for you:
144
145    >>> pkt2 = dotu.pack(rrd.Tversion(tag=99, msize=1000))
146    >>> pkt == pkt2
147    True
148
149Note that it's up to you to check the other end's version and
150switch to a "lower" protocol as needed.  Each instance does provide
151a downgrade_to() method that gets you a possibly-downgraded instance.
152This will fail if you are actually trying to upgrade, and also if
153you provide a bogus version:
154
155    >>> dotu.downgrade_to('9P2000.L')
156    Traceback (most recent call last):
157        ...
158    KeyError: '9P2000.L'
159    >>> dotu.downgrade_to('we never heard of this protocol')
160    Traceback (most recent call last):
161        ...
162    KeyError: 'we never heard of this protocol'
163
164Hence you might use:
165
166    try:
        proto = protocol.dotl.downgrade_to(vstr)
168    except KeyError:
169        pkt = protocol.plain.pack(fcall='Rerror',
170            args={'tag': tag, 'errstr': 'unknown protocol version '
171                    '{0!r}'.format(vstr)})
172    else:
173        pkt = proto.pack(fcall='Rversion', args={'tag': tag, 'msize': msize})
174
175When using a PFOD instance, it is slightly more efficient to use
176pack_from():
177
178    try:
        proto = protocol.dotl.downgrade_to(vstr)
180        reply = protocol.rrd.Rversion(tag=tag, msize=msize)
181    except KeyError:
182        proto = protocol.plain
183        reply = protocol.rrd.Rerror(tag=tag,
184            errstr='unknown protocol version {0!r}'.format(vstr))
185    pkt = proto.pack_from(reply)
186
187does the equivalent of the try/except/else variant.  Note that
188the protocol.rrd.Rversion() instance has version=None.  Like
189proto.pack, the pack_from will detect this "missing" value and
190fill it in.
191
192Because errors vary (one should use Rlerror for dotl and Rerror
193for dotu and plain), and it's convenient to use an Exception
194instance for an error, all protocols provide .error().  This
195builds the appropriate kind of error response, extracting and
196converting errno's and error messages as appropriate.
197
198If <err> is an instance of Exception, err.errno provides the errnum
199or ecode value (if used, for dotu and dotl) and err.strerror as the
200errstr value (if used, for plain 9p2000).  Otherwise err should be
201an integer, and we'll use os.strerror() to get a message.
202
203When using plain 9P2000 this sends error *messages*:
204
205    >>> import errno, os
206    >>> utf8 = os.strerror(errno.ENOENT).encode('utf-8')
207    >>> pkt = None
208    >>> try:
209    ...     os.open('presumably this file does not exist here', 0)
210    ... except OSError as err:
211    ...     pkt = plain.error(1, err)
212    ...
213    >>> pkt[-len(utf8):] == utf8
214    True
215    >>> pkt2 = plain.error(1, errno.ENOENT)
216    >>> pkt == pkt2
217    True
218
219When using 9P2000.u it sends the error code as well, and when
220using 9P2000.L it sends only the error code (and more error
221codes can pass through):
222
223    >>> len(pkt)
224    34
225    >>> len(dotu.error(1, errno.ENOENT))
226    38
227    >>> len(dotl.error(1, errno.ENOENT))
228    11
229
230For even more convenience (and another slight speed hack), the
231protocol has member functions for each valid pfod, which
232effectively do a pack_from of a pfod built from the arguments.  In
233the above example this is not very useful (because we want two
234different replies), but for Rlink, for instance, which has only
235a tag, a server might implement Tlink() as:
236
237    def do_Tlink(proto, data): # data will be a protocol.rrd.Tlink(...)
238        tag = data.tag
239        dfid = data.dfid
240        fid = data.fid
241        name = data.name
        ... some code to set up for doing the link ...
243        try:
244            os.link(path1, path2)
245        except OSError as err:
246            return proto.error(tag, err)
247        else:
248            return proto.Rlink(tag)
249
250    >>> pkt = dotl.Rlink(12345)
251    >>> struct.unpack('<IBH', pkt)
252    (7, 71, 12345)
253
254Similarly, a client can build a Tversion packet quite trivially:
255
256    >>> vpkt = dotl.Tversion(tag=0, msize=12345)
257
258To see that this is a valid version packet, let's unpack its bytes.
259The overall length is 21 bytes: 4 bytes of size, 1 byte of code 100
260for Tversion, 2 bytes of tag, 4 bytes of msize, 2 bytes of string
261length, and 8 bytes of string '9P2000.L'.
262
263    >>> tup = struct.unpack('<IBHIH8B', vpkt)
264    >>> tup[0:5]
265    (21, 100, 0, 12345, 8)
266    >>> ''.join(chr(i) for i in tup[5:])
267    '9P2000.L'
268
269Of course, since you can *pack*, you can also *unpack*.  It's
270possible that the incoming packet is malformed.  If so, this
271raises various errors (see below).
272
273Unpack is actually a two step process: first we unpack a header
274(where the size is already removed and is implied by len(data)),
275then we unpack the data within the packet.  You can invoke the
276first step separately.  Furthermore, there's a noerror argument
277that leaves some fields set to None or empty strings, if the
278packet is too short.  (Note that we need a hack for py2k vs py3k
279strings here, for doctests.  Also, encoding 12345 into a byte
280string produces '90', by ASCII luck!)
281
282    >>> pkt = pkt[4:] # strip generated size
283    >>> import sys
284    >>> py3k = sys.version_info[0] >= 3
285    >>> b2s = lambda x: x.decode('utf-8') if py3k else x
286    >>> d = plain.unpack_header(pkt[0:1], noerror=True)
287    >>> d.data = b2s(d.data)
288    >>> d
289    Header(size=5, dsize=0, fcall=71, data='')
290    >>> d = plain.unpack_header(pkt[0:2], noerror=True)
291    >>> d.data = b2s(d.data)
292    >>> d
293    Header(size=6, dsize=1, fcall=71, data='9')
294
295Without noerror=True a short packet raises a SequenceError:
296
297    >>> plain.unpack_header(pkt[0:0])   # doctest: +IGNORE_EXCEPTION_DETAIL
298    Traceback (most recent call last):
299        ...
300    SequenceError: out of data while unpacking 'fcall'
301
302Of course, a normal packet decodes fine:
303
304    >>> d = plain.unpack_header(pkt)
305    >>> d.data = b2s(d.data)
306    >>> d
307    Header(size=7, dsize=2, fcall=71, data='90')
308
but one that is too *long* potentially raises a SequenceError.
310(This is impossible for a header, though, since the size and
311data size are both implied: either there is an fcall code, and
312the rest of the bytes are "data", or there isn't and the packet
313is too short.  So we can only demonstrate this for regular
314unpack; see below.)
315
316Note that all along, this has been decoding Rlink (fcall=71),
317which is not valid for plain 9P2000 protocol.  It's up to the
318caller to check:
319
320    >>> plain.supports(71)
321    False
322
323    >>> plain.unpack(pkt)           # doctest: +IGNORE_EXCEPTION_DETAIL
324    Traceback (most recent call last):
325        ...
326    SequenceError: invalid fcall 'Rlink' for 9P2000
327    >>> dotl.unpack(pkt)
328    Rlink(tag=12345)
329
330However, the unpack() method DOES check that the fcall type is
331valid, even if you supply noerror=True.  This is because we can
332only really decode the header, not the data, if the fcall is
333invalid:
334
335    >>> plain.unpack(pkt, noerror=True)     # doctest: +IGNORE_EXCEPTION_DETAIL
336    Traceback (most recent call last):
337        ...
338    SequenceError: invalid fcall 'Rlink' for 9P2000
339
340The same applies to much-too-short packets even if noerror is set.
341Specifically, if the (post-"size") header shortens down to the empty
342string, the fcall will be None:
343
344    >>> dotl.unpack(b'', noerror=True)      # doctest: +IGNORE_EXCEPTION_DETAIL
345    Traceback (most recent call last):
346        ...
347    SequenceError: invalid fcall None for 9P2000.L
348
349If there is at least a full header, though, noerror will do the obvious:
350
351    >>> dotl.unpack(pkt[0:1], noerror=True)
352    Rlink(tag=None)
353    >>> dotl.unpack(pkt[0:2], noerror=True)
354    Rlink(tag=None)
355
356If the packet is too long, noerror suppresses the SequenceError:
357
358    >>> dotl.unpack(pkt + b'x')             # doctest: +IGNORE_EXCEPTION_DETAIL
359    Traceback (most recent call last):
360        ...
361    SequenceError: 1 byte(s) unconsumed
362    >>> dotl.unpack(pkt + b'x', noerror=True)
363    Rlink(tag=12345)
364
365To pack a stat object when producing data for reading a directory,
366use pack_wirestat.  This puts a size in front of the packed stat
367data (they're represented this way in read()-of-directory data,
368but not elsewhere).
369
370To unpack the result of a Tstat or a read() on a directory, use
371unpack_wirestat.  The stat values are variable length so this
372works with offsets.  If the packet is truncated, you'll get a
373SequenceError, but just as for header unpacking, you can use
374noerror to suppress this.
375
376(First, we'll need to build some valid packet data.)
377
378    >>> statobj = td.stat(type=0,dev=0,qid=td.qid(0,0,0),mode=0,
379    ... atime=0,mtime=0,length=0,name=b'foo',uid=b'0',gid=b'0',muid=b'0')
380    >>> data = plain.pack_wirestat(statobj)
381    >>> len(data)
382    55
383
384Now we can unpack it:
385
386    >>> newobj, offset = plain.unpack_wirestat(data, 0)
387    >>> newobj == statobj
388    True
389    >>> offset
390    55
391
392Since the packed data do not include the dotu extensions, we get
393a SequenceError if we try to unpack with dotu or dotl:
394
395    >>> dotu.unpack_wirestat(data, 0)       # doctest: +IGNORE_EXCEPTION_DETAIL
396    Traceback (most recent call last):
397        ...
398    SequenceError: out of data while unpacking 'extension'
399
400When using noerror, the returned new offset will be greater
401than the length of the packet, after a failed unpack, and some
402elements may be None:
403
404    >>> newobj, offset = plain.unpack_wirestat(data[0:10], 0, noerror=True)
405    >>> offset
406    55
407    >>> newobj.length is None
408    True
409
410Similarly, use unpack_dirent to unpack the result of a dot-L
411readdir(), using offsets.  (Build them with pack_dirent.)
412
413    >>> dirent = td.dirent(qid=td.qid(1,2,3),offset=0,
414    ... type=td.DT_REG,name=b'foo')
415    >>> pkt = dotl.pack_dirent(dirent)
416    >>> len(pkt)
417    27
418
419and then:
420
421    >>> newde, offset = dotl.unpack_dirent(pkt, 0)
422    >>> newde == dirent
423    True
424    >>> offset
425    27
426
427"""
428
429from __future__ import print_function
430
431import collections
432import os
433import re
434import sys
435
436import p9err
437import pfod
438import sequencer
439
# Lookup table for protocol codes.  It starts out empty here; per the
# module docstring it ends up mapping byte values to fcall names, with
# the names also mapping to themselves.
fcall_names = dict()

# Make the sequencer's exception available under this module's name,
# so callers can catch protocol.SequenceError.
SequenceError = sequencer.SequenceError
443
444# begin ???
445# to interfere with (eg) the size part of the packet:
446#   pkt = proto.pack(fcall=protocol.td.Tversion,
447#       size=123, # wrong
448#       args={ 'tag': 1, msize: 1000, version: '9p2000.u' })
449# a standard Twrite:
450#   pkt = proto.pack(fcall=protocol.td.Twrite,
451#       args={ 'tag': 1, 'fid': 2, 'offset': 0, 'data': b'rawdata' })
452# or:
453#   pkt = proto.pack(fcall=protocol.td.Twrite,
454#       data=proto.Twrite(tag=1, fid=2, offset=0, data=b'rawdata' })
455# a broken Twrite:
456#   pkt = proto.pack(fcall=protocol.td.Twrite,
457#       args={ 'tag': 1, 'fid': 2, 'offset': 0, 'count': 99,
458#           'data': b'rawdata' })  -- XXX won't work (yet?)
459#
460# build a QID: (td => typedefs and defines)
461#    qid = protocol.td.qid(type=protocol.td.QTFILE, version=1, path=2)
462# build the Twrite data as a data structure:
463#    wrdata = protocol.td.Twrite(tag=1, fid=2, offset=0, data=b'rawdata')
464#
# turn incoming byte stream data into a Header and remaining data:
#    foo = proto.unpack_header(data)
467
class _PackInfo(object):
    """
    A Sequencer plus a note of which of its coders are annotated.

    While wrapping the sequencer we collect every (cond, coder)
    pair whose coder has a non-None aux, which must be either
    'auto' (value supplied from the protocol's automatic
    variables) or 'len' (a count derived from another field).

    Only the top level of the sequencer is examined: in practice
    all the automatics live there, and there is no mechanism for
    handing inner automatics down anyway.
    """
    def __init__(self, seq):
        self.seq = seq
        annotated = []
        for entry in seq:       # each entry is a (cond, coder) pair
            coder = entry[1]
            if coder.aux is not None:
                assert coder.aux in ('auto', 'len')
                annotated.append(entry)
        # Stay None (rather than an empty list) when nothing is annotated.
        self.autos = annotated or None

    def __repr__(self):
        return '{0}({1!r})'.format(type(self).__name__, self.seq)

    def pack(self, auto_vars, conditions, data, rodata):
        """
        Encode data through the wrapped sequencer.

        Before encoding, any automatic or counted field that is
        still None in data is filled in: 'auto' fields come from
        auto_vars, 'len' fields get len() of the item they count.

        When rodata ("read-only data") is true the caller's object
        must not be touched, so the first fill-in works on
        data._copy() instead (data is a PFOD, hence _copy()).
        After that one copy, further fill-ins reuse it.
        """
        for cond, coder in self.autos or ():
            # A coder guarded by a false condition is never encoded,
            # so there is nothing to fill in for it.
            if cond is not None and not conditions[cond]:
                continue

            from_auto_vars = coder.aux == 'auto'
            if from_auto_vars:
                # E.g. version: the coder's own name is the field
                # that may be missing.
                target = coder.name
                if data.get(target) is not None:
                    continue
                counted = None
            else:
                # E.g. data[count]: the coder's repeat item is the
                # field that may be missing.  If the counted item is
                # missing too, leave both None and let the encoder
                # report the problem.
                assert coder.aux == 'len'
                target = coder.repeat
                if data.get(target) is not None:
                    continue
                counted = data.get(coder.name)
                if counted is None:
                    continue

            if rodata:
                data = data._copy()
                rodata = False
            data[target] = auto_vars[target] if from_auto_vars else len(counted)

        return self.seq.pack(data, conditions)
532
class _P9Proto(object):
    """
    One flavor of the 9P protocol, able to pack and unpack packets.

    An instance holds the automatic variables (currently just the
    version), the conditions used by conditional coders, the
    dictionary mapping each supported PFOD class to its packinfo,
    and an index that orders protocols (plain < dotu < dotl).

    For every PFOD class in pfods, __init__ also installs a method
    of the same name (Tversion, Rwalk, ...) that builds the PFOD
    from its arguments and packs it.

    Instances compare, and hash, by index.
    """
    def __init__(self, auto_vars, conditions, p9_data, pfods, index):
        """
        Set up the protocol and install the per-fcall invokers.

        auto_vars  -- values for 'auto' coders (read by _PackInfo.pack)
        conditions -- truth values for conditional coders
        p9_data    -- object whose .protocol[name] tuple starts with
                      the PFOD class of that name (used only as a
                      sanity check here)
        pfods      -- dictionary mapping PFOD class to packinfo
        index      -- ordering key for the rich comparisons
        """
        self.auto_vars = auto_vars      # currently, just version
        self.conditions = conditions    # '.u'
        self.pfods = pfods # dictionary, maps pfod to packinfo
        self.index = index # for comparison: plain < dotu < dotl

        self.use_rlerror = rrd.Rlerror in pfods

        for dtype in pfods:
            name = dtype.__name__
            # For each Txxx/Rxxx, define a self.<name>() to
            # call self._pack_from().
            #
            # The packinfo is from _PackInfo(seq); the fcall and
            # seq come from p9_data.protocol[<name>].
            proto_tuple = p9_data.protocol[name]
            assert dtype == proto_tuple[0]
            packinfo = pfods[dtype]
            # in theory we can do this with no names using nested
            # lambdas, but that's just too confusing, so let's
            # do it with nested functions instead.
            #
            # The default arguments bind this iteration's dtype and
            # packinfo, so each invoker keeps its own pair.
            def builder(constructor=dtype, packinfo=packinfo):
                "return function that calls _pack_from with built PFOD"
                def invoker(self, *args, **kwargs):
                    "build PFOD and call _pack_from"
                    return self._pack_from(constructor(*args, **kwargs),
                                           rodata=False, caller=None,
                                           packinfo=packinfo)
                return invoker
            func = builder()
            func.__name__ = name
            func.__doc__ = 'pack from {0}'.format(name)
            # NOTE: the invoker is installed on the class, not on this
            # instance, so it is visible from every _P9Proto instance.
            setattr(self.__class__, name, func)

    def __repr__(self):
        return '{0}({1!r})'.format(self.__class__.__name__, self.version)

    def __str__(self):
        return self.version

    # define rich-comparison operators, so we can, e.g., test vers > plain
    #
    # Each one returns NotImplemented for anything that is not a
    # _P9Proto, so that (e.g.) "proto == None" is simply False rather
    # than an AttributeError on other.index.
    def __lt__(self, other):
        if not isinstance(other, _P9Proto):
            return NotImplemented
        return self.index < other.index
    def __le__(self, other):
        if not isinstance(other, _P9Proto):
            return NotImplemented
        return self.index <= other.index
    def __eq__(self, other):
        if not isinstance(other, _P9Proto):
            return NotImplemented
        return self.index == other.index
    def __ne__(self, other):
        if not isinstance(other, _P9Proto):
            return NotImplemented
        return self.index != other.index
    def __gt__(self, other):
        if not isinstance(other, _P9Proto):
            return NotImplemented
        return self.index > other.index
    def __ge__(self, other):
        if not isinstance(other, _P9Proto):
            return NotImplemented
        return self.index >= other.index

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3;
        # hash on the same key that __eq__ compares.
        return hash(self.index)

    def downgrade_to(self, other_name):
        """
        Downgrade from this protocol to a not-greater one.

        Raises KeyError if other_name is not a valid protocol,
        or this is not a downgrade (with setting back to self
        considered a valid "downgrade", i.e., we're doing subseteq
        rather than subset).

        other_name may be a str or a bytes object; bytes are decoded
        as UTF-8 before the lookup.
        """
        # On Python 2, str is bytes, so the first test keeps us from
        # decoding there.
        if not isinstance(other_name, str) and isinstance(other_name, bytes):
            other_name = other_name.decode('utf-8', 'surrogateescape')
        other = p9_version(other_name)
        if other > self:
            raise KeyError(other_name)
        return other

    def error(self, tag, err):
        """
        produce Rerror or Rlerror, whichever is appropriate

        If err is an Exception instance, its errno and strerror
        attributes supply the error number and message.  Otherwise
        err is taken to be the error number itself and the message
        comes from os.strerror().

        Returns the packed Rlerror if this protocol has Rlerror
        among its pfods, else the packed Rerror.
        """
        if isinstance(err, Exception):
            errnum = err.errno
            errmsg = err.strerror
        else:
            errnum = err
            errmsg = os.strerror(errnum)
        if self.use_rlerror:
            return self.Rlerror(tag=tag, ecode=p9err.to_dotl(errnum))
        return self.Rerror(tag=tag, errstr=errmsg,
                           errnum=p9err.to_dotu(errnum))

    def pack(self, *args, **kwargs):
        """
        pack up a pfod or fcall-and-arguments

        Two calling conventions:

            pack(pfod)  or  pack(data=pfod)
            pack(fcall=<name or code>, args={...})

        Raises TypeError for any other combination of arguments,
        for an fcall that is not in fcall_names, or when args is
        not a dictionary.
        """
        fcall = kwargs.pop('fcall', None)
        if fcall is None:
            # Called without fcall=...
            # This requires that args have one argument that
            # is the PFOD; kwargs should be empty (but we'll take
            # data=pfod as well).  The size is implied, and
            # fcall comes from the pfod.
            data = kwargs.pop('data', None)
            if data is None:
                if len(args) != 1:
                    raise TypeError('pack() with no fcall requires 1 argument')
                data = args[0]
            if kwargs:
                raise TypeError('pack() got an unexpected keyword argument '
                                '{0}'.format(kwargs.popitem()[0]))
            # The PFOD belongs to the caller: rodata=True.
            return self._pack_from(data, True, 'pack', None)

        # Called as pack(fcall=whatever, args={...}).
        # The args argument must be a dictionary since we're going to
        # apply ** to it in the call to build the PFOD.  Note that
        # it could already be a PFOD, which is OK, but we're going to
        # copy it to a new one regardless (callers that have a PFOD
        # should use pack_from instead).
        if args:
            raise TypeError('pack() got unexpected arguments '
                            '{0!r}'.format(args))
        data = kwargs.pop('args', None)
        if kwargs:
            raise TypeError('pack() got an unexpected keyword argument '
                            '{0}'.format(kwargs.popitem()[0]))
        if not isinstance(data, dict):
            raise TypeError('pack() with fcall and data '
                            'requires data to be a dictionary')
        try:
            name = fcall_names[fcall]
        except KeyError:
            raise TypeError('pack(): {0} is not a valid '
                            'fcall value'.format(fcall))
        cls = getattr(rrd, name)
        data = cls(**data)
        # We just built this PFOD ourselves: rodata=False.
        return self._pack_from(data, False, 'pack', None)

    def pack_from(self, data):
        "pack from pfod data, using its type to determine fcall"
        return self._pack_from(data, True, 'pack_from', None)

    def _pack_from(self, data, rodata, caller, packinfo):
        """
        Internal pack(): called from both invokers (self.Tversion,
        self.Rwalk, etc.) and from pack and pack_from methods.
        "caller" says which.  If rodata is True we're not supposed to
        modify the incoming data, as it may belong to someone
        else.  Some calls to pack() build a PFOD and hence pass in
        False.

        The predefined invokers pass in a preconstructed PFOD,
        *and* set rodata=False, *and* provide a packinfo, so that
        we never have to copy, nor look up the packinfo.

        Returns the complete packet, header included.  Raises
        TypeError if caller is given and data's class is not one
        of this protocol's pfods.
        """
        if caller is not None:
            assert caller in ('pack', 'pack_from') and packinfo is None
            # Indirect call from pack_from(), or from pack() after
            # pack() built a PFOD.  We make sure this kind of PFOD
            # is allowed for this protocol.
            packinfo = self.pfods.get(data.__class__, None)
            if packinfo is None:
                raise TypeError('{0}({1!r}): invalid '
                                'input'.format(caller, data))

        # Pack the data
        pkt = packinfo.pack(self.auto_vars, self.conditions, data, rodata)

        fcall = data.__class__.__name__
        fcall_code = getattr(td, fcall)

        # That's the inner data; now we must add the header,
        # with fcall (translated back to byte code value) and
        # outer data.  The size is implied by len(pkt).  There
        # are no other auto variables, and no conditions.
        #
        # NB: the size includes the size of the header itself
        # and the fcall code byte, plus the size of the data.
        data = _9p_data.header_pfod(size=4 + 1 + len(pkt), dsize=len(pkt),
                                    fcall=fcall_code, data=pkt)
        empty = None # logically should be {}, but not actually used below
        pkt = _9p_data.header_pack_seq.pack(data, empty)
        return pkt

    @staticmethod
    def unpack_header(bstring, noerror=False):
        """
        Unpack header.

        We know that our caller has already stripped off the
        overall size field (4 bytes), leaving us with the fcall
        (1 byte) and data (len(bstring)-1 bytes).  If len(bstring)
        is 0, this is an invalid header: set dsize to 0 and let
        fcall become None, if noerror is set.

        Returns the filled-in header PFOD.
        """
        vdict = _9p_data.header_pfod()
        # size counts the 4 bytes our caller already removed.
        vdict['size'] = len(bstring) + 4
        vdict['dsize'] = max(0, len(bstring) - 1)
        _9p_data.header_unpack_seq.unpack(vdict, None, bstring, noerror)
        return vdict

    def unpack(self, bstring, noerror=False):
        """
        produce filled PFOD from fcall in packet

        bstring is the packet with its leading size field already
        removed (see unpack_header).  Raises SequenceError if the
        fcall is neither supported by this protocol nor Rlerror.
        """
        vdict = self.unpack_header(bstring, noerror)
        # NB: vdict['dsize'] is used internally during unpack, to
        # find out how many bytes to copy to vdict['data'], but by
        # the time unpack is done, we no longer need it.
        #
        # size = vdict['size']
        # dsize = vdict['dsize']
        fcall = vdict['fcall']
        data = vdict['data']
        # Note: it's possible for size and/or fcall to be None,
        # when noerror is true.  However, if we support fcall, then
        # clearly fcall is not None; and since fcall follows size,
        # we can always proceed if we support fcall.
        if self.supports(fcall):
            fcall = fcall_names[fcall]
            cls = getattr(rrd, fcall)
            seq = self.pfods[cls].seq
        elif fcall == td.Rlerror:
            # As a special case for diod, we accept Rlerror even
            # if it's not formally part of the protocol.
            cls = rrd.Rlerror
            seq = dotl.pfods[rrd.Rlerror].seq
        else:
            # Prefer the fcall's name in the message when it has one.
            fcall = fcall_names.get(fcall, fcall)
            raise SequenceError('invalid fcall {0!r} for '
                                '{1}'.format(fcall, self))
        vdict = cls()
        seq.unpack(vdict, self.conditions, data, noerror)
        return vdict

    def pack_wirestat(self, statobj):
        """
        Pack a stat object to appear as data returned by read()
        on a directory.  Essentially, we prefix the data with a size.
        """
        data = td.stat_seq.pack(statobj, self.conditions)
        return td.wirestat_seq.pack({'size': len(data), 'data': data}, {})

    def unpack_wirestat(self, bstring, offset, noerror=False):
        """
        Produce the next td.stat object from byte-string,
        returning it and new offset.

        The returned offset is the one reported by the wirestat
        sequencer, i.e. it depends on the size prefix rather than
        on how much of the data the stat decoding consumed.
        """
        statobj = td.stat()
        d = { 'size': None }
        newoff = td.wirestat_seq.unpack_from(d, self.conditions, bstring,
                                             offset, noerror)
        size = d['size']
        if size is None:        # implies noerror; newoff==offset+2
            return statobj, newoff
        # We now have size and data.  If noerror, data might be
        # too short, in which case we'll unpack a partial statobj.
        # Or (with or without noerror), data might be too long, so
        # that while len(data) == size, not all the data get used.
        # That may be allowed by the protocol: it's not clear.
        data = d['data']
        # The number of bytes consumed is returned by unpack_from but
        # deliberately ignored: if it differs from size ... then what?
        td.stat_seq.unpack_from(statobj, self.conditions, data,
                                0, noerror)
        return statobj, newoff

    def pack_dirent(self, dirent):
        """
        Dirents (dot-L only) are easy to pack, but we provide
        this function for symmetry.  (Should we raise an error
        if called on plain or dotu?)
        """
        return td.dirent_seq.pack(dirent, self.conditions)

    def unpack_dirent(self, bstring, offset, noerror=False):
        """
        Produces the next td.dirent object from byte-string,
        returning it and new offset.
        """
        deobj = td.dirent()
        offset = td.dirent_seq.unpack_from(deobj, self.conditions, bstring,
                                           offset, noerror)
        return deobj, offset

    def supports(self, fcall):
        """
        Return True if and only if this protocol supports the
        given fcall.

        The fcall may be given by code or by name; anything not
        found in fcall_names (including None) is unsupported.

        >>> plain.supports(100)
        True
        >>> plain.supports('Tversion')
        True
        >>> plain.supports('Rlink')
        False
        """
        fcall = fcall_names.get(fcall, None)
        if fcall is None:
            return False
        cls = getattr(rrd, fcall)
        return cls in self.pfods

    def get_version(self, as_bytes=True):
        "get Plan 9 protocol version, as string or (default) as bytes"
        ret = self.auto_vars['version']
        if as_bytes and not isinstance(ret, bytes):
            ret = ret.encode('utf-8')
        return ret

    @property
    def version(self):
        "Plan 9 protocol version"
        return self.get_version(as_bytes=False)
834
# Module-wide debugging switch, off by default.
# NOTE(review): the code that consults it is outside this part of the
# file -- presumably it gates diagnostic output such as
# _debug_print_sequencer(); confirm before relying on that.
DEBUG = False
836
# This defines a special en/decoder named "s" using a magic
# builtin.  This and stat are the only variable-length
# decoders, and this is the only recursively-variable-length
# one (i.e., stat decoding is effectively fixed size once we
# handle strings).  So this magic avoids the need for recursion.
#
# Note that _string_ is, in effect, size[2] orig_var[size].
#
# _STRING_MAGIC is the marker word itself: _parse_expr() recognizes
# it as a bare word, and it is also the type code handed to
# sequencer.EncDecSimple for string-typed fields.  SDesc is the
# description text for the "s" typedef, in the same
# "typedef <name>: ..." form as the other descriptions below.
_STRING_MAGIC = '_string_'
SDesc = "typedef s: " + _STRING_MAGIC
846
# This defines an en/decoder for type "qid",
# which en/decodes 1 byte called type, 4 called version, and
# 8 called path (for a total of 13 bytes).
#
# It also defines QTDIR, QTAPPEND, etc.  (These are not used
# for en/decode, or at least not yet.)
#
# The text below is data, not commentary: the typedef line and the
# "#define" lines are what end up as td.qid and td.QTDIR, td.QTFILE,
# etc. (see the module docstring), so edit it with care.
QIDDesc = """\
typedef qid: type[1] version[4] path[8]

    #define QTDIR       0x80
    #define QTAPPEND    0x40
    #define QTEXCL      0x20
    #define QTMOUNT     0x10
    #define QTAUTH      0x08
    #define QTTMP       0x04
    #define QTSYMLINK   0x02
    #define QTFILE      0x00
"""
865
# This defines a stat decoder, which has a 9p2000 standard front,
# followed by an optional additional portion.
#
# The constants are named DMDIR etc.
#
# The optional portion is the "{.u: ... }" group: those fields are
# the dotu-specific members mentioned in the module docstring.
#
# Because this is not a raw string, each backslash at the end of a
# line inside it joins that line with the next one, so the whole
# typedef reaches the parser as a single logical line.
STATDesc = """
typedef stat: type[2] dev[4] qid[qid] mode[4] atime[4] mtime[4] \
length[8] name[s] uid[s] gid[s] muid[s] \
{.u: extension[s] n_uid[4] n_gid[4] n_muid[4] }

    #define DMDIR           0x80000000
    #define DMAPPEND        0x40000000
    #define DMMOUNT         0x10000000
    #define DMAUTH          0x08000000
    #define DMTMP           0x04000000
    #define DMSYMLINK       0x02000000
            /* 9P2000.u extensions */
    #define DMDEVICE        0x00800000
    #define DMNAMEDPIPE     0x00200000
    #define DMSOCKET        0x00100000
    #define DMSETUID        0x00080000
    #define DMSETGID        0x00040000
"""
888
# This defines a wirestat decoder.  A wirestat is a size and then
# a (previously encoded, or future-decoded) stat.
#
# Here size[2] is a 2-byte count and data[size] uses that earlier
# name as its length, i.e., data is a byte string of size bytes
# (the name[name2] special case described in _parse_expr).
WirestatDesc = """
typedef wirestat: size[2] data[size]
"""
894
# This defines a dirent decoder, which has a dot-L specific format.
#
# The dirent type fields are defined as DT_* (same as BSD and Linux).
#
# The field layout matches the directory-entry record given in the
# Rreaddir description: qid[qid] offset[8] type[1] name[s].
DirentDesc = """
typedef dirent: qid[qid] offset[8] type[1] name[s]

    #define DT_UNKNOWN       0
    #define DT_FIFO          1
    #define DT_CHR           2
    #define DT_DIR           4
    #define DT_BLK           6
    #define DT_REG           8
    #define DT_LNK          10
    #define DT_SOCK         12
    #define DT_WHT          14
"""
911
912# N.B.: this is largely a slightly more rigidly formatted variant of
913# the contents of:
914# https://github.com/chaos/diod/blob/master/protocol.md
915#
# Note that <name> = <value>: ... assigns names for the
# fcall (function call) table.  Names without "= value" are
918# assumed to be the previous value +1 (and the two names are
919# also checked to make sure they are Tfoo,Rfoo).
920ProtocolDesc = """\
921Rlerror.L = 7: tag[2] ecode[4]
922    ecode is a numerical Linux errno
923
924Tstatfs.L = 8: tag[2] fid[4]
925Rstatfs.L: tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8] \
926         files[8] ffree[8] fsid[8] namelen[4]
927    Rstatfs corresponds to Linux statfs structure:
928    struct statfs {
929        long    f_type;     /* type of file system */
930        long    f_bsize;    /* optimal transfer block size */
931        long    f_blocks;   /* total data blocks in file system */
932        long    f_bfree;    /* free blocks in fs */
933        long    f_bavail;   /* free blocks avail to non-superuser */
934        long    f_files;    /* total file nodes in file system */
935        long    f_ffree;    /* free file nodes in fs */
936        fsid_t  f_fsid;     /* file system id */
937        long    f_namelen;  /* maximum length of filenames */
938    };
939
940    This comes from nowhere obvious...
941        #define FSTYPE      0x01021997
942
943Tlopen.L = 12: tag[2] fid[4] flags[4]
944Rlopen.L: tag[2] qid[qid] iounit[4]
945    lopen prepares fid for file (or directory) I/O.
946
947    flags contains Linux open(2) flag bits, e.g., O_RDONLY, O_RDWR, O_WRONLY.
948
949        #define L_O_CREAT       000000100
950        #define L_O_EXCL        000000200
951        #define L_O_NOCTTY      000000400
952        #define L_O_TRUNC       000001000
953        #define L_O_APPEND      000002000
954        #define L_O_NONBLOCK    000004000
955        #define L_O_DSYNC       000010000
956        #define L_O_FASYNC      000020000
957        #define L_O_DIRECT      000040000
958        #define L_O_LARGEFILE   000100000
959        #define L_O_DIRECTORY   000200000
960        #define L_O_NOFOLLOW    000400000
961        #define L_O_NOATIME     001000000
962        #define L_O_CLOEXEC     002000000
963        #define L_O_SYNC        004000000
964        #define L_O_PATH        010000000
965        #define L_O_TMPFILE     020000000
966
967Tlcreate.L = 14: tag[2] fid[4] name[s] flags[4] mode[4] gid[4]
968Rlcreate.L: tag[2] qid[qid] iounit[4]
969    lcreate creates a regular file name in directory fid and prepares
970    it for I/O.
971
972    fid initially represents the parent directory of the new file.
973    After the call it represents the new file.
974
975    flags contains Linux open(2) flag bits (including O_CREAT).
976
977    mode contains Linux creat(2) mode (permissions) bits.
978
979    gid is the effective gid of the caller.
980
981Tsymlink.L = 16: tag[2] dfid[4] name[s] symtgt[s] gid[4]
982Rsymlink.L: tag[2] qid[qid]
983    symlink creates a symbolic link name in directory dfid.  The
984    link will point to symtgt.
985
986    gid is the effective group id of the caller.
987
988    The qid for the new symbolic link is returned in the reply.
989
990Tmknod.L = 18: tag[2] dfid[4] name[s] mode[4] major[4] minor[4] gid[4]
991Rmknod.L: tag[2] qid[qid]
992    mknod creates a device node name in directory dfid with major
993    and minor numbers.
994
995    mode contains Linux mknod(2) mode bits.  (Note that these
996    include the S_IFMT bits which may be S_IFBLK, S_IFCHR, or
997    S_IFSOCK.)
998
999    gid is the effective group id of the caller.
1000
1001    The qid for the new device node is returned in the reply.
1002
1003Trename.L = 20: tag[2] fid[4] dfid[4] name[s]
1004Rrename.L: tag[2]
1005    rename renames a file system object referenced by fid, to name
1006    in the directory referenced by dfid.
1007
1008    This operation will eventually be replaced by renameat.
1009
1010Treadlink.L = 22: tag[2] fid[4]
1011Rreadlink.L: tag[2] target[s]
1012    readlink returns the contents of teh symbolic link referenced by fid.
1013
1014Tgetattr.L = 24: tag[2] fid[4] request_mask[8]
1015Rgetattr.L: tag[2] valid[8] qid[qid] mode[4] uid[4] gid[4] nlink[8] \
1016          rdev[8] size[8] blksize[8] blocks[8] \
1017          atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8] \
1018          ctime_sec[8] ctime_nsec[8] btime_sec[8] btime_nsec[8] \
1019          gen[8] data_version[8]
1020
1021    getattr gets attributes of a file system object referenced by fid.
1022    The response is intended to follow pretty closely the fields
1023    returned by the stat(2) system call:
1024
1025    struct stat {
1026        dev_t     st_dev;     /* ID of device containing file */
1027        ino_t     st_ino;     /* inode number */
1028        mode_t    st_mode;    /* protection */
1029        nlink_t   st_nlink;   /* number of hard links */
1030        uid_t     st_uid;     /* user ID of owner */
1031        gid_t     st_gid;     /* group ID of owner */
1032        dev_t     st_rdev;    /* device ID (if special file) */
1033        off_t     st_size;    /* total size, in bytes */
1034        blksize_t st_blksize; /* blocksize for file system I/O */
1035        blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
1036        time_t    st_atime;   /* time of last access */
1037        time_t    st_mtime;   /* time of last modification */
1038        time_t    st_ctime;   /* time of last status change */
1039    };
1040
1041    The differences are:
1042
1043     * st_dev is omitted
1044     * st_ino is contained in the path component of qid
1045     * times are nanosecond resolution
1046     * btime, gen and data_version fields are reserved for future use
1047
1048    Not all fields are valid in every call. request_mask is a bitmask
1049    indicating which fields are requested. valid is a bitmask
1050    indicating which fields are valid in the response. The mask
1051    values are as follows:
1052
1053    #define GETATTR_MODE        0x00000001
1054    #define GETATTR_NLINK       0x00000002
1055    #define GETATTR_UID         0x00000004
1056    #define GETATTR_GID         0x00000008
1057    #define GETATTR_RDEV        0x00000010
1058    #define GETATTR_ATIME       0x00000020
1059    #define GETATTR_MTIME       0x00000040
1060    #define GETATTR_CTIME       0x00000080
1061    #define GETATTR_INO         0x00000100
1062    #define GETATTR_SIZE        0x00000200
1063    #define GETATTR_BLOCKS      0x00000400
1064
1065    #define GETATTR_BTIME       0x00000800
1066    #define GETATTR_GEN         0x00001000
1067    #define GETATTR_DATA_VERSION 0x00002000
1068
1069    #define GETATTR_BASIC       0x000007ff  /* Mask for fields up to BLOCKS */
1070    #define GETATTR_ALL         0x00003fff  /* Mask for All fields above */
1071
1072Tsetattr.L = 26: tag[2] fid[4] valid[4] mode[4] uid[4] gid[4] size[8] \
1073               atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8]
1074Rsetattr.L: tag[2]
1075    setattr sets attributes of a file system object referenced by
1076    fid.  As with getattr, valid is a bitmask selecting which
1077    fields to set, which can be any combination of:
1078
1079    mode - Linux chmod(2) mode bits.
1080
1081    uid, gid - New owner, group of the file as described in Linux chown(2).
1082
1083    size - New file size as handled by Linux truncate(2).
1084
1085    atime_sec, atime_nsec - Time of last file access.
1086
1087    mtime_sec, mtime_nsec - Time of last file modification.
1088
1089    The valid bits are defined as follows:
1090
1091    #define SETATTR_MODE        0x00000001
1092    #define SETATTR_UID         0x00000002
1093    #define SETATTR_GID         0x00000004
1094    #define SETATTR_SIZE        0x00000008
1095    #define SETATTR_ATIME       0x00000010
1096    #define SETATTR_MTIME       0x00000020
1097    #define SETATTR_CTIME       0x00000040
1098    #define SETATTR_ATIME_SET   0x00000080
1099    #define SETATTR_MTIME_SET   0x00000100
1100
1101    If a time bit is set without the corresponding SET bit, the
1102    current system time on the server is used instead of the value
1103    sent in the request.
1104
1105Txattrwalk.L = 30: tag[2] fid[4] newfid[4] name[s]
1106Rxattrwalk.L: tag[2] size[8]
1107    xattrwalk gets a newfid pointing to xattr name.  This fid can
1108    later be used to read the xattr value.  If name is NULL newfid
1109    can be used to get the list of extended attributes associated
1110    with the file system object.
1111
1112Txattrcreate.L = 32: tag[2] fid[4] name[s] attr_size[8] flags[4]
1113Rxattrcreate.L: tag[2]
1114    xattrcreate gets a fid pointing to the xattr name.  This fid
1115    can later be used to set the xattr value.
1116
1117    flag is derived from set Linux setxattr. The manpage says
1118
1119        The flags parameter can be used to refine the semantics of
1120        the operation.  XATTR_CREATE specifies a pure create,
1121        which fails if the named attribute exists already.
1122        XATTR_REPLACE specifies a pure replace operation, which
1123        fails if the named attribute does not already exist.  By
1124        default (no flags), the extended attribute will be created
1125        if need be, or will simply replace the value if the
1126        attribute exists.
1127
1128    The actual setxattr operation happens when the fid is clunked.
1129    At that point the written byte count and the attr_size
1130    specified in TXATTRCREATE should be same otherwise an error
1131    will be returned.
1132
1133Treaddir.L = 40: tag[2] fid[4] offset[8] count[4]
1134Rreaddir.L: tag[2] count[4] data[count]
1135    readdir requests that the server return directory entries from
1136    the directory represented by fid, previously opened with
1137    lopen.  offset is zero on the first call.
1138
1139    Directory entries are represented as variable-length records:
1140        qid[qid] offset[8] type[1] name[s]
1141    At most count bytes will be returned in data.  If count is not
1142    zero in the response, more data is available.  On subsequent
1143    calls, offset is the offset returned in the last directory
1144    entry of the previous call.
1145
1146Tfsync.L = 50: tag[2] fid[4]
1147Rfsync.L: tag[2]
1148    fsync tells the server to flush any cached data associated
1149    with fid, previously opened with lopen.
1150
1151Tlock.L = 52: tag[2] fid[4] type[1] flags[4] start[8] length[8] \
1152       proc_id[4] client_id[s]
1153Rlock.L: tag[2] status[1]
1154    lock is used to acquire or release a POSIX record lock on fid
1155    and has semantics similar to Linux fcntl(F_SETLK).
1156
1157    type has one of the values:
1158
1159        #define LOCK_TYPE_RDLCK 0
1160        #define LOCK_TYPE_WRLCK 1
1161        #define LOCK_TYPE_UNLCK 2
1162
1163    start, length, and proc_id correspond to the analagous fields
1164    passed to Linux fcntl(F_SETLK):
1165
1166    struct flock {
1167        short l_type;  /* Type of lock: F_RDLCK, F_WRLCK, F_UNLCK */
1168        short l_whence;/* How to intrprt l_start: SEEK_SET,SEEK_CUR,SEEK_END */
1169        off_t l_start; /* Starting offset for lock */
1170        off_t l_len;   /* Number of bytes to lock */
1171        pid_t l_pid;   /* PID of process blocking our lock (F_GETLK only) */
1172    };
1173
1174    flags bits are:
1175
1176        #define LOCK_SUCCESS    0
1177        #define LOCK_BLOCKED    1
1178        #define LOCK_ERROR      2
1179        #define LOCK_GRACE      3
1180
1181    The Linux v9fs client implements the fcntl(F_SETLKW)
1182    (blocking) lock request by calling lock with
1183    LOCK_FLAGS_BLOCK set.  If the response is LOCK_BLOCKED,
1184    it retries the lock request in an interruptible loop until
1185    status is no longer LOCK_BLOCKED.
1186
1187    The Linux v9fs client translates BSD advisory locks (flock) to
1188    whole-file POSIX record locks.  v9fs does not implement
1189    mandatory locks and will return ENOLCK if use is attempted.
1190
1191    Because of POSIX record lock inheritance and upgrade
1192    properties, pass-through servers must be implemented
1193    carefully.
1194
1195Tgetlock.L = 54: tag[2] fid[4] type[1] start[8] length[8] proc_id[4] \
1196               client_id[s]
1197Rgetlock.L: tag[2] type[1] start[8] length[8] proc_id[4] client_id[s]
1198    getlock tests for the existence of a POSIX record lock and has
1199    semantics similar to Linux fcntl(F_GETLK).
1200
1201    As with lock, type has one of the values defined above, and
1202    start, length, and proc_id correspond to the analagous fields
1203    in struct flock passed to Linux fcntl(F_GETLK), and client_Id
1204    is an additional mechanism for uniquely identifying the lock
1205    requester and is set to the nodename by the Linux v9fs client.
1206
1207Tlink.L = 70: tag[2] dfid[4] fid[4] name[s]
1208Rlink.L: tag[2]
1209    link creates a hard link name in directory dfid.  The link
1210    target is referenced by fid.
1211
1212Tmkdir.L = 72: tag[2] dfid[4] name[s] mode[4] gid[4]
1213Rmkdir.L: tag[2] qid[qid]
1214    mkdir creates a new directory name in parent directory dfid.
1215
1216    mode contains Linux mkdir(2) mode bits.
1217
1218    gid is the effective group ID of the caller.
1219
1220    The qid of the new directory is returned in the response.
1221
1222Trenameat.L = 74: tag[2] olddirfid[4] oldname[s] newdirfid[4] newname[s]
1223Rrenameat.L: tag[2]
1224    Change the name of a file from oldname to newname, possible
1225    moving it from old directory represented by olddirfid to new
1226    directory represented by newdirfid.
1227
1228    If the server returns ENOTSUPP, the client should fall back to
1229    the rename operation.
1230
1231Tunlinkat.L = 76: tag[2] dirfd[4] name[s] flags[4]
1232Runlinkat.L: tag[2]
1233    Unlink name from directory represented by dirfd.  If the file
1234    is represented by a fid, that fid is not clunked.  If the
1235    server returns ENOTSUPP, the client should fall back to the
1236    remove operation.
1237
1238    There seems to be only one defined flag:
1239
1240        #define AT_REMOVEDIR    0x200
1241
1242Tversion = 100: tag[2] msize[4] version[s]:auto
1243Rversion: tag[2] msize[4] version[s]
1244
1245    negotiate protocol version
1246
1247    version establishes the msize, which is the maximum message
1248    size inclusive of the size value that can be handled by both
1249    client and server.
1250
1251    It also establishes the protocol version.  For 9P2000.L
1252    version must be the string 9P2000.L.
1253
1254Tauth = 102: tag[2] afid[4] uname[s] aname[s] n_uname[4]
1255Rauth: tag[2] aqid[qid]
1256    auth initiates an authentication handshake for n_uname.
1257    Rlerror is returned if authentication is not required.  If
1258    successful, afid is used to read/write the authentication
1259    handshake (protocol does not specify what is read/written),
1260    and afid is presented in the attach.
1261
1262Tattach = 104: tag[2] fid[4] afid[4] uname[s] aname[s] {.u: n_uname[4] }
1263Rattach: tag[2] qid[qid]
1264    attach introduces a new user to the server, and establishes
1265    fid as the root for that user on the file tree selected by
1266    aname.
1267
1268    afid can be NOFID (~0) or the fid from a previous auth
1269    handshake.  The afid can be clunked immediately after the
1270    attach.
1271
1272        #define NOFID       0xffffffff
1273
1274    n_uname, if not set to NONUNAME (~0), is the uid of the
1275    user and is used in preference to uname.  Note that it appears
1276    in both .u and .L (unlike most .u-specific features).
1277
1278        #define NONUNAME    0xffffffff
1279
1280    v9fs has several modes of access which determine how it uses
1281    attach.  In the default access=user, an initial attach is sent
1282    for the user provided in the uname=name mount option, and for
1283    each user that accesses the file system thereafter.  For
1284    access=, only the initial attach is sent for and all other
1285    users are denied access by the client.
1286
1287Rerror = 107: tag[2] errstr[s] {.u: errnum[4] }
1288
1289Tflush = 108: tag[2] oldtag[2]
1290Rflush: tag[2]
1291    flush aborts an in-flight request referenced by oldtag, if any.
1292
1293Twalk = 110: tag[2] fid[4] newfid[4] nwname[2] nwname*(wname[s])
1294Rwalk: tag[2] nwqid[2] nwqid*(wqid[qid])
1295    walk is used to descend a directory represented by fid using
1296    successive path elements provided in the wname array.  If
1297    succesful, newfid represents the new path.
1298
1299    fid can be cloned to newfid by calling walk with nwname set to
1300    zero.
1301
1302    if nwname==0, fid need not represent a directory.
1303
1304Topen = 112: tag[2] fid[4] mode[1]
1305Ropen: tag[2] qid[qid] iounit[4]
1306    open prepares fid for file (or directory) I/O.
1307
1308    mode is:
1309        #define OREAD       0   /* open for read */
1310        #define OWRITE      1   /* open for write */
1311        #define ORDWR       2   /* open for read and write */
1312        #define OEXEC       3   /* open for execute */
1313
1314        #define OTRUNC      16  /* truncate (illegal if OEXEC) */
1315        #define OCEXEC      32  /* close on exec (nonsensical) */
1316        #define ORCLOSE     64  /* remove on close */
1317        #define ODIRECT     128 /* direct access (.u extension?) */
1318
1319Tcreate = 114: tag[2] fid[4] name[s] perm[4] mode[1] {.u: extension[s] }
1320Rcreate: tag[2] qid[qid] iounit[4]
1321    create is similar to open; however, the incoming fid is the
1322    diretory in which the file is to be created, and on success,
1323    return, the fid refers to the then-created file.
1324
1325Tread = 116: tag[2] fid[4] offset[8] count[4]
1326Rread: tag[2] count[4] data[count]
1327    perform a read on the file represented by fid.  Note that in
1328    v9fs, a read(2) or write(2) system call for a chunk of the
1329    file that won't fit in a single request is broken up into
1330    multiple requests.
1331
1332    Under 9P2000.L, read cannot be used on directories.  See readdir.
1333
1334Twrite = 118: tag[2] fid[4] offset[8] count[4] data[count]
1335Rwrite: tag[2] count[4]
1336    perform a write on the file represented by fid.  Note that in
1337    v9fs, a read(2) or write(2) system call for a chunk of the
1338    file that won't fit in a single request is broken up into
1339    multiple requests.
1340
1341    write cannot be used on directories.
1342
1343Tclunk = 120: tag[2] fid[4]
1344Rclunk: tag[2]
1345    clunk signifies that fid is no longer needed by the client.
1346
1347Tremove = 122: tag[2] fid[4]
1348Rremove: tag[2]
1349    remove removes the file system object represented by fid.
1350
1351    The fid is always clunked (even on error).
1352
1353Tstat = 124: tag[2] fid[4]
1354Rstat: tag[2] size[2] data[size]
1355
1356Twstat = 126: tag[2] fid[4] size[2] data[size]
1357Rwstat: tag[2]
1358"""
1359
1360class _Token(object):
1361    r"""
1362    A scanned token.
1363
1364    Tokens have a type (tok.ttype) and value (tok.value).  The value
1365    is generally the token itself, although sometimes a prefix and/or
1366    suffix has been removed (for 'label', 'word*', ':aux', and
1367    '[type]' tokens).  If prefix and/or suffix are removed, the full
1368    original token is
1369    in its .orig.
1370
1371    Tokens are:
1372     - 'word', 'word*', or 'label':
1373         '[.\w]+' followed by optional '*' or ':':
1374
1375     - 'aux': ':' followed by '\w+' (used for :auto annotation)
1376
1377     - 'type':
1378       open bracket '[', followed by '\w+' or '\d+' (only one of these),
1379       followed by close bracket ']'
1380
1381     - '(', ')', '{', '}': themeselves
1382
1383    Each token can have arbitrary leading white space (which is
1384    discarded).
1385
1386    (Probably should return ':' as a char and handle it in parser,
1387    but oh well.)
1388    """
1389    def __init__(self, ttype, value, orig=None):
1390        self.ttype = ttype
1391        self.value = value
1392        self.orig = value if orig is None else orig
1393        if self.ttype == 'type' and self.value.isdigit():
1394            self.ival = int(self.value)
1395        else:
1396            self.ival = None
1397    def __str__(self):
1398        return self.orig
1399
# The scanner's token pattern, attached to the class after the fact.
# Leading white space (\s*) is matched outside the capture group, so
# group 1 is just the token text, which is exactly one of:
#
#   [.\w]+(?:\*|:)?   a word, optionally ending in '*' or ':'
#   :\w+              an auxiliary annotation such as ':auto'
#   \[(?:\w+|\d+)\]   a bracketed type name or size (\w matches
#                     digits too, so the \d+ branch never adds
#                     anything; it is there for the reader)
#   [(){}]            a single grouping character
_Token.tok_expr = re.compile(r'\s*([.\w]+(?:\*|:)?'
                             r'|:\w+'
                             r'|\[(?:\w+|\d+)\]'
                             r'|[(){}])')
1404
def _scan(string):
    """
    Tokenize a string, returning a list of _Token instances.

    White space in front of each token is discarded, and so is any
    white space left over after the last token; an empty or
    all-white-space string therefore produces an empty list.

    Note: This raises a ValueError with the position of any unmatched
    character in the string (after printing the input with a caret
    under that position).
    """
    def unmatched(where):
        # Show the input with a caret under the spot where scanning
        # got stuck, and hand back the exception for the caller to
        # raise.
        print('error: unmatched character(s) in input\n{0}\n{1}^'.format(
            string, ' ' * where))
        return ValueError('unmatched lexeme', where)

    tlist = []

    # make sure entire string is tokenized properly: finditer skips
    # over text it cannot match, so each match must begin exactly
    # where the previous one ended
    pos = 0
    for item in _Token.tok_expr.finditer(string):
        start, end = item.span()
        if start != pos:
            raise unmatched(pos)
        pos = end
        tlist.append(item.group(1))
    # Left-over white space is not a lexeme (leading white space is
    # thrown away too); anything else left over is an error.
    if string[pos:].strip():
        raise unmatched(pos)

    # classify each token, stripping decorations
    result = []
    for item in tlist:
        if item in ('(', ')', '{', '}'):
            tok = _Token(item, item)
        elif item[0] == ':':
            # must be tested before the trailing-':' (label) case
            tok = _Token('aux', item[1:], item)
        elif item.endswith(':'):
            tok = _Token('label', item[0:-1], item)
        elif item.endswith('*'):
            tok = _Token('word*', item[0:-1], item)
        elif item[0] == '[':
            # integer or named type
            if item[-1] != ']':
                raise ValueError('internal error: "{0}" is not [...]'.format(
                    item))
            tok = _Token('type', item[1:-1], item)
        else:
            tok = _Token('word', item)
        result.append(tok)
    return result
1450
1451def _debug_print_sequencer(seq):
1452    """for debugging"""
1453    print('sequencer is {0!r}'.format(seq), file=sys.stderr)
1454    for i, enc in enumerate(seq):
1455        print(' [{0:d}] = {1}'.format(i, enc), file=sys.stderr)
1456
1457def _parse_expr(seq, string, typedefs):
1458    """
1459    Parse "expression-ish" items, which is a list of:
1460        name[type]
1461        name*(subexpr)    (a literal asterisk)
1462        { label ... }
1463
1464    The "type" may be an integer or a second name.  In the case
1465    of a second name it must be something from <typedefs>.
1466
1467    The meaning of name[integer] is that we are going to encode
1468    or decode a fixed-size field of <integer> bytes, using the
1469    given name.
1470
1471    For name[name2], we can look up name2 in our typedefs table.
1472    The only real typedefs's used here are "stat" and "s"; each
1473    of these expands to a variable-size encode/decode.  See the
1474    special case below, though.
1475
1476    The meaning of name*(...) is: the earlier name will have been
1477    defined by an earlier _parse_expr for this same line.  That
1478    earlier name provides a repeat-count.
1479
1480    Inside the parens we get a name[type] sub-expressino.  This may
1481    not recurse further, so we can use a pretty cheesy parser.
1482
1483    As a special case, given name[name2], we first check whether
1484    name2 is an earlier name a la name*(...).  Here the meaning
1485    is much like name2*(name[1]), except that the result is a
1486    simple byte string, rather than an array.
1487
1488    The meaning of "{ label ... " is that everything following up
1489    to "}" is optional and used only with 9P2000.u and/or 9P2000.L.
1490    Inside the {...} pair is the usual set of tokens, but again
1491    {...} cannot recurse.
1492
1493    The parse fills in a Sequencer instance, and returns a list
1494    of the parsed names.
1495    """
1496    names = []
1497    cond = None
1498
1499    tokens = collections.deque(_scan(string))
1500
1501    def get_subscripted(tokens):
1502        """
1503        Allows name[integer] and name1[name2] only; returns
1504        tuple after stripping off both tokens, or returns None
1505        and does not strip tokens.
1506        """
1507        if len(tokens) == 0 or tokens[0].ttype != 'word':
1508            return None
1509        if len(tokens) > 1 and tokens[1].ttype == 'type':
1510            word = tokens.popleft()
1511            return word, tokens.popleft()
1512        return None
1513
1514    def lookup(name, typeinfo, aux=None):
1515        """
1516        Convert cond (if not None) to its .value, so that instead
1517        of (x, '.u') we get '.u'.
1518
1519        Convert typeinfo to an encdec.  Typeinfo may be 1/2/4/8, or
1520        one of our typedef names.  If it's a typedef name it will
1521        normally correspond to an EncDecTyped, but we have one special
1522        case for string types, and another for using an earlier-defined
1523        variable.
1524        """
1525        condval = None if cond is None else cond.value
1526        if typeinfo.ival is None:
1527            try:
1528                cls, sub = typedefs[typeinfo.value]
1529            except KeyError:
1530                raise ValueError('unknown type name {0}'.format(typeinfo))
1531            # the type name is typeinfo.value; the corresponding
1532            # pfod class is cls; the *variable* name is name;
1533            # and the sub-sequence is sub.  But if cls is None
1534            # then it's our string type.
1535            if cls is None:
1536                encdec = sequencer.EncDecSimple(name, _STRING_MAGIC, aux)
1537            else:
1538                encdec = sequencer.EncDecTyped(cls, name, sub, aux)
1539        else:
1540            if typeinfo.ival not in (1, 2, 4, 8):
1541                raise ValueError('bad integer code in {0}'.format(typeinfo))
1542            encdec = sequencer.EncDecSimple(name, typeinfo.ival, aux)
1543        return condval, encdec
1544
1545    def emit_simple(name, typeinfo, aux=None):
1546        """
1547        Emit name[type].  We may be inside a conditional; if so
1548        cond is not None.
1549        """
1550        condval, encdec = lookup(name, typeinfo, aux)
1551        seq.append_encdec(condval, encdec)
1552        names.append(name)
1553
1554    def emit_repeat(name1, name2, typeinfo):
1555        """
1556        Emit name1*(name2[type]).
1557
1558        Note that the conditional is buried in the sub-coder for
1559        name2.  It must be passed through anyway in case the sub-
1560        coder is only partly conditional.  If the sub-coder is
1561        fully conditional, each sub-coding uses or produces no
1562        bytes and hence the array itself is effectively conditional
1563        as well (it becomes name1 * [None]).
1564
1565        We don't (currently) have any auxiliary data for arrays.
1566        """
1567        if name1 not in names:
1568            raise ValueError('{0}*({1}[{2}]): '
1569                             '{0} undefined'.format(name1, name2,
1570                                                    typeinfo.value))
1571        condval, encdec = lookup(name2, typeinfo)
1572        encdec = sequencer.EncDecA(name1, name2, encdec)
1573        seq.append_encdec(condval, encdec)
1574        names.append(name2)
1575
1576    def emit_bytes_repeat(name1, name2):
1577        """
1578        Emit name1[name2], e.g., data[count].
1579        """
1580        condval = None if cond is None else cond.value
1581        # Note that the two names are reversed when compared to
1582        # count*(data[type]).  The "sub-coder" is handled directly
1583        # by EncDecA, hence is None.
1584        #
1585        # As a peculiar side effect, all bytes-repeats cause the
1586        # count itself to become automatic (to have an aux of 'len').
1587        encdec = sequencer.EncDecA(name2, name1, None, 'len')
1588        seq.append_encdec(condval, encdec)
1589        names.append(name1)
1590
1591    supported_conditions = ('.u')
1592    while tokens:
1593        token = tokens.popleft()
1594        if token.ttype == 'label':
1595            raise ValueError('misplaced label')
1596        if token.ttype == 'aux':
1597            raise ValueError('misplaced auxiliary')
1598        if token.ttype == '{':
1599            if cond is not None:
1600                raise ValueError('nested "{"')
1601            if len(tokens) == 0:
1602                raise ValueError('unclosed "{"')
1603            cond = tokens.popleft()
1604            if cond.ttype != 'label':
1605                raise ValueError('"{" not followed by cond label')
1606            if cond.value not in supported_conditions:
1607                raise ValueError('unsupported condition "{0}"'.format(
1608                    cond.value))
1609            continue
1610        if token.ttype == '}':
1611            if cond is None:
1612                raise ValueError('closing "}" w/o opening "{"')
1613            cond = None
1614            continue
1615        if token.ttype == 'word*':
1616            if len(tokens) == 0 or tokens[0].ttype != '(':
1617                raise ValueError('{0} not followed by (...)'.format(token))
1618            tokens.popleft()
1619            repeat = get_subscripted(tokens)
1620            if repeat is None:
1621                raise ValueError('parse error after {0}('.format(token))
1622            if len(tokens) == 0 or tokens[0].ttype != ')':
1623                raise ValueError('missing ")" after {0}({1}{2}'.format(
1624                    token, repeat[0], repeat[1]))
1625            tokens.popleft()
1626            # N.B.: a repeat cannot have an auxiliary info (yet?).
1627            emit_repeat(token.value, repeat[0].value, repeat[1])
1628            continue
1629        if token.ttype == 'word':
1630            # Special case: _STRING_MAGIC turns into a string
1631            # sequencer.  This should be used with just one
1632            # typedef (typedef s: _string_).
1633            if token.value == _STRING_MAGIC:
1634                names.append(_STRING_MAGIC) # XXX temporary
1635                continue
1636            if len(tokens) == 0 or tokens[0].ttype != 'type':
1637                raise ValueError('parse error after {0}'.format(token))
1638            type_or_size = tokens.popleft()
1639            # Check for name[name2] where name2 is a word (not a
1640            # number) that is in the names[] array.
1641            if type_or_size.value in names:
1642                # NB: this cannot have auxiliary info.
1643                emit_bytes_repeat(token.value, type_or_size.value)
1644                continue
1645            if len(tokens) > 0 and tokens[0].ttype == 'aux':
1646                aux = tokens.popleft()
1647                if aux.value != 'auto':
1648                    raise ValueError('{0}{1}: only know "auto", not '
1649                                     '{2}'.format(token, type_or_size,
1650                                                  aux.value))
1651                emit_simple(token.value, type_or_size, aux.value)
1652            else:
1653                emit_simple(token.value, type_or_size)
1654            continue
1655        raise ValueError('"{0}" not valid here"'.format(token))
1656
1657    if cond is not None:
1658        raise ValueError('unclosed "}"')
1659
1660    return names
1661
class _ProtoDefs(object):
    """
    Holder for everything parsed out of the protocol description text.

    Construction scans the typedef descriptions and then the protocol
    description, filling in:

        typedefs   name -> (pfod class or None, sequencer)
        defines    name -> (value, comment)
        protocol   name -> (pfod class, value, version suffix or None,
                            sequencer)

    and builds the hard-coded header sequencers.  export() later
    publishes the results into a module and fills in the plain, dotu
    and dotl dictionaries.
    """

    def __init__(self):
        # Typedefs first.  Scanning them may run into '#define'
        # lines as well, so both tables must exist up front.
        self.typedefs = {}
        self.defines = {}
        typedef_re = re.compile(r'\s*typedef\s+(\w+)\s*:\s*(.*)')
        typedef_sources = (
            ('SDesc', SDesc),
            ('QIDDesc', QIDDesc),
            ('STATDesc', STATDesc),
            ('WirestatDesc', WirestatDesc),
            ('DirentDesc', DirentDesc),
        )
        for label, text in typedef_sources:
            self.parse_lines(label, text, typedef_re, self.handle_typedef)

        # Then the protocol proper, which is most of the work and
        # may likewise execute '#define's.
        self.protocol = {}
        self.prev_proto_value = None
        proto_re = re.compile(r'(\*?\w+)(\.\w+)?\s*(?:=\s*(\d+))?\s*:\s*(.*)')
        self.parse_lines('ProtocolDesc', ProtocolDesc,
                         proto_re, self.handle_proto_def)

        self.setup_header()

        # Filled in by export().
        self.plain = {}
        self.dotu = {}
        self.dotl = {}

    def parse_lines(self, name, text, regexp, match_handler):
        """
        Parse text one line at a time.

        Every line is first tried as a #define; failing that it is
        matched against regexp and, on success, its groups are
        passed to match_handler.  A line that matches neither is
        an error unless it is empty or indented (indented lines are
        either #defines or commentary).

        If handling raises a ValueError, a complaint naming the
        source (name), the line offset and the offending line is
        printed to stderr, and then we sys.exit(1) (!).
        """
        define_re = re.compile(r'\s*#define\s+(\w+)\s+([^/]*)'
                               r'(\s*/\*.*\*/)?\s*$')
        for lineoff, line in enumerate(text.splitlines()):
            try:
                found = define_re.match(line)
                if found:
                    self.handle_define(*found.groups())
                else:
                    found = regexp.match(line)
                    if found:
                        match_handler(*found.groups())
                    elif line and not line[0].isspace():
                        raise ValueError('unhandled line: {0}'.format(line))
            except ValueError as err:
                complaint = ('Internal error while parsing {0}:\n'
                             '    {1}\n'
                             '(at line offset +{2}, discounting \\-newline)\n'
                             'The original line in question reads:\n'
                             '{3}').format(name, err.args[0], lineoff, line)
                print(complaint, file=sys.stderr)
                sys.exit(1)

    def handle_define(self, name, value, comment):
        """
        Record one #define.

        The arguments are the three groups of the #define regexp:
        the name, the value text, and the (possibly missing)
        comment.  The value is stored as an integer together with
        the comment.  Defining the same name twice raises
        ValueError.
        """
        # py3k's int(text, 0) rejects old-style octal such as 0777
        # (it wants 0o777), so if base 0 fails, retry as explicit
        # base 8.
        try:
            number = int(value, 0)
        except ValueError:
            number = int(value, 8)
        if DEBUG:
            print('define: defining {0} as {1:x}'.format(name, number),
                  file=sys.stderr)
        if name in self.defines:
            raise ValueError('redefining {0}'.format(name))
        self.defines[name] = (number, comment)

    def handle_typedef(self, name, expr):
        """
        Record one typedef.

        The arguments are the two groups of the typedef regexp: the
        name and the expression to parse.  The expression has to fit
        on a single line (use backslash-newline if needed).

        self.typedefs is handed to _parse_expr() because a typedef
        may refer back to earlier typedefs.  Defining the same name
        twice raises ValueError.
        """
        seq = sequencer.Sequencer(name)
        fields = _parse_expr(seq, expr, self.typedefs)
        # The special string-magic typedef defines no data structure.
        # (Its name probably ought to be just 's', but that is not
        # checked here.)
        string_magic = len(fields) == 1 and fields[0] == _STRING_MAGIC
        cls = None if string_magic else pfod.pfod(name, fields)
        if DEBUG:
            print('typedef: {0} = {1!r}; '.format(name, fields),
                  end='', file=sys.stderr)
            _debug_print_sequencer(seq)
        if name in self.typedefs:
            raise ValueError('redefining {0}'.format(name))
        self.typedefs[name] = cls, seq

    def handle_proto_def(self, name, proto_version, value, expr):
        """
        Record one protocol definition.

        The arguments are the groups of the protocol regexp:
        - name: the protocol item, such as Tversion, Rversion,
          Rlerror, etc.
        - proto_version: the version suffix (.u or .L), if any.
        - value: the numeric code, if given; otherwise the code is
          one more than the previous definition's.
        - expr: the expression to parse, which (as with typedefs)
          must fit on one line.

        Raises ValueError when no code can be determined, when the
        code is outside 0..255, or when name was already defined.
        """
        if value:
            code = int(value)
        elif self.prev_proto_value is None:
            raise ValueError('{0}: missing protocol value'.format(name))
        else:
            code = self.prev_proto_value + 1
        if not 0 <= code <= 255:
            raise ValueError(
                '{0}: protocol value {1} out of range'.format(name, code))
        self.prev_proto_value = code

        seq = sequencer.Sequencer(name)
        fields = _parse_expr(seq, expr, self.typedefs)
        cls = pfod.pfod(name, fields)
        if DEBUG:
            print('proto: {0} = {1}; '.format(name, code),
                  end='', file=sys.stderr)
            _debug_print_sequencer(seq)
        if name in self.protocol:
            raise ValueError('redefining {0}'.format(name))
        self.protocol[name] = cls, code, proto_version, seq

    def setup_header(self):
        """
        Build the packet header pfod and its two sequencers.

        The header is special-cased: its data is sized by dsize,
        which is effectively size - 5, and the mini language has no
        way to say that.  So the sequencers and the pfod are simply
        written out by hand here.

        Packing and unpacking differ because the unpacker never
        receives the original packet's size field, only the fcall
        and the data.
        """
        self.header_pfod = pfod.pfod('Header', 'size dsize fcall data')

        # Packing: 4-byte size, 1-byte fcall, then dsize bytes of data.
        pack_seq = sequencer.Sequencer('Header-pack')
        pack_seq.append_encdec(None,
                               sequencer.EncDecSimple('size', 4, None))
        pack_seq.append_encdec(None,
                               sequencer.EncDecSimple('fcall', 1, None))
        pack_seq.append_encdec(None,
                               sequencer.EncDecA('dsize', 'data', None))
        if DEBUG:
            print('Header-pack:', file=sys.stderr)
            _debug_print_sequencer(pack_seq)
        self.header_pack_seq = pack_seq

        # Unpacking: the same, minus the leading size field.
        unpack_seq = sequencer.Sequencer('Header-unpack')
        unpack_seq.append_encdec(None,
                                 sequencer.EncDecSimple('fcall', 1, None))
        unpack_seq.append_encdec(None,
                                 sequencer.EncDecA('dsize', 'data', None))
        if DEBUG:
            print('Header-unpack:', file=sys.stderr)
            _debug_print_sequencer(unpack_seq)
        self.header_unpack_seq = unpack_seq

    def export(self, mod):
        """
        Publish the parse results into module mod.

        Two namespace classes are attached to mod:
        - td gets the typedef classes, the stat/wirestat/dirent
          sequencers (as stat_seq etc.), the #define values, and
          the numeric code of every protocol item;
        - rrd gets the PFOD class of every protocol item.

        The 's' typedef is not exported, as it did not define a
        data structure.  fcall_names and the plain/dotu/dotl
        dictionaries are filled in along the way.

        Name collisions inside td are reported and then raise
        AssertionError.
        """
        td_ns = type('td', (object,), {})
        setattr(mod, 'td', td_ns)

        # Typedef classes (qid, stat); entries without a class are
        # skipped.
        for key, (cls, _seq) in self.typedefs.items():
            if cls is not None:
                setattr(td_ns, key, cls)

        # Sequencers: stat and wirestat are needed for reading
        # directories and doing Twstat; dirent is the similar
        # dirent decoder.
        for typename in ('stat', 'wirestat', 'dirent'):
            setattr(td_ns, typename + '_seq', self.typedefs[typename][1])

        # The #define values.
        for key, (number, _comment) in self.defines.items():
            if hasattr(td_ns, key):
                print('{0!r} is both a #define and a typedef'.format(key))
                raise AssertionError('bad internal names')
            setattr(td_ns, key, number)

        # Tattach, Rattach, Twrite, Rversion, etc. codes.  At the
        # same time fill in fcall_names[] so that a code maps back
        # to its name; names map to themselves too, which lets a
        # caller test either a name or a byte code for validity.
        for key, (_cls, code, _version, _seq) in self.protocol.items():
            if hasattr(td_ns, key):
                prev_def = '#define' if key in self.defines else 'typedef'
                print('{0!r} is both a {1} and a protocol value'.format(
                    key, prev_def))
                raise AssertionError('bad internal names')
            setattr(td_ns, key, code)
            fcall_names[key] = key
            fcall_names[code] = key

        # Now the PFODs for each protocol item (Tversion/Rversion,
        # Twrite/Rwrite, Tlopen/Rlopen, ...), which were created at
        # parse time.  Each goes into the rrd namespace and into the
        # per-protocol dictionaries, where it maps to a _PackInfo
        # for its sequencer.
        #
        # An item may be tagged with a protocol version.  Since
        # .L > .u > plain, an item belonging to a "lesser" protocol
        # is entered for every "greater" one as well.
        #
        # (That is sort-of-wrong for Rerror vs Rlerror, but we don't
        # bother to exclude Rerror from .L.)
        rrd_ns = type('rrd', (object,), {})
        setattr(mod, 'rrd', rrd_ns)
        tables_by_version = {
            None: (self.plain, self.dotu, self.dotl),
            '.u': (self.dotu, self.dotl),
            '.L': (self.dotl,),
        }
        for key, (cls, _code, proto_version, seq) in self.protocol.items():
            packinfo = _PackInfo(seq)
            tables = tables_by_version.get(proto_version)
            if tables is None:
                raise AssertionError(
                    'unknown protocol {1} for {0}'.format(key,
                                                          proto_version))
            for table in tables:
                table[cls] = packinfo
            setattr(rrd_ns, key, cls)
1931
# Parse the protocol description once, when this module is loaded,
# and publish the results into this module's own namespace.
_9p_data = _ProtoDefs()
_9p_data.export(sys.modules[__name__])

# Supported versions.  Lookup is currently by text string, in
# lowercase, so the keys are the lower-cased version strings.  Each
# row is (version string, value of the '.u' condition, table of
# PFODs available in that version); the row's position (0, 1, 2) is
# passed as the final _P9Proto argument.
_9p_versions = {
    version.lower(): _P9Proto({'version': version},
                              {'.u': has_dotu},
                              _9p_data,
                              pfods,
                              index)
    for index, (version, has_dotu, pfods) in enumerate((
        ('9P2000', False, _9p_data.plain),
        ('9P2000.u', True, _9p_data.dotu),
        ('9P2000.L', True, _9p_data.dotl),
    ))
}
def p9_version(vers_string):
    """
    Look up the protocol implementation for a version string.

    The lookup ignores case.  A bytes argument (py3k) is first
    decoded as UTF-8 with the surrogateescape error handler, so
    when the version is invalid the resulting KeyError is on the
    string-ified, lower-cased form of vers_string.
    """
    text = vers_string
    if isinstance(text, bytes) and not isinstance(text, str):
        text = text.decode('utf-8', 'surrogateescape')
    key = text.lower()
    return _9p_versions[key]
1963
# Ready-made protocol objects, one for each supported version.
plain, dotu, dotl = (p9_version(version)
                     for version in ('9p2000', '9p2000.u', '9p2000.L'))
1967
def qid_type2name(qidtype):
    """
    Return a printable name for a qid type value.

    The value is looked up as a whole among the td.QT* constants,
    not bit by bit; anything that is not one of them comes back as
    'invalid(0x...)'.

    >>> qid_type2name(td.QTDIR)
    'dir'
    >>> qid_type2name(td.QTAPPEND)
    'append-only'
    >>> qid_type2name(0xff)
    'invalid(0xff)'
    """
    # Open question: is it ever OK to have multiple bits set,
    # e.g., both QTAPPEND and QTEXCL?
    printable = {
        td.QTDIR: 'dir',
        td.QTAPPEND: 'append-only',
        td.QTEXCL: 'exclusive',
        td.QTMOUNT: 'mount',
        td.QTAUTH: 'auth',
        td.QTTMP: 'tmp',
        td.QTSYMLINK: 'symlink',
        td.QTFILE: 'file',
    }
    label = printable.get(qidtype)
    if label is None:
        return 'invalid({0:#x})'.format(qidtype)
    return label
1995
if __name__ == '__main__':
    # Run as a script: execute the doctests embedded in this module.
    from doctest import testmod
    testmod()
1999