xref: /freebsd/contrib/wpa/src/wps/upnp_xml.c (revision c1d255d3ffdbe447de3ab875bf4e7d7accc5bfc5)
1  /*
2   * UPnP XML helper routines
3   * Copyright (c) 2000-2003 Intel Corporation
4   * Copyright (c) 2006-2007 Sony Corporation
5   * Copyright (c) 2008-2009 Atheros Communications
6   * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
7   *
8   * See wps_upnp.c for more details on licensing and code history.
9   */
10  
11  #include "includes.h"
12  
13  #include "common.h"
14  #include "base64.h"
15  #include "http.h"
16  #include "upnp_xml.h"
17  
18  
19  /*
20   * XML parsing and formatting
21   *
22   * XML is a markup language based on unicode; usually (and in our case,
23   * always!) based on utf-8. utf-8 uses a variable number of bytes per
24   * character. utf-8 has the advantage that all non-ASCII unicode characters are
25   * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
26   * characters are single ascii bytes, thus we can use typical text processing.
27   *
28   * (One other interesting thing about utf-8 is that it is possible to look at
29   * any random byte and determine whether it is the first byte of a character
30   * or a continuation byte).
31   *
32   * The base syntax of XML uses a few ASCII punctuation characters; any
33   * characters that would appear in the payload data are rewritten using
34   * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
35   * Five such escapes total (more can be defined but that does not apply to our
36   * case). Thus we can safely parse for angle brackets etc.
37   *
38   * XML describes tree structures of tagged data, with each element beginning
39   * with an opening tag <label> and ending with a closing tag </label> with
40   * matching label. (There is also a self-closing tag <label/> which is supposed
41   * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
42   * to see it for our purpose).
43   *
44   * Actually the opening tags are a little more complicated because they can
45   * contain "attributes" after the label (delimited by ascii space or tab chars)
46   * of the form attribute_label="value" or attribute_label='value'; as it turns
47   * out we do not have to read any of these attributes, just ignore them.
48   *
49   * Labels are any sequence of chars other than space, tab, right angle bracket
50   * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
51   * As it turns out, we can ignore the namespaces, in fact we can ignore the
52   * entire tree hierarchy, because the plain labels we are looking for will be
53   * unique (not in general, but for this application). We do however have to be
54   * careful to skip over the namespaces.
55   *
56   * In generating XML we have to be more careful, but that is easy because
57   * everything we do is pretty canned. The only real care to take is to escape
58   * any special chars in our payload.
59   */
60  
/**
 * xml_next_tag - Advance to next tag
 * @in: Input (NUL-terminated text to scan)
 * @out: OUT: start of tag just after '<'
 * @out_tagname: OUT: start of name of tag, skipping namespace
 * @end: OUT: one after tag (just past the closing '>')
 * Returns: 0 on success, 1 on failure
 *
 * A tag has form:
 *     <left angle bracket><...><right angle bracket>
 * Within the angle brackets, there is an optional leading forward slash (which
 * makes the tag an ending tag), then an optional leading label (followed by
 * colon) and then the tag name itself.
 *
 * Note that angle brackets present in the original data must have been encoded
 * as &lt; and &gt; so they will not trouble us.
 *
 * Failure means that no '<' was found, or that a '<' was found without a
 * matching '>' before the end of the input. In the latter case @out and
 * @out_tagname have already been written, but @end has not.
 */
int xml_next_tag(const char *in, const char **out,
		 const char **out_tagname, const char **end)
{
	/* Skip payload text up to the next opening angle bracket. */
	while (*in && *in != '<')
		in++;
	if (*in != '<')
		return 1;
	*out = ++in;
	if (*in == '/')
		in++;
	/* Provisional: this is the tag name unless a namespace prefix follows */
	*out_tagname = in;
	/*
	 * The <ctype.h> classifiers require a value representable as unsigned
	 * char (or EOF); plain char may be negative for utf-8 bytes with the
	 * high bit set, so convert explicitly.
	 */
	while (isalnum((unsigned char) *in) || *in == '-')
		in++;
	if (*in == ':')
		*out_tagname = ++in; /* what we scanned was a namespace label */
	/* Ignore the rest of the tag (name remainder and any attributes). */
	while (*in && *in != '>')
		in++;
	if (*in != '>')
		return 1;
	*end = ++in;
	return 0;
}
100  
101  
102  /* xml_data_encode -- format data for xml file, escaping special characters.
103   *
104   * Note that we assume we are using utf8 both as input and as output!
105   * In utf8, characters may be classed as follows:
106   *     0xxxxxxx(2) -- 1 byte ascii char
107   *     11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
108   *         110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
109   *         1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
110   *         11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
111   *      10xxxxxx(2) -- extension byte (6 payload bits per byte)
112   *      Some values implied by the above are however illegal because they
113   *      do not represent unicode chars or are not the shortest encoding.
114   * Actually, we can almost entirely ignore the above and just do
115   * text processing same as for ascii text.
116   *
117   * XML is written with arbitrary unicode characters, except that five
118   * characters have special meaning and so must be escaped where they
119   * appear in payload data... which we do here.
120   */
xml_data_encode(struct wpabuf * buf,const char * data,int len)121  void xml_data_encode(struct wpabuf *buf, const char *data, int len)
122  {
123  	int i;
124  	for (i = 0; i < len; i++) {
125  		u8 c = ((u8 *) data)[i];
126  		if (c == '<') {
127  			wpabuf_put_str(buf, "&lt;");
128  			continue;
129  		}
130  		if (c == '>') {
131  			wpabuf_put_str(buf, "&gt;");
132  			continue;
133  		}
134  		if (c == '&') {
135  			wpabuf_put_str(buf, "&amp;");
136  			continue;
137  		}
138  		if (c == '\'') {
139  			wpabuf_put_str(buf, "&apos;");
140  			continue;
141  		}
142  		if (c == '"') {
143  			wpabuf_put_str(buf, "&quot;");
144  			continue;
145  		}
146  		/*
147  		 * We could try to represent control characters using the
148  		 * sequence: &#x; where x is replaced by a hex numeral, but not
149  		 * clear why we would do this.
150  		 */
151  		wpabuf_put_u8(buf, c);
152  	}
153  }
154  
155  
156  /* xml_add_tagged_data -- format tagged data as a new xml line.
157   *
158   * tag must not have any special chars.
159   * data may have special chars, which are escaped.
160   */
xml_add_tagged_data(struct wpabuf * buf,const char * tag,const char * data)161  void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
162  {
163  	wpabuf_printf(buf, "<%s>", tag);
164  	xml_data_encode(buf, data, os_strlen(data));
165  	wpabuf_printf(buf, "</%s>\n", tag);
166  }
167  
168  
/* A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */

/**
 * xml_get_first_item - Get the text following the first tag with given name
 * @doc: NUL-terminated XML text to search
 * @item: Tag name to look for; compared case-insensitively, and any namespace
 *	prefix on the tag in the document is skipped before comparing
 * Returns: Newly allocated, NUL-terminated copy of the text between the end of
 * the first matching opening tag and the next '<' (or the end of the input),
 * or %NULL if no matching tag was found or memory allocation failed
 *
 * The returned text is copied as is: escape sequences are not decoded and
 * nested elements are not handled (the copy simply stops at the next '<').
 * The buffer is allocated with os_zalloc(), so the caller is expected to
 * release it with os_free().
 */
char * xml_get_first_item(const char *doc, const char *item)
{
	size_t match_len = os_strlen(item);
	const char *tag, *tagname, *end;
	char *value;

	/*
	 * This is crude: ignore any possible tag name conflicts and go right
	 * to the first tag of this name. This should be ok for the limited
	 * domain of UPnP messages.
	 */
	for (;;) {
		if (xml_next_tag(doc, &tag, &tagname, &end))
			return NULL;
		doc = end;
		/*
		 * Accept only opening tags whose name is exactly item, i.e.,
		 * the name is followed by '>' or by a non-printing character
		 * such as the whitespace before attributes. The cast keeps the
		 * isgraph() argument within the unsigned char range, as
		 * <ctype.h> requires, even for utf-8 bytes with the high bit
		 * set.
		 */
		if (!os_strncasecmp(tagname, item, match_len) &&
		    *tag != '/' &&
		    (tagname[match_len] == '>' ||
		     !isgraph((unsigned char) tagname[match_len]))) {
			break;
		}
	}

	/* doc now points just past the matching tag; find end of payload */
	end = doc;
	while (*end && *end != '<')
		end++;
	/* One extra zeroed byte provides the NUL terminator */
	value = os_zalloc(1 + (end - doc));
	if (value == NULL)
		return NULL;
	os_memcpy(value, doc, end - doc);
	return value;
}
222  
223  
xml_get_base64_item(const char * data,const char * name,enum http_reply_code * ret)224  struct wpabuf * xml_get_base64_item(const char *data, const char *name,
225  				    enum http_reply_code *ret)
226  {
227  	char *msg;
228  	struct wpabuf *buf;
229  	unsigned char *decoded;
230  	size_t len;
231  
232  	msg = xml_get_first_item(data, name);
233  	if (msg == NULL) {
234  		*ret = UPNP_ARG_VALUE_INVALID;
235  		return NULL;
236  	}
237  
238  	decoded = base64_decode(msg, os_strlen(msg), &len);
239  	os_free(msg);
240  	if (decoded == NULL) {
241  		*ret = UPNP_OUT_OF_MEMORY;
242  		return NULL;
243  	}
244  
245  	buf = wpabuf_alloc_ext_data(decoded, len);
246  	if (buf == NULL) {
247  		os_free(decoded);
248  		*ret = UPNP_OUT_OF_MEMORY;
249  		return NULL;
250  	}
251  	return buf;
252  }
253