/*
 * UPnP XML helper routines
 * Copyright (c) 2000-2003 Intel Corporation
 * Copyright (c) 2006-2007 Sony Corporation
 * Copyright (c) 2008-2009 Atheros Communications
 * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
 *
 * See wps_upnp.c for more details on licensing and code history.
 */

#include "includes.h"

#include "common.h"
#include "base64.h"
#include "http.h"
#include "upnp_xml.h"


/*
 * XML parsing and formatting
 *
 * XML is a markup language based on unicode; usually (and in our case,
 * always!) based on utf-8. utf-8 uses a variable number of bytes per
 * character. utf-8 has the advantage that all non-ASCII unicode characters are
 * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
 * characters are single ascii bytes, thus we can use typical text processing.
 *
 * (One other interesting thing about utf-8 is that it is possible to look at
 * any random byte and determine if it is the first byte of a character as
 * versus a continuation byte).
 *
 * The base syntax of XML uses a few ASCII punctuation characters; any
 * characters that would appear in the payload data are rewritten using
 * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
 * Five such escapes total (more can be defined but that does not apply to our
 * case). Thus we can safely parse for angle brackets etc.
 *
 * XML describes tree structures of tagged data, with each element beginning
 * with an opening tag <label> and ending with a closing tag </label> with
 * matching label. (There is also a self-closing tag <label/> which is supposed
 * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
 * to see it for our purpose).
 *
 * Actually the opening tags are a little more complicated because they can
 * contain "attributes" after the label (delimited by ascii space or tab chars)
 * of the form attribute_label="value" or attribute_label='value'; as it turns
 * out we do not have to read any of these attributes, just ignore them.
 *
 * Labels are any sequence of chars other than space, tab, right angle bracket
 * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
 * As it turns out, we can ignore the namespaces, in fact we can ignore the
 * entire tree hierarchy, because the plain labels we are looking for will be
 * unique (not in general, but for this application). We do however have to be
 * careful to skip over the namespaces.
 *
 * In generating XML we have to be more careful, but that is easy because
 * everything we do is pretty canned. The only real care to take is to escape
 * any special chars in our payload.
 */

/**
 * xml_next_tag - Advance to next tag
 * @in: Input
 * @out: OUT: start of tag just after '<'
 * @out_tagname: OUT: start of name of tag, skipping namespace
 * @end: OUT: one after tag
 * Returns: 0 on success, 1 on failure
 *
 * A tag has form:
 *     <left angle bracket><...><right angle bracket>
 * Within the angle brackets, there is an optional leading forward slash (which
 * makes the tag an ending tag), then an optional leading label (followed by
 * colon) and then the tag name itself.
 *
 * Note that angle brackets present in the original data must have been encoded
 * as &lt; and &gt; so they will not trouble us.
77 */ 78 int xml_next_tag(const char *in, const char **out, 79 const char **out_tagname, const char **end) 80 { 81 while (*in && *in != '<') 82 in++; 83 if (*in != '<') 84 return 1; 85 *out = ++in; 86 if (*in == '/') 87 in++; 88 *out_tagname = in; /* maybe */ 89 while (isalnum(*in) || *in == '-') 90 in++; 91 if (*in == ':') 92 *out_tagname = ++in; 93 while (*in && *in != '>') 94 in++; 95 if (*in != '>') 96 return 1; 97 *end = ++in; 98 return 0; 99 } 100 101 102 /* xml_data_encode -- format data for xml file, escaping special characters. 103 * 104 * Note that we assume we are using utf8 both as input and as output! 105 * In utf8, characters may be classed as follows: 106 * 0xxxxxxx(2) -- 1 byte ascii char 107 * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80 108 * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here) 109 * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here) 110 * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here) 111 * 10xxxxxx(2) -- extension byte (6 payload bits per byte) 112 * Some values implied by the above are however illegal because they 113 * do not represent unicode chars or are not the shortest encoding. 114 * Actually, we can almost entirely ignore the above and just do 115 * text processing same as for ascii text. 116 * 117 * XML is written with arbitrary unicode characters, except that five 118 * characters have special meaning and so must be escaped where they 119 * appear in payload data... which we do here. 
120 */ 121 void xml_data_encode(struct wpabuf *buf, const char *data, int len) 122 { 123 int i; 124 for (i = 0; i < len; i++) { 125 u8 c = ((u8 *) data)[i]; 126 if (c == '<') { 127 wpabuf_put_str(buf, "<"); 128 continue; 129 } 130 if (c == '>') { 131 wpabuf_put_str(buf, ">"); 132 continue; 133 } 134 if (c == '&') { 135 wpabuf_put_str(buf, "&"); 136 continue; 137 } 138 if (c == '\'') { 139 wpabuf_put_str(buf, "'"); 140 continue; 141 } 142 if (c == '"') { 143 wpabuf_put_str(buf, """); 144 continue; 145 } 146 /* 147 * We could try to represent control characters using the 148 * sequence: &#x; where x is replaced by a hex numeral, but not 149 * clear why we would do this. 150 */ 151 wpabuf_put_u8(buf, c); 152 } 153 } 154 155 156 /* xml_add_tagged_data -- format tagged data as a new xml line. 157 * 158 * tag must not have any special chars. 159 * data may have special chars, which are escaped. 160 */ 161 void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data) 162 { 163 wpabuf_printf(buf, "<%s>", tag); 164 xml_data_encode(buf, data, os_strlen(data)); 165 wpabuf_printf(buf, "</%s>\n", tag); 166 } 167 168 169 /* A POST body looks something like (per upnp spec): 170 * <?xml version="1.0"?> 171 * <s:Envelope 172 * xmlns:s="http://schemas.xmlsoap.org/soap/envelope/" 173 * s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/"> 174 * <s:Body> 175 * <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v"> 176 * <argumentName>in arg value</argumentName> 177 * other in args and their values go here, if any 178 * </u:actionName> 179 * </s:Body> 180 * </s:Envelope> 181 * 182 * where : 183 * s: might be some other namespace name followed by colon 184 * u: might be some other namespace name followed by colon 185 * actionName will be replaced according to action requested 186 * schema following actionName will be WFA scheme instead 187 * argumentName will be actual argument name 188 * (in arg value) will be actual argument value 189 */ 190 char 
* xml_get_first_item(const char *doc, const char *item) 191 { 192 const char *match = item; 193 int match_len = os_strlen(item); 194 const char *tag, *tagname, *end; 195 char *value; 196 197 /* 198 * This is crude: ignore any possible tag name conflicts and go right 199 * to the first tag of this name. This should be ok for the limited 200 * domain of UPnP messages. 201 */ 202 for (;;) { 203 if (xml_next_tag(doc, &tag, &tagname, &end)) 204 return NULL; 205 doc = end; 206 if (!os_strncasecmp(tagname, match, match_len) && 207 *tag != '/' && 208 (tagname[match_len] == '>' || 209 !isgraph(tagname[match_len]))) { 210 break; 211 } 212 } 213 end = doc; 214 while (*end && *end != '<') 215 end++; 216 value = os_zalloc(1 + (end - doc)); 217 if (value == NULL) 218 return NULL; 219 os_memcpy(value, doc, end - doc); 220 return value; 221 } 222 223 224 struct wpabuf * xml_get_base64_item(const char *data, const char *name, 225 enum http_reply_code *ret) 226 { 227 char *msg; 228 struct wpabuf *buf; 229 unsigned char *decoded; 230 size_t len; 231 232 msg = xml_get_first_item(data, name); 233 if (msg == NULL) { 234 *ret = UPNP_ARG_VALUE_INVALID; 235 return NULL; 236 } 237 238 decoded = base64_decode(msg, os_strlen(msg), &len); 239 os_free(msg); 240 if (decoded == NULL) { 241 *ret = UPNP_OUT_OF_MEMORY; 242 return NULL; 243 } 244 245 buf = wpabuf_alloc_ext_data(decoded, len); 246 if (buf == NULL) { 247 os_free(decoded); 248 *ret = UPNP_OUT_OF_MEMORY; 249 return NULL; 250 } 251 return buf; 252 } 253