cryptlib  3.4.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Macros
net_url.c
Go to the documentation of this file.
1 /****************************************************************************
2 * *
3 * Network Stream URL Processing Functions *
4 * Copyright Peter Gutmann 1993-2007 *
5 * *
6 ****************************************************************************/
7 
8 #if defined( INC_ALL )
9  #include "stream_int.h"
10 #else
11  #include "io/stream_int.h"
12 #endif /* Compiler-specific includes */
13 
14 #ifdef USE_TCP
15 
16 /****************************************************************************
17 * *
18 * Utility Functions *
19 * *
20 ****************************************************************************/
21 
/* Sanity-check the URL parse state.  checkUrlValue() checks one optional 
   URL component: it evaluates to true if the component is entirely absent 
   (NULL pointer and zero length) or if it's present (non-NULL pointer) 
   with a length in the inclusive range minLength...maxLength.  A NULL 
   pointer with a nonzero length, or a non-NULL pointer with an 
   out-of-range length, evaluates to false.

   Every parameter is parenthesised in the expansion so that an expression
   argument (for example 'cond ? a : b') can't regroup with the macro's
   own operators.  The data and dataLength arguments are evaluated more 
   than once so they shouldn't have side effects */

#define checkUrlValue( data, dataLength, minLength, maxLength ) \
		( ( ( data ) == NULL && ( dataLength ) == 0 ) || \
		  ( ( data ) != NULL && \
			( dataLength ) >= ( minLength ) && \
			( dataLength ) <= ( maxLength ) ) )
28 
30 static BOOLEAN sanityCheckURL( const URL_INFO *urlInfo )
31  {
32  assert( isReadPtr( urlInfo, sizeof( URL_INFO ) ) );
33 
34  /* Make sure that the URL type is valid. URL_TYPE_NONE is valid since
35  it denotes an unrecognised URL type */
36  if( urlInfo->type < URL_TYPE_NONE || urlInfo->type >= URL_TYPE_LAST )
37  return( FALSE );
38 
39  /* Make sure the schema, user info, and location are either absent or
40  have valid values */
41  if( !checkUrlValue( urlInfo->schema, urlInfo->schemaLen,
42  2 + 3, 16 + 3 ) ) /* [...] + "://" */
43  return( FALSE );
44  if( !checkUrlValue( urlInfo->userInfo, urlInfo->userInfoLen,
45  1, CRYPT_MAX_TEXTSIZE ) )
46  return( FALSE );
47  if( !checkUrlValue( urlInfo->location, urlInfo->locationLen,
48  1, CRYPT_MAX_TEXTSIZE ) )
49  return( FALSE );
50 
51  /* The host always has to be present */
52  if( urlInfo->host == NULL || \
53  urlInfo->hostLen < 1 || urlInfo->hostLen > MAX_URL_SIZE )
54  return( FALSE );
55 
56  return( TRUE );
57  }
58 
59 /****************************************************************************
60 * *
61 * URL Processing Functions *
62 * *
63 ****************************************************************************/
64 
65 /* Check a schema */
66 
68 static int checkSchema( IN_BUFFER( schemaLen ) const void *schema,
69  IN_RANGE( 2, 16 ) const int schemaLen,
70  OUT_ENUM_OPT( URL_TYPE ) URL_TYPE *urlType,
71  IN_ENUM_OPT( URL_TYPE ) URL_TYPE urlTypeHint )
72  {
73  typedef struct {
74  BUFFER_FIXED( schemaLength ) \
75  const char *schema;
76  const int schemaLength;
77  const URL_TYPE type;
78  } URL_SCHEMA_INFO;
79  static const URL_SCHEMA_INFO FAR_BSS urlSchemaInfo[] = {
80  { "http://", 7, URL_TYPE_HTTP },
81  { "https://", 8, URL_TYPE_HTTPS },
82  { "ssh://", 6, URL_TYPE_SSH },
83  { "scp://", 6, URL_TYPE_SSH },
84  { "sftp://", 7, URL_TYPE_SSH },
85  { "cmp://", 6, URL_TYPE_CMP },
86  { "tsp://", 6, URL_TYPE_TSP },
87  { "ldap://", 7, URL_TYPE_LDAP },
88  { NULL, 0, URL_TYPE_NONE }, { NULL, 0, URL_TYPE_NONE }
89  };
90  URL_TYPE type;
91  int i;
92 
93  assert( isReadPtr( schema, schemaLen ) );
94  assert( isWritePtr( urlType, sizeof( URL_TYPE ) ) );
95 
96  REQUIRES( schemaLen >= 2 && schemaLen <= 16 );
97  REQUIRES( urlTypeHint >= URL_TYPE_NONE && urlTypeHint < URL_TYPE_LAST );
98 
99  /* Clear return value */
100  *urlType = URL_TYPE_NONE;
101 
102  /* Check whether the schema is one that we recognise */
103  for( i = 0;
104  urlSchemaInfo[ i ].type != URL_TYPE_NONE && \
105  i < FAILSAFE_ARRAYSIZE( urlSchemaInfo, URL_SCHEMA_INFO );
106  i++ )
107  {
108  if( urlSchemaInfo[ i ].schemaLength == schemaLen && \
109  !strCompare( urlSchemaInfo[ i ].schema, schema, schemaLen ) )
110  break;
111  }
112  ENSURES( i < FAILSAFE_ARRAYSIZE( urlSchemaInfo, URL_SCHEMA_INFO ) );
113  type = urlSchemaInfo[ i ].type;
114 
115  /* If there's no URL hint given, we're done */
116  if( urlTypeHint == URL_TYPE_NONE )
117  {
118  *urlType = type;
119 
120  return( CRYPT_OK );
121  }
122 
123  /* Make sure that the URL type matches the hint */
124  switch( urlTypeHint )
125  {
126  case URL_TYPE_HTTP:
127  /* An explicit HTTP URL must really be HTTP and not just a
128  generic HTTP/HTTPS mix */
129  if( type != URL_TYPE_HTTP )
130  return( CRYPT_ERROR_BADDATA );
131  break;
132 
133  case URL_TYPE_HTTPS:
134  /* A requirement for an HTTPS URL can also match an HTTP URL,
135  this type is used for SSL in which the use of HTTPS is
136  implied by the fact that an SSL session is being used even if
137  it's a straight HTTP URL */
138  if( type != URL_TYPE_HTTP && type != URL_TYPE_HTTPS )
139  return( CRYPT_ERROR_BADDATA );
140  break;
141 
142  case URL_TYPE_SSH:
143  if( type != URL_TYPE_SSH )
144  return( CRYPT_ERROR_BADDATA );
145  break;
146 
147  case URL_TYPE_CMP:
148  if( type != URL_TYPE_CMP )
149  return( CRYPT_ERROR_BADDATA );
150  break;
151 
152  case URL_TYPE_TSP:
153  if( type != URL_TYPE_TSP )
154  return( CRYPT_ERROR_BADDATA );
155  break;
156 
157  case URL_TYPE_LDAP:
158  if( type != URL_TYPE_LDAP )
159  return( CRYPT_ERROR_BADDATA );
160  break;
161 
162  default:
163  retIntError();
164  }
165  *urlType = type;
166 
167  return( CRYPT_OK );
168  }
169 
170 /* Parse a URI into:
171 
172  <schema>://[<user>@]<host>[:<port>]/<path>[?<query>] components
173 
174  This function is intended for use from the internal interface (i.e. to
175  parse URLs supplied by the caller to the cryptlib API) and not so much
176  for the external interface (i.e. URLs supplied by remote systems for
177  processing by cryptlib). Because of this it's rather more liberal with
178  what it'll accept than a generic URL parser would be */
179 
180 CHECK_RETVAL STDC_NONNULL_ARG( ( 1, 2 ) ) \
181 int parseURL( OUT URL_INFO *urlInfo,
182  IN_BUFFER( urlLen ) const char *url,
183  IN_LENGTH_SHORT const int urlLen,
184  IN_PORT_OPT const int defaultPort,
185  IN_ENUM( URL_TYPE ) const URL_TYPE urlTypeHint,
186  const BOOLEAN preParseOnly )
187  {
188  const char *strPtr, *hostName, *location;
189  int strLen, hostNameLen, locationLen, offset, minLen, status;
190 
191  assert( isWritePtr( urlInfo, sizeof( URL_INFO ) ) );
192  assert( isReadPtr( url, urlLen ) );
193 
194  REQUIRES( urlLen > 0 && urlLen < MAX_INTLENGTH_SHORT );
195  REQUIRES( defaultPort == CRYPT_UNUSED || \
196  ( defaultPort >= MIN_PORT_NUMBER && \
197  defaultPort <= MAX_PORT_NUMBER ) );
198  REQUIRES( urlTypeHint >= URL_TYPE_NONE && urlTypeHint < URL_TYPE_LAST );
199 
200  /* Clear return values */
201  memset( urlInfo, 0, sizeof( URL_INFO ) );
202  if( defaultPort != CRYPT_UNUSED )
203  urlInfo->port = defaultPort;
204 
205  /* Make sure that the input contains valid characters */
206  for( offset = 0; offset < urlLen; offset++ )
207  {
208  const int ch = byteToInt( url[ offset ] );
209 
210  if( ch <= 0 || ch > 0x7F || !isPrint( ch ) )
211  return( CRYPT_ERROR_BADDATA );
212  }
213 
214  /* Skip leading and trailing whitespace */
215  strLen = strStripWhitespace( &strPtr, url, urlLen );
216  if( strLen < MIN_DNS_SIZE || strLen >= MAX_URL_SIZE )
217  return( CRYPT_ERROR_BADDATA );
218  ANALYSER_HINT( strPtr != NULL );
219 
220  /* Strip syntactic sugar */
221  if( ( offset = strFindStr( strPtr, strLen, "://", 3 ) ) >= 0 )
222  {
223  /* Extract the URI schema */
224  if( offset < 2 || offset > 8 )
225  return( CRYPT_ERROR_BADDATA );
226  offset += 3; /* Adjust for "://" */
227  urlInfo->schema = strPtr;
228  urlInfo->schemaLen = offset;
229  strLen = strExtract( &strPtr, strPtr, offset, strLen );
230  if( strLen < MIN_DNS_SIZE || strLen > MAX_URL_SIZE )
231  return( CRYPT_ERROR_BADDATA );
232 
233  /* Check whether the schema is one that we recognise */
234  status = checkSchema( urlInfo->schema, urlInfo->schemaLen,
235  &urlInfo->type, urlTypeHint );
236  if( cryptStatusError( status ) )
237  return( status );
238  }
239 
240  /* Check for user info before an '@' sign */
241  if( ( offset = strFindCh( strPtr, strLen, '@' ) ) >= 0 )
242  {
243  const char *userInfo;
244  int userInfoLen;
245 
246  /* Extract the user info */
247  if( offset < 1 || offset > MAX_URL_SIZE )
248  return( CRYPT_ERROR_BADDATA );
249  userInfoLen = strExtract( &userInfo, strPtr, 0, offset );
250  if( userInfoLen < 1 || userInfoLen > CRYPT_MAX_TEXTSIZE )
251  return( CRYPT_ERROR_BADDATA );
252  urlInfo->userInfo = userInfo;
253  urlInfo->userInfoLen = userInfoLen;
254 
255  /* Skip the user info */
256  strLen = strExtract( &strPtr, strPtr, offset + 1, strLen );
257  if( strLen < MIN_DNS_SIZE || strLen > MAX_URL_SIZE )
258  return( CRYPT_ERROR_BADDATA );
259  }
260 
261  /* RFC 2732 requires that IPv6 addresses in URLs be delimited by square
262  brackets (at least one reason being that they use colons in their
263  string representation, which would conflict with the way that ports
264  are denoted in URLs) so if we find one at the start of the URI we
265  treat it as an IPv6 address */
266  if( *strPtr == '[' && \
267  ( strLen != 12 || strCompare( strPtr, "[Autodetect]", 12 ) ) )
268  {
269  /* Locate the end of the RFC 2732 IPv6 address. The returned offset
270  can't be greater than the length - 1 but we make the check
271  explicit here to be sure */
272  if( ( offset = strFindCh( strPtr, strLen, ']' ) ) <= 0 )
273  return( CRYPT_ERROR_BADDATA );
274  if( offset < 2 || offset > strLen - 1 || offset > CRYPT_MAX_TEXTSIZE )
275  return( CRYPT_ERROR_BADDATA );
276 
277  /* If we're only pre-parsing the IPv6 address for future use rather
278  than actually parsing it to pass to the network address-
279  resolution functions then we have to leave the square-bracket
280  delimiters in place for when we perform the actual parse later
281  on */
282  if( preParseOnly )
283  {
284  hostName = strPtr;
285  hostNameLen = offset + 1; /* Include ']' */
286  minLen = 4;
287  }
288  else
289  {
290  /* Extract the IPv6 address starting at position 1 (past the
291  '[') and ending at position 'offset' (before the ']') with
292  minimum length 2 */
293  hostNameLen = strExtract( &hostName, strPtr, 1, offset );
294  minLen = 2;
295  }
296  offset++; /* Skip ']' */
297  }
298  else
299  {
300  int offset2;
301 
302  /* It's a non-IPv6 host name, check whether there's anything
303  following the name */
304  offset = strFindCh( strPtr, strLen, ':' );
305  offset2 = strFindCh( strPtr, strLen, '/' );
306  if( offset < 0 )
307  offset = offset2;
308  else
309  {
310  REQUIRES( offset >= 0 );
311  if( offset2 >= 0 )
312  offset = min( offset, offset2 );
313  }
314  if( offset <= 0 )
315  {
316  /* The remaining string is the server name, we're done (the
317  string has already been trimmed in earlier code) */
318  urlInfo->host = strPtr;
319  urlInfo->hostLen = strLen;
320 
321  ENSURES( sanityCheckURL( urlInfo ) );
322 
323  return( CRYPT_OK );
324  }
325 
326  /* There's port/location info following the server name. Trailing
327  whitespace will be stripped later */
328  hostNameLen = strExtract( &hostName, strPtr, 0, offset );
329  minLen = MIN_DNS_SIZE;
330  }
331  if( hostNameLen < minLen || hostNameLen > CRYPT_MAX_TEXTSIZE )
332  return( CRYPT_ERROR_BADDATA );
333  urlInfo->host = hostName;
334  urlInfo->hostLen = hostNameLen;
335 
336  /* If there's nothing beyond the host name, we're done */
337  if( offset >= strLen )
338  {
339  ENSURES( sanityCheckURL( urlInfo ) );
340 
341  return( CRYPT_OK );
342  }
343  strLen = strExtract( &strPtr, strPtr, offset, strLen );
344  if( strLen == 1 && *strPtr == '/' )
345  {
346  /* URLs may end in an optional no-op trailing '/' */
347  ENSURES( sanityCheckURL( urlInfo ) );
348 
349  return( CRYPT_OK );
350  }
351  if( strLen < 3 || strLen > MAX_URL_SIZE )
352  return( CRYPT_ERROR_BADDATA );
353 
354  /* Check for a port after a ':' */
355  if( *strPtr == ':' )
356  {
357  int portStrLen, port;
358 
359  /* Skip the colon */
360  strLen = strExtract( &strPtr, strPtr, 1, strLen );
361  if( strLen < 2 || strLen > MAX_URL_SIZE )
362  return( CRYPT_ERROR_BADDATA );
363 
364  /* Get the port to connect to. If it's an invalid port we ignore it
365  and use the default one, which was set earlier */
366  for( portStrLen = 0;
367  portStrLen < strLen && isDigit( strPtr[ portStrLen ] );
368  portStrLen++ );
369  if( portStrLen < 2 || portStrLen > 8 )
370  return( CRYPT_ERROR_BADDATA );
371  status = strGetNumeric( strPtr, portStrLen, &port,
373  if( cryptStatusError( status ) )
374  return( status );
375  urlInfo->port = port;
376 
377  /* If there's nothing beyond the port, we're done */
378  if( strLen == portStrLen )
379  {
380  ENSURES( sanityCheckURL( urlInfo ) );
381 
382  return( CRYPT_OK );
383  }
384  strLen = strExtract( &strPtr, strPtr, portStrLen, strLen );
385  if( strLen == 1 && *strPtr == '/' )
386  {
387  /* URLs may end in an optional no-op trailing '/' */
388  ENSURES( sanityCheckURL( urlInfo ) );
389 
390  return( CRYPT_OK );
391  }
392  if( strLen < 3 || strLen > MAX_URL_SIZE )
393  return( CRYPT_ERROR_BADDATA );
394  }
395 
396  /* What's left has to be a location */
397  if( *strPtr != '/' )
398  return( CRYPT_ERROR_BADDATA );
399 
400  /* The location string includes the leading '/' so we set the start
401  offset to 0 and not 1 */
402  locationLen = strExtract( &location, strPtr, 0, strLen );
403  if( locationLen < 3 || locationLen > MAX_URL_SIZE )
404  return( CRYPT_ERROR_BADDATA );
405  urlInfo->location = location;
406  urlInfo->locationLen = locationLen;
407 
408  ENSURES( sanityCheckURL( urlInfo ) );
409 
410  return( CRYPT_OK );
411  }
412 
413 #endif /* USE_TCP */