| 1 |
#!/usr/bin/env python |
|---|
| 2 |
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 3 |
##~ Copyright (C) 2002-2004 TechGame Networks, LLC. |
|---|
| 4 |
##~ |
|---|
| 5 |
##~ This library is free software; you can redistribute it and/or |
|---|
| 6 |
##~ modify it under the terms of the BSD style License as found in the |
|---|
| 7 |
##~ LICENSE file included with this distribution. |
|---|
| 8 |
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 9 |
|
|---|
| 10 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 11 |
#~ Imports |
|---|
| 12 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 13 |
|
|---|
| 14 |
import re |
|---|
| 15 |
|
|---|
| 16 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 17 |
#~ Definitions |
|---|
| 18 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 19 |
|
|---|
| 20 |
class URIPathScheme(object):
    """Mutable, segment-oriented view of the path component of a URI.

    A path string such as ``u'/a/b;type=d'`` is held as a list of segments
    (``[u'', u'a', u'b']``) plus an optional attribute string
    (``u'type=d'``, everything after the first ``;``).  An empty segment
    means "nothing between two separators": a leading ``u''`` marks a
    rooted path and a trailing ``u''`` marks a path ending in a separator.
    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    segments = ()
    attributes = None

    _pathsep = u'/'
    _pathattrsep = u';'

    # Segment spellings recognised on input by add()
    _parentPathSegment = u'..'
    _currentPathSegment = u'.'

    # Segment values stored in the segment list
    _currentPathPart = u'' # empty string for joining
    _parentPathPart = u'..'

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Public Methods
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def __init__(self, pathOrSegments=u'', attributes=NotImplemented):
        """Build a path from a string, another path, or a segment sequence.

        pathOrSegments -- a path string (split on ``_pathsep`` after the
            part following the first ``_pathattrsep`` has been stored as
            ``attributes``), an instance of this class (copied), or any
            sequence of segments.
        attributes -- when supplied, overrides whatever attributes were
            obtained from ``pathOrSegments``.  ``NotImplemented`` is the
            "not supplied" marker so that ``None`` can be passed explicitly.
        """
        if isinstance(pathOrSegments, basestring):
            if self._pathattrsep in pathOrSegments:
                pathOrSegments, self.attributes = pathOrSegments.split(self._pathattrsep, 1)
            self.setSegments(pathOrSegments.split(self._pathsep))
        elif isinstance(pathOrSegments, self.__class__):
            self.__dict__.update(pathOrSegments.__dict__)
            # The dict copy above is shallow; take a private segment list so
            # that editing the copy cannot change the path it was made from.
            self.setSegments(pathOrSegments.getSegments())
        else:
            self.setSegments(pathOrSegments)

        if attributes is not NotImplemented:
            self.attributes = attributes

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def fromSegments(klass, segments, *args, **kw):
        """Alternate constructor taking a sequence of segments."""
        return klass(segments, *args, **kw)
    fromSegments = classmethod(fromSegments)

    def fromPathString(klass, path, *args, **kw):
        """Alternate constructor taking a path string."""
        return klass(path, *args, **kw)
    fromPathString = classmethod(fromPathString)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def __len__(self):
        """Number of stored segments, empty ones included."""
        return len(self.getSegments())

    def __getitem__(self, key):
        """Return one segment for an index, or a new path for a slice."""
        result = self.getSegments()[key]
        if isinstance(key, slice):
            result = self.fromSegments(result)
        return result

    def __contains__(self, other):
        """``other in path`` is ``path.contains(other)`` (proper containment)."""
        return self.contains(other)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def contains(self, other, proper=True):
        """Tell whether `other` lies below this path.

        With ``proper`` true the remainder must be non-empty, i.e. a path
        does not properly contain itself; with ``proper`` false an equal
        path counts as contained.
        """
        subpath = self.asSubpath(other, True)
        if proper:
            return bool(subpath)
        else:
            return subpath is not None

    def isSubpath(self, other, tolerant=True):
        """Tell whether `other` starts with this path.

        With ``tolerant`` false a mismatch raises ``ValueError`` instead of
        returning ``False``.
        """
        subpath = self.asSubpath(other, tolerant)
        return subpath is not None

    def asSubpath(self, other, tolerant=False):
        """Return the part of `other` that follows this path.

        Empty segments are ignored on both sides before comparing.  When
        `other` does not start with this path, returns ``None`` if
        ``tolerant`` is true and raises ``ValueError`` otherwise.
        """
        other = self.fromPathString(other)
        otherSeg = other.getSegments(True) # filter out any 'curdirs'
        selfSeg = self.getSegments(True)
        if selfSeg == otherSeg[:len(selfSeg)]:
            return self.fromSegments(otherSeg[len(selfSeg):])
        elif tolerant:
            return None
        else:
            raise ValueError("%r is not a valid subpath of %r" % (other, self))

    def getSegments(self, simplify=False):
        """Return the segments; with ``simplify`` a new list without empty ones.

        Without ``simplify`` the stored sequence itself is returned, so
        in-place edits of the result edit this path.
        """
        if simplify:
            # Always a list, so two simplified paths compare equal whatever
            # sequence type their segments were originally supplied as.
            return [segment for segment in self.segments if segment]
        else:
            return self.segments

    def setSegments(self, segments):
        """Replace the segments with a private list copy of `segments`.

        A false value (``None``, empty sequence) yields an empty list.  The
        copy guarantees the add*() methods can append/pop even when a tuple
        was supplied, and keeps the caller's own list untouched.
        """
        if segments:
            self.segments = list(segments)
        else:
            self.segments = []

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def asPathString(self, root=False):
        """Join segments (and attributes, if any) back into a path string.

        With ``root`` true a leading separator is added unless the result
        already starts with one.
        """
        result = self._pathsep.join(self.getSegments()) or ''
        if self.attributes:
            result = self._pathattrsep.join((result, self.attributes))
        if root and not result.startswith(self._pathsep):
            result = self._pathsep.join((u'', result))
        return result

    def __unicode__(self):
        return unicode(self.asPathString())

    def __str__(self):
        return str(self.asPathString())

    def __repr__(self):
        return '<%s.%s "%s">' % (self.__class__.__module__, self.__class__.__name__, str(self))

    def __iter__(self):
        return iter(self.getSegments())

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def normalized(self):
        """Resolve ``.`` and ``..`` segments in place; returns ``self``.

        The current segments are replayed one by one through add() onto an
        empty list.  Attributes are left untouched.
        """
        segments = self.getSegments()
        self.setSegments([])
        self.addPath(segments, andAttrs=False)
        return self

    def addPath(self, pathOrSegments, andAttrs=False):
        """Append every segment of `pathOrSegments` via add(); returns ``self``.

        pathOrSegments -- a path string, a path object, or a sequence of
            segments.
        andAttrs -- when true, this path's attributes are replaced by those
            of the added path (``None`` when the added object carries none,
            e.g. a plain list).
        """
        if isinstance(pathOrSegments, basestring):
            path = self.fromPathString(pathOrSegments)
        else:
            path = pathOrSegments

        for each in iter(path):
            self.add(each)

        if andAttrs:
            self.attributes = getattr(path, 'attributes', None)
        return self

    def add(self, segment):
        """Append one segment, interpreting ``.`` and ``..``; returns ``self``."""
        if segment == self._currentPathSegment:
            self.addCurrentSegment()
        elif segment == self._parentPathSegment:
            self.addParentSegment()
        else:
            self.addSegment(segment)
        return self

    def addParentSegment(self):
        """Apply a ``..`` step to the end of the path."""
        tos = self.getTopSegment()
        if tos is None:
            # nothing to climb out of: record the '..' itself
            self.getSegments().append(self._parentPathPart)
        elif not tos:
            # top is an empty segment: pop it and recurse
            self.getSegments().pop()
            self.addParentSegment()
        elif tos == self._parentPathPart:
            # well, just go up another directory...
            self.getSegments().append(self._parentPathPart)
        else:
            # we have a valid parent, so just remove it
            self.getSegments().pop()

        # this is implied: after '..' the path designates a directory
        self.addCurrentSegment()

    def addCurrentSegment(self):
        """Apply a ``.`` step: make the path end with a separator."""
        tos = self.getTopSegment()
        if tos:
            # path is "a/b" ==> make it "a/b/"
            # or path is "a/.." ==> make it "a/../"
            self.getSegments().append(self._currentPathPart)

    def addSegment(self, segment):
        """Append an ordinary segment, filling a trailing empty slot if any."""
        tos = self.getTopSegment()
        if not tos:
            # handles both empty, and tos is '' cases
            self.getSegments()[-1:] = [segment]
        else:
            self.getSegments().append(segment)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def getTopSegment(self):
        """Return the last segment, or ``None`` when there are no segments."""
        if self.getSegments():
            return self.getSegments()[-1]
        else:
            return None

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def splitCommon(*paths, **kw):
        """Returns (common, remaining) where all of the segments in common are
        shared among the paths.  Remaining contains, for each path in order,
        the list of segments that are not shared.

        Called as a method, the instance itself is the first of `paths`.
        Keyword arguments are accepted and ignored.
        """
        # Materialise every path first.  Zipping live iterators silently
        # drops a segment: when a later path runs out, one element has
        # already been pulled from each earlier path and is never returned.
        segmentLists = [list(path) for path in paths]
        if segmentLists:
            shortest = min([len(segments) for segments in segmentLists])
        else:
            shortest = 0

        shared = 0
        while shared < shortest:
            column = [segments[shared] for segments in segmentLists]
            # all elements are equal (assumes transitivity)?
            if column[:-1] != column[1:]:
                break
            shared += 1

        common = []
        if segmentLists:
            common = segmentLists[0][:shared]
        remaining = [segments[shared:] for segments in segmentLists]
        return (common, remaining)

    def getRelative(self, other):
        """Returns the path necessary to get from `self` to `other`.

        The last non-shared segment of `self` is not climbed out of: one
        ``..`` is emitted for every non-shared segment of `self` but the
        last, followed by the non-shared segments of `other`.
        """
        common, (myPart, otherPart) = self.splitCommon(other)
        return self.fromSegments([self._parentPathPart]*(len(myPart)-1) + otherPart)
|---|
| 232 |
|
|---|
| 233 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 234 |
|
|---|
| 235 |
class URI(object):
    r"""Mutable URI reference split into its RFC 2396 components.

    Parsing follows the approach of RFC 2396
    <http://www.ietf.org/rfc/rfc2396.txt>, Appendix B ("Parsing a URI
    Reference with a Regular Expression"), which breaks a reference down
    with::

        ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         12            3  4          5       6  7        8 9

    where the numbers mark the subexpressions, and::

        scheme    = $2
        authority = $4
        path      = $5
        query     = $7
        fragment  = $9

    For example ``http://www.ics.uci.edu/pub/ietf/uri/#Related`` gives
    scheme ``http``, authority ``www.ics.uci.edu``, path
    ``/pub/ietf/uri/``, no query, and fragment ``Related``.

    The patterns used here are a named-group variant of that expression
    which additionally captures each separator (``_schemesep``,
    ``_authoritysep``, ``_pathsep``, ``_querysep``, ``_fragmentsep``).  A
    component or separator that is absent from the parsed text is stored
    as ``None`` on the instance; standardize() removes the per-instance
    separators so the class defaults apply again.  getURIValue()
    recreates the reference from the components (RFC 2396, Section 5.2,
    step 7).
    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    _reURItype = r'^(?:(?P<scheme>[^:/?#@]+)(?P<_schemesep>:))?'
    _reURIauthority = r'(?:(?P<_authoritysep>//)(?P<authority>[^/?#]*))?'
    _reURIpath = r'(?P<path>(?P<_pathsep>/)?[^?#]+)?'
    _reURIquery = r'(?:(?P<_querysep>\?)(?P<query>[^#]*))?'
    _reURIfragment = r'(?:(?P<_fragmentsep>#)(?P<fragment>.*))?'

    _reURI = _reURItype + _reURIauthority + _reURIpath + _reURIquery + _reURIfragment
    _reURIPattern = re.compile(_reURI)

    # Default values -- spelled out as class attributes instead of being
    # injected through locals(), whose modification is not guaranteed to
    # have any effect.
    scheme = None
    authority = None
    path = None
    query = None
    fragment = None

    _schemesep = u':'
    _authoritysep = u'//'
    _pathsep = u'/'
    _pathattrsep = u';'
    _querysep = u'?'
    _fragmentsep = u'#'

    # Names that setURIParts() will accept, mapped to their defaults
    _uriParts = {
        'scheme': scheme,
        'authority': authority,
        'path': path,
        'query': query,
        'fragment': fragment,

        '_schemesep': _schemesep,
        '_authoritysep': _authoritysep,
        '_pathsep': _pathsep,
        '_pathattrsep': _pathattrsep,
        '_querysep': _querysep,
        '_fragmentsep': _fragmentsep,
        }

    PathScheme = URIPathScheme

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Special
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def __init__(self, uri=None, standardize=False, **kw):
        """Create a URI, optionally from a string or another URI.

        uri -- URI string or URI instance handed to setURI(); ``None``
            leaves every component at its class default.
        standardize -- when true, standardize() is applied afterwards.
        kw -- forwarded to setURI() (only when `uri` is given).
        """
        if uri is not None:
            self.setURI(uri, **kw)

        if standardize:
            self.standardize()

    def fromURI(klass, uriOther, copy=False):
        """Return `uriOther` as an instance of this class.

        An existing instance is returned as-is unless ``copy`` is true.
        """
        if not copy and isinstance(uriOther, klass):
            return uriOther
        else:
            return klass(uriOther)
    fromURI = classmethod(fromURI)

    def fromURIParts(klass, uriParts):
        """Build an instance from a mapping of component names to values."""
        r = klass()
        r.setURIParts(uriParts)
        return r
    fromURIParts = classmethod(fromURIParts)

    def copyFromURIParts(self, uriParts):
        """New instance of this object's class built from `uriParts`."""
        return self.fromURIParts(uriParts)

    def copyFromURI(self, uriOther):
        """New instance of this object's class copied from `uriOther`."""
        return self.fromURI(uriOther, copy=True)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def __getstate__(self):
        """Pickle as the recomposed URI string only."""
        return dict(uri=self.getURIValue())

    def __setstate__(self, data):
        """Restore by re-parsing the pickled URI string."""
        uri = data.pop('uri', '')
        self.setURI(uri)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def asURIStr(self, formatStr='<URI:%s>'):
        """Format this URI into `formatStr` (one ``%s`` placeholder)."""
        return formatStr % (self,)

    def __repr__(self):
        return self.asURIStr()

    def __len__(self):
        # Length of the recomposed string; this makes an empty URI false.
        return len(self.getURIValue())

    def __unicode__(self):
        return unicode(self.getURIValue())

    def __str__(self):
        return str(self.getURIValue())

    def __cmp__(self, other):
        # Same -1/0/1 result as cmp() on the two strings.
        mine, theirs = str(self), str(other)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        # Equality by string form, matching __cmp__ and __hash__; spelled
        # out so it does not depend on __cmp__ being consulted.
        return str(self) == str(other)

    def __ne__(self, other):
        return str(self) != str(other)

    def __hash__(self):
        return hash(str(self))

    def getURIParts(self):
        """Return the five main components as a new dict."""
        return {
            'scheme': self.scheme,
            'authority': self.authority,
            'path': self.path,
            'query': self.query,
            'fragment': self.fragment,
            }

    def setURIParts(self, kwparam=None, **kw):
        """Set components/separators from a mapping and/or keywords.

        Keyword values win over `kwparam` values.  Names that are not keys
        of ``_uriParts`` are silently ignored.
        """
        items = []
        if kwparam:
            items.extend(kwparam.items())
        items.extend(kw.items())
        for key, value in items:
            # but only if they are in known names
            if key in self._uriParts:
                setattr(self, key, value)

    def clearURIParts(self):
        """Drop every per-instance component so class defaults show through."""
        for key in self._uriParts:
            if key in self.__dict__:
                delattr(self, key)

    def standardize(self):
        """Reset separators to the class defaults; returns ``self``."""
        self.standardizeSeperators()
        return self

    def standardizeSeperators(self):
        """Delete every instance attribute whose name ends in ``sep``."""
        # Iterate over a snapshot: attributes are deleted during the loop.
        for key in list(self.__dict__.keys()):
            if key.endswith('sep'):
                delattr(self, key)

    def getPathEx(self, *args, **kw):
        """Return the path wrapped in a new ``PathScheme`` object."""
        return self.PathScheme(self.path, *args, **kw)

    def setPathEx(self, path, *args, **kw):
        """Store `path` as the path string.

        With extra arguments they are passed to ``path.asPathString()``;
        without, the path is simply converted with ``unicode()``.
        """
        if args or kw:
            self.path = path.asPathString(*args, **kw)
        else:
            self.path = unicode(path)

    def normalizePath(self, path=None):
        """Normalize `path` (default: own path), store and return it."""
        if path is None:
            path = self.path
        self.setPathEx(self.PathScheme(path).normalized())
        return self.path

    def isAbsPath(self):
        """Tell whether the path starts with the path separator."""
        if self.path:
            # Parsing a relative path stores ``_pathsep = None`` on the
            # instance; fall back to the class default so startswith()
            # is never handed None.
            return self.path.startswith(self._pathsep or self.__class__._pathsep)
        else:
            return False

    def getURI(self):
        return self

    def getURIValue(self):
        """
        Pseudocode from RFC 2396 at http://www.ietf.org/rfc/rfc2396.txt
        Section 5.2, step 7 to recreate a URI reference from its components.

        A separator that is ``None`` on the instance is left out even when
        its component is present.
        """
        result = []
        if self.scheme is not None:
            result.append(self.scheme)
            if self._schemesep is not None:
                result.append(self._schemesep)
        if self.authority is not None:
            if self._authoritysep is not None:
                result.append(self._authoritysep)
            result.append(self.authority)
        if self.path is not None:
            result.append(self.path)
        if self.query is not None:
            if self._querysep is not None:
                result.append(self._querysep)
            result.append(self.query)
        if self.fragment is not None:
            if self._fragmentsep is not None:
                result.append(self._fragmentsep)
            result.append(self.fragment)
        return u''.join([unicode(part) for part in result])

    def setURI(self, uri=None, **uriParts):
        """Replace all components from `uri` and/or keyword parts.

        uri -- a URI instance (its five main components are copied), any
            other non-None object (converted with ``unicode()`` and
            parsed), or ``None``.
        uriParts -- extra parts applied last through setURIParts().  When a
            string is parsed, the parsed values (including ``None`` for
            absent components) replace same-named keywords.

        Raises ValueError if the pattern does not match the string.
        """
        self.clearURIParts()
        if isinstance(uri, URI):
            self.setURIParts(uri.getURIParts())
        elif uri is not None:
            uri = unicode(uri)
            match = self._reURIPattern.match(uri)
            if not match:
                raise ValueError("URI string is not parseable: %r" % uri)
            uriParts.update(match.groupdict())

        if uriParts:
            self.setURIParts(uriParts)
    uri = property(getURIValue, setURI)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def join(self, uriRelative, append=False):
        """Resolve Relative URIs -- returns a new URI of the join

        Follows the example algorithm of RFC 1808, Section 4 ("Resolving
        Relative URLs"), from http://www.faqs.org/rfcs/rfc1808.html::

            Step 1:  An empty base URL makes the embedded URL absolute.
            Step 2:  a) An entirely empty embedded URL is the base URL.
                     b) An embedded URL with a scheme is absolute.
                     c) Otherwise the base URL's scheme is inherited.
            Step 3:  A non-empty embedded <net_loc> ends the resolution;
                     otherwise the base URL's <net_loc> is inherited.
            Step 4:  An embedded path starting with "/" is not relative.
            Step 5:  An empty embedded path inherits the base path, and
                     a) non-empty embedded <params> end the resolution,
                     b) otherwise an empty embedded <query> inherits the
                        base URL's <query>.
            Step 6:  The last segment of the base path is removed, the
                     embedded path is appended in its place, and "." and
                     "<segment>/.." sequences are then resolved.
            Step 7:  The components are recombined.

        Step 6 is done with segment stacks (see ``PathScheme``) rather than
        string pattern matching.  With ``append`` true the last segment of
        the base path is kept instead of being replaced.

        Note that this standardizes ``self`` in place, and that ``self``
        itself is returned when `uriRelative` is empty (step 2a).
        """
        uriBase = self.standardize()
        uriRelative = self.copyFromURI(uriRelative).standardize()

        # Step 1: Empty base URI
        if not uriBase:
            return uriRelative

        # Step 2a: Empty embedded URI
        if not uriRelative:
            return uriBase

        # Step 2b: Embedded specifies scheme
        if uriRelative.scheme:
            return uriRelative

        # Step 2c: Embedded inherits scheme
        uriRelative.scheme = uriBase.scheme

        # Step 3: Authority is specified...
        if uriRelative.authority:
            return uriRelative
        uriRelative.authority = uriBase.authority

        # Step 4: Check for starting "/" in path
        relativePath = uriRelative.path
        if relativePath and relativePath.startswith(self._pathsep):
            return uriRelative

        # Step 5: Empty path
        if relativePath and relativePath.startswith(self._pathattrsep):
            # Step 5a: Empty path with params
            # Replace uriBase's path params with the uriRelative path params
            # (a base without any path contributes an empty path)
            baseOnly = (uriBase.path or u'').split(self._pathattrsep, 1)[0]
            uriRelative.path = baseOnly + relativePath
            return uriRelative
        elif not relativePath:
            # Step 5: (continued)
            uriRelative.path = uriBase.path

            # Step 5b: query inheritance
            if not uriRelative.query:
                uriRelative.query = uriBase.query
            return uriRelative

        # Step 6: Path joining
        basePath = uriBase.getPathEx(attributes=None)
        if not append and basePath.segments:
            # Drop the base's last segment; a base without a path has no
            # segment to drop.
            basePath.segments.pop()
        basePath.addPath(uriRelative.path, andAttrs=True)
        uriRelative.setPathEx(basePath, root=True)

        # Step 7: Return result
        return uriRelative

    def append(self, uriRelative):
        """join() that keeps the last segment of the base path."""
        return self.join(uriRelative, True)

    def isRelative(self, uriRelative, incEqual=False, incPath=True):
        """Tell whether `uriRelative` is relative with respect to this URI.

        incEqual -- when true, a scheme or authority equal to this URI's is
            tolerated instead of making the reference absolute.
        incPath -- when true, a path starting with the path separator makes
            the reference absolute.

        Note that this standardizes ``self`` in place.
        """
        uriBase = self.standardize()
        uriRelative = self.copyFromURI(uriRelative).standardize()

        # Step 2a: Empty embedded URI
        if not uriRelative:
            return False

        # Step 2b: Embedded specifies scheme
        if uriRelative.scheme:
            if not incEqual:
                return False
            # Note: According to http://www.ietf.org/rfc/rfc2396.txt, the
            # following is not correct; but it's useful for some comparisons
            if uriRelative.scheme != uriBase.scheme:
                return False

        # Step 3: Authority is specified...
        if uriRelative.authority:
            if not incEqual:
                return False
            # Note: According to http://www.ietf.org/rfc/rfc2396.txt, the
            # following is not correct, and indicates an absolute uri
            if uriRelative.authority != uriBase.authority:
                return False

        if incPath and uriRelative.isAbsPath():
            return False

        # Tests conclude that this is a Relative URI
        return True

    def isAbsolute(self, uriRelative, incEqual=False, incPath=True):
        """Negation of isRelative() with the same arguments."""
        return not self.isRelative(uriRelative, incEqual, incPath)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Path utilities
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def __contains__(self, other):
        """``other in uri`` is ``uri.contains(other)`` (proper containment)."""
        return self.contains(other)

    def contains(self, other, proper=True):
        """Tell whether the path of `other` lies below this URI's path.

        With ``proper`` true the result of asSubpath() must be truthy;
        otherwise it only has to be not ``None``.
        """
        subpath = self.asSubpath(other, True)
        if proper:
            return bool(subpath)
        else:
            return subpath is not None

    def isSubpath(self, other, tolerant=True):
        """Tell whether asSubpath() yields a result for `other`."""
        subpath = self.asSubpath(other, tolerant)
        return subpath is not None

    def asSubpath(self, uriRelative, tolerant=False):
        """Return a copy of `uriRelative` whose path is relative to this path.

        Only the paths are compared.  When the other path does not start
        with this one, the ``PathScheme`` object decides between returning
        ``None`` (passed on here) and raising, based on ``tolerant``.
        """
        result = self.copyFromURI(uriRelative).standardize()
        path = self.getPathEx().asSubpath(result.getPathEx(), tolerant)
        if path is None:
            return None
        result.setPathEx(path)
        return result

    def asRelativeTo(self, other):
        """Return a copy of this URI whose path leads from here to `other`.

        The path is ``self.getPathEx().getRelative(other.getPathEx())``;
        the remaining main components are taken from this URI.
        """
        pathEx = self.getPathEx().getRelative(other.getPathEx())
        parts = self.getURIParts()
        parts['path'] = pathEx.asPathString()
        return self.copyFromURIParts(parts)
|---|
| 688 |
|
|---|
| 689 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 690 |
|
|---|
| 691 |
class URIAuthorityDefault(URI):
    """URI whose parser makes the ``//`` before the authority optional.

    In this pattern the authority group itself is mandatory (though it may
    match nothing) and only the ``//`` separator is optional, so text
    directly after the optional scheme, up to the first ``/``, ``?`` or
    ``#``, is captured as the authority.
    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Every piece except the authority is taken over from URI unchanged
    (_reURItype, _reURIpath,
     _reURIquery, _reURIfragment) = (URI._reURItype, URI._reURIpath,
                                     URI._reURIquery, URI._reURIfragment)
    _reURIauthority = r'(?:(?P<_authoritysep>//)?(?P<authority>[^/?#]*))'

    _reURI = ''.join([
        _reURItype,
        _reURIauthority,
        _reURIpath,
        _reURIquery,
        _reURIfragment,
        ])
    _reURIPattern = re.compile(_reURI)
|---|
| 705 |
|
|---|
| 706 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 707 |
#~ Class Aliases |
|---|
| 708 |
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|---|
| 709 |
|
|---|
| 710 |
# Alternate names for the RFC 2396 parser class
RFC2396URI = URIPathDefault = URI

# Alternate names for the parser that does not require "//" before the authority
URIAuthDefault = URIAuthority = URIAuth = URIHost = URIAuthorityDefault
|---|
| 717 |
|
|---|