diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 00000000..19f589e5
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,38 @@
+# What Python version is installed where:
+# http://www.appveyor.com/docs/installed-software#python
+
+# This configuration based on:
+# https://github.com/cookiecutter/cookiecutter/blob/5e65edf4c340993f462ddeaf44f99eb6f9da66f9/appveyor.yml
+
+environment:
+ matrix:
+ - PYTHON: "C:\\Python36-x64"
+ TOX_ENV: "test-py36,codecov"
+
+ - PYTHON: "C:\\Python37-x64"
+ TOX_ENV: "test-py37,codecov"
+
+ - PYTHON: "C:\\Python38-x64"
+ TOX_ENV: "test-py38,codecov"
+
+
+init:
+ - set OS=WINDOWS
+ - set PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
+ - "git config --system http.sslcainfo \"C:\\Program Files\\Git\\mingw64\\ssl\\certs\\ca-bundle.crt\""
+ - "%PYTHON%/python -V"
+ - "%PYTHON%/python -c \"import struct;print(8 * struct.calcsize('P'))\""
+ - set
+
+install:
+ - "%PYTHON%/Scripts/easy_install -U pip"
+ - "%PYTHON%/Scripts/pip install -U --force-reinstall tox virtualenv wheel"
+
+
+build: false # Not a C# project, build stuff at the test step instead.
+
+test_script:
+ - "%PYTHON%/Scripts/tox -e %TOX_ENV%"
+
+artifacts:
+ - path: dist\*
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 00000000..c7166536
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,9 @@
+[run]
+branch = True
+omit = */flycheck_*
+
+[report]
+precision = 2
+exclude_lines =
+ if TYPE_CHECKING
+ \s*\.\.\.$
diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
new file mode 100644
index 00000000..b5b7e9eb
--- /dev/null
+++ b/.github/workflows/cicd.yml
@@ -0,0 +1,238 @@
+# Docs:
+# https://help.github.com/en/actions/automating-your-workflow-with-github-actions
+
+
+
+name: CI/CD
+
+
+on:
+ push:
+ branches: ["master"]
+ pull_request:
+ branches: ["master"]
+
+
+jobs:
+
+ info:
+
+ name: Workflow information
+ runs-on: ubuntu-latest
+ timeout-minutes: 1
+
+ steps:
+
+ - name: Print GitHub Context
+ env:
+ GITHUB_CONTEXT: ${{ toJson(github) }}
+ run: echo "${GITHUB_CONTEXT}";
+
+ - name: Print Job Context
+ env:
+ JOB_CONTEXT: ${{ toJson(job) }}
+ run: echo "${JOB_CONTEXT}";
+
+ - name: Print Steps Context
+ env:
+ STEPS_CONTEXT: ${{ toJson(steps) }}
+ run: echo "${STEPS_CONTEXT}";
+
+ - name: Print Runner Context
+ env:
+ RUNNER_CONTEXT: ${{ toJson(runner) }}
+ run: echo "${RUNNER_CONTEXT}";
+
+ - name: Print Strategy Context
+ env:
+ STRATEGY_CONTEXT: ${{ toJson(strategy) }}
+ run: echo "${STRATEGY_CONTEXT}";
+
+ - name: Print Matrix Context
+ env:
+ MATRIX_CONTEXT: ${{ toJson(matrix) }}
+ run: echo "${MATRIX_CONTEXT}";
+
+
+ flake8:
+
+ name: Flake8 (linter)
+
+ runs-on: ubuntu-latest
+ timeout-minutes: 5
+
+ steps:
+
+ - name: Checkout source code
+ uses: actions/checkout@v2
+
+ - name: Install Python
+ uses: actions/setup-python@v1
+ with:
+ python-version: "3.9"
+
+ - name: Install Tox
+ run: pip install tox;
+
+ - name: Run Flake8
+ run: tox -e flake8;
+
+
+ black:
+
+ name: Black (linter)
+
+ runs-on: ubuntu-latest
+ timeout-minutes: 5
+
+ steps:
+
+ - name: Checkout source code
+ uses: actions/checkout@v2
+
+ - name: Install Python
+ uses: actions/setup-python@v1
+ with:
+ python-version: "3.9"
+
+ - name: Install Tox
+ run: pip install tox;
+
+ - name: Run Black
+ run: tox -e black;
+
+
+ mypy:
+ name: Mypy (static type checker)
+
+ runs-on: ubuntu-latest
+ timeout-minutes: 5
+
+ steps:
+
+ - name: Checkout source code
+ uses: actions/checkout@v2
+
+ - name: Install Python
+ uses: actions/setup-python@v1
+ with:
+ python-version: "3.9"
+
+ - name: Install Tox
+ run: pip install tox;
+
+ - name: Run Mypy
+ run: tox -e mypy;
+
+
+ docs:
+
+ name: Build documentation
+
+ runs-on: ubuntu-latest
+ timeout-minutes: 5
+
+ steps:
+
+ - name: Checkout source code
+ uses: actions/checkout@v2
+
+ - name: Install Python
+ uses: actions/setup-python@v1
+ with:
+ python-version: "3.9"
+
+ - name: Install Tox
+ run: pip install tox;
+
+ - name: Build documentation
+ run: tox -e docs;
+
+
+ packaging:
+ name: Packaging
+
+ runs-on: ubuntu-latest
+ timeout-minutes: 5
+
+ steps:
+
+ - name: Checkout source code
+ uses: actions/checkout@v2
+
+ - name: Install Python
+ uses: actions/setup-python@v1
+ with:
+ python-version: "3.9"
+
+ - name: Install Tox
+ run: pip install tox;
+
+ - name: Check packaging
+ run: tox -e packaging;
+
+
+ unit:
+ name: Unit Tests using Python ${{ matrix.python }} on Ubuntu
+
+ needs: [flake8, black, mypy, docs, packaging]
+
+ runs-on: ubuntu-latest
+ timeout-minutes: 30
+ strategy:
+ matrix:
+ python: ["2.7", "3.5", "3.6", "3.7", "3.8", "3.9", "pypy2", "pypy3"]
+
+ steps:
+
+ - name: Checkout source code
+ uses: actions/checkout@v2
+
+ - name: Install Python
+ uses: actions/setup-python@v1
+ with:
+ python-version: ${{ matrix.python }}
+
+ - name: Install Tox
+ run: pip install tox;
+
+ - name: Run unit tests
+ shell: bash
+ # This hairy shell code is here to map the Python versions
+ # specified above to the equivalents used in Tox environments.
+ run: |
+ set -eux
+ py="${{ matrix.python }}";
+ if [[ $py =~ pypy ]]; then # PyPy
+ py_test="${py}";
+ else # CPython
+ py_test="py${py/./}"; # Add "py" prefix, remove "."
+ fi;
+ env_test="test-${py_test}-coverage_xml";
+ echo "Test environment: ${env_test}";
+ tox -e "${env_test}";
+ tar cvzf pytest-logs.tgz ".tox/${env_test}/log";
+
+ - name: Upload pytest log artifact
+ if: failure()
+ uses: actions/upload-artifact@v1
+ with:
+ name: pytest-logs
+ path: pytest-logs.tgz
+
+ # Use the latest supported Python version for combining coverage to
+ # prevent parsing errors in older versions when looking at modern code.
+ - uses: "actions/setup-python@v2"
+ with:
+ python-version: "3.9"
+
+ - name: "Upload coverage to Codecov"
+ uses: "codecov/codecov-action@v1"
+ with:
+ env_vars: GITHUB_REF,GITHUB_COMMIT,GITHUB_USER,GITHUB_WORKFLOW
+ fail_ci_if_error: true
+ env:
+ GITHUB_REF: ${{ github.ref }}
+ GITHUB_COMMIT: ${{ github.sha }}
+ GITHUB_USER: ${{ github.actor }}
+ GITHUB_WORKFLOW: ${{ github.workflow }}
diff --git a/.gitignore b/.gitignore
index 0ef6fd4d..35230642 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,5 @@
-docs/_build
+/docs/_build/
tmp.py
-htmlcov/
-.coverage.*
*.py[cod]
# emacs
@@ -31,11 +29,20 @@ lib64
# Installer logs
pip-log.txt
-# Unit test / coverage reports
-.coverage
-.tox
+# Testing
+/.tox/
+/.hypothesis/
nosetests.xml
+# Coverage
+/.coverage
+/.coverage.*
+/htmlcov/
+/.mypy_cache/
+
+# Documentation
+/htmldocs/
+
# Translations
*.mo
diff --git a/.tox-coveragerc b/.tox-coveragerc
deleted file mode 100644
index 44178a43..00000000
--- a/.tox-coveragerc
+++ /dev/null
@@ -1,14 +0,0 @@
-[run]
-branch = True
-source =
- hyperlink
- ../hyperlink
-omit =
- */flycheck_*
-
-[paths]
-source =
- ../hyperlink
- */lib/python*/site-packages/hyperlink
- */Lib/site-packages/hyperlink
- */pypy/site-packages/hyperlink
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 1aff2d28..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-sudo: false
-cache:
-directories:
-- $HOME/.cache/pip
-
-language: python
-
-
-matrix:
- include:
- - python: "2.7"
- env: TOXENV=py27
- - python: "3.4"
- env: TOXENV=py34
- - python: "3.5"
- env: TOXENV=py35
- - python: "3.6"
- env: TOXENV=py36
- - python: "pypy"
- env: TOXENV=pypy
- - python: "2.7"
- env: TOXENV=packaging
-
-
-install:
- - "pip install -r requirements-test.txt"
-
-script:
- - tox
-
-
-before_install:
- - pip install codecov coverage
-
-
-after_success:
- - tox -e coverage-report
- - COVERAGE_FILE=.tox/.coverage coverage xml
- - codecov -f coverage.xml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1bc4f61e..50f34c6a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,42 @@
# Hyperlink Changelog
-## dev (not yet released)
+## Next
+
+* CPython 3.9 added to test matrix
+
+## 21.0.0
+
+*(January 7, 2021)*
+
+* Update plus sign (`+`) handling to work with/like HTML form encoding
+ (`POST`) by default, fixes [#129][i129], and associated roundtripping ([#146][i146]).
+* Package IDNA tables. ([#134][i134])
+* Long overdue dependency bumps
+
+[i129]: https://github.com/python-hyper/hyperlink/issues/129
+[i134]: https://github.com/python-hyper/hyperlink/issues/134
+[i146]: https://github.com/python-hyper/hyperlink/issues/146
+
+## 20.0.1
+
+*(August 4, 2020)*
+
+Rerelease to fix packaging metadata around conditional requirements.
+See [issue #133](https://github.com/python-hyper/hyperlink/issues/133)
+for more details.
+
+## 20.0.0
+
+*(August 3, 2020)*
+
+* CPython 3.7 and 3.8 and PyPy3 added to test matrix
+* Hyperlink now has type hints and they are now exported per
+ [PEP 561](https://www.python.org/dev/peps/pep-0561/).
+* Several bugs related to hidden state were fixed, making it so that all data
+ on a `URL` object (including `rooted` and `uses_netloc`) is reflected by and
+ consistent with its textual representation.
+ This does mean that sometimes these constructor arguments are ignored, if it
+ would create invalid or unparseable URL text.
## 19.0.0
@@ -13,7 +49,8 @@ A query parameter-centric release, with two enhancements:
[#39](https://github.com/python-hyper/hyperlink/pull/39))
* `URL.remove()` now accepts *value* and *limit* parameters, allowing
for removal of specific name-value pairs, as well as limiting the
- number of removals. (see [#71](https://github.com/python-hyper/hyperlink/pull/71))
+ number of removals.
+ (See [#71](https://github.com/python-hyper/hyperlink/pull/71))
## 18.0.0
diff --git a/LICENSE b/LICENSE
index 30953dde..a73f882f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -5,6 +5,7 @@ Jean Paul Calderone
Adi Roiban
Amber Hawkie Brown
Mahmoud Hashemi
+Wilfredo Sanchez Vega
and others that have contributed code to the public domain.
diff --git a/MANIFEST.in b/MANIFEST.in
index c4b6e32f..5869a052 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,9 @@
-include README.md LICENSE CHANGELOG.md tox.ini requirements-test.txt .coveragerc Makefile pytest.ini .tox-coveragerc
-exclude TODO.md appveyor.yml
+include README.md LICENSE CHANGELOG.md
+include tox.ini pytest.ini .coveragerc
+exclude TODO.md
+exclude .appveyor.yml
+
+include src/hyperlink/idna-tables-properties.csv.gz
graft docs
prune docs/_build
diff --git a/README.md b/README.md
index bca5d5f0..017f9eb8 100644
--- a/README.md
+++ b/README.md
@@ -2,15 +2,30 @@
*Cool URLs that don't change.*
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Hyperlink provides a pure-Python implementation of immutable
URLs. Based on [RFC 3986][rfc3986] and [3987][rfc3987], the Hyperlink URL
makes working with both URIs and IRIs easy.
-Hyperlink is tested against Python 2.7, 3.4, 3.5, 3.6, and PyPy.
+Hyperlink is tested against Python 2.7, 3.5, 3.6, 3.7, 3.8, 3.9, and PyPy.
Full documentation is available on [Read the Docs][docs].
diff --git a/TODO.md b/TODO.md
index e8ac57ab..f5d2fdda 100644
--- a/TODO.md
+++ b/TODO.md
@@ -29,7 +29,7 @@
* Speed up percent encoding with urlutils approach
* More default ports
* resolve dots on (empty) click
-* better error on URL constructor (single string argument leads to succesful instantiation with invalid scheme)
+* better error on URL constructor (single string argument leads to successful instantiation with invalid scheme)
* pct encode userinfo
* `__hash__`
* README
diff --git a/appveyor.yml b/appveyor.yml
deleted file mode 100644
index 5f146255..00000000
--- a/appveyor.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-# What Python version is installed where:
-# http://www.appveyor.com/docs/installed-software#python
-
-# This configuration based on:
-# https://github.com/audreyr/cookiecutter/commit/3c4685f536afda3be93da3fe3039cec0ab0d60a3
-
-environment:
- matrix:
- - PYTHON: "C:\\Python27-x64"
- TOX_ENV: "py27"
-
- - PYTHON: "C:\\Python36-x64"
- TOX_ENV: "py36"
-
-
-init:
- - set PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
- - "git config --system http.sslcainfo \"C:\\Program Files\\Git\\mingw64\\ssl\\certs\\ca-bundle.crt\""
- - "%PYTHON%/python -V"
- - "%PYTHON%/python -c \"import struct;print(8 * struct.calcsize(\'P\'))\""
-
-install:
- - "%PYTHON%/Scripts/easy_install -U pip"
- - "%PYTHON%/Scripts/pip install -U --force-reinstall tox wheel"
-
-
-build: false # Not a C# project, build stuff at the test step instead.
-
-test_script:
- - "%PYTHON%/Scripts/tox -e %TOX_ENV%"
-
-after_test:
- - "%PYTHON%/python setup.py bdist_wheel"
- - ps: "ls dist"
-
-on_success:
- # Report coverage results to codecov.io
- # and export tox environment variables
- - "%PYTHON%/Scripts/tox -e coverage-report"
- - "%PYTHON%/Scripts/pip install codecov coverage"
- - set COVERAGE_FILE=.tox/.coverage
- - "%PYTHON%/Scripts/coverage xml"
- - set OS=WINDOWS
- - "%PYTHON%/Scripts/codecov -f coverage.xml -e TOX_ENV OS"
-
-artifacts:
- - path: dist\*
-
-#on_success:
-# - TODO: upload the content of dist/*.whl to a public wheelhouse
diff --git a/docs/api.rst b/docs/api.rst
index 47854540..93ebb782 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -5,11 +5,43 @@ Hyperlink API
.. automodule:: hyperlink._url
+.. contents::
+ :local:
+
Creation
--------
-Before you can work with URLs, you must create URLs. There are two
-ways to create URLs, from parts and from text.
+Before you can work with URLs, you must create URLs.
+
+Parsing Text
+^^^^^^^^^^^^
+
+If you already have a textual URL, the easiest way to get URL objects
+is with the :func:`parse()` function:
+
+.. autofunction:: hyperlink.parse
+
+By default, :func:`~hyperlink.parse()` returns an instance of
+:class:`DecodedURL`, a URL type that handles all encoding for you, by
+wrapping the lower-level :class:`URL`.
+
+DecodedURL
+^^^^^^^^^^
+
+.. autoclass:: hyperlink.DecodedURL
+.. automethod:: hyperlink.DecodedURL.from_text
+
+The Encoded URL
+^^^^^^^^^^^^^^^
+
+The lower-level :class:`URL` looks very similar to the
+:class:`DecodedURL`, but does not handle all encoding cases for
+you. Use with caution.
+
+.. note::
+
+ :class:`URL` is also available as an alias,
+ ``hyperlink.EncodedURL`` for more explicit usage.
.. autoclass:: hyperlink.URL
.. automethod:: hyperlink.URL.from_text
@@ -61,7 +93,6 @@ URLs have many parts, and URL objects have many attributes to represent them.
.. autoattribute:: hyperlink.URL.userinfo
.. autoattribute:: hyperlink.URL.user
.. autoattribute:: hyperlink.URL.rooted
-.. autoattribute:: hyperlink.URL.family
Low-level functions
-------------------
@@ -70,6 +101,6 @@ A couple of notable helpers used by the :class:`~hyperlink.URL` type.
.. autoclass:: hyperlink.URLParseError
.. autofunction:: hyperlink.register_scheme
-.. autofunction:: hyperlink.parse_host
+.. autofunction:: hyperlink.parse
.. TODO: run doctests in docs?
diff --git a/docs/conf.py b/docs/conf.py
index 0eb8cf66..f8a4fb98 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -61,11 +61,11 @@
# General information about the project.
project = u'hyperlink'
-copyright = u'2018, Mahmoud Hashemi'
+copyright = u'2021, Mahmoud Hashemi'
author = u'Mahmoud Hashemi'
-version = '19.0'
-release = '19.0.0'
+version = '21.0'
+release = '21.0.0'
if os.name != 'nt':
today_fmt = '%B %d, %Y'
@@ -76,7 +76,7 @@
pygments_style = 'sphinx'
# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'python': ('https://docs.python.org/2.7', None)}
+intersphinx_mapping = {'python': ('https://docs.python.org/3.7', None)}
# -- Options for HTML output ----------------------------------------------
diff --git a/docs/index.rst b/docs/index.rst
index 2e65635d..cfc0c47d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,7 +10,7 @@ hyperlink
URLs. Based on `RFC 3986`_ and `RFC 3987`_, the Hyperlink URL balances
simplicity and correctness for both :ref:`URIs and IRIs `.
-Hyperlink is tested against Python 2.7, 3.4, 3.5, 3.6, and PyPy.
+Hyperlink is tested against Python 2.7, 3.5, 3.6, 3.7, 3.8, 3.9, and PyPy.
For an introduction to the hyperlink library, its background, and URLs
in general, see `this talk from PyConWeb 2017`_ (and `the accompanying
@@ -39,9 +39,9 @@ library. The easiest way to install is with pip::
Then, URLs are just an import away::
- from hyperlink import URL
+ import hyperlink
- url = URL.from_text(u'http://github.com/python-hyper/hyperlink?utm_source=readthedocs')
+ url = hyperlink.parse(u'http://github.com/python-hyper/hyperlink?utm_source=readthedocs')
better_url = url.replace(scheme=u'https', port=443)
org_url = better_url.click(u'.')
@@ -49,7 +49,7 @@ Then, URLs are just an import away::
print(org_url.to_text())
# prints: https://github.com/python-hyper/
- print(better_url.get(u'utm_source'))
+ print(better_url.get(u'utm_source')[0])
# prints: readthedocs
See :ref:`the API docs ` for more usage examples.
diff --git a/hyperlink/__init__.py b/hyperlink/__init__.py
deleted file mode 100644
index a027d52d..00000000
--- a/hyperlink/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-
-from ._url import (URL,
- parse,
- EncodedURL,
- DecodedURL,
- URLParseError,
- register_scheme)
-
-__all__ = [
- "URL",
- "parse",
- "EncodedURL",
- "DecodedURL",
- "URLParseError",
- "register_scheme",
-]
diff --git a/hyperlink/_url.py b/hyperlink/_url.py
deleted file mode 100644
index 50e8535e..00000000
--- a/hyperlink/_url.py
+++ /dev/null
@@ -1,1921 +0,0 @@
-# -*- coding: utf-8 -*-
-u"""Hyperlink provides Pythonic URL parsing, construction, and rendering.
-
-Usage is straightforward::
-
- >>> from hyperlink import URL
- >>> url = URL.from_text(u'http://github.com/mahmoud/hyperlink?utm_source=docs')
- >>> url.host
- u'github.com'
- >>> secure_url = url.replace(scheme=u'https')
- >>> secure_url.get('utm_source')[0]
- u'docs'
-
-As seen here, the API revolves around the lightweight and immutable
-:class:`URL` type, documented below.
-"""
-
-import re
-import sys
-import string
-import socket
-from unicodedata import normalize
-try:
- from socket import inet_pton
-except ImportError:
- inet_pton = None # defined below
-try:
- from collections.abc import Mapping
-except ImportError: # Python 2
- from collections import Mapping
-
-# Note: IDNAError is a subclass of UnicodeError
-from idna import encode as idna_encode, decode as idna_decode, IDNAError
-
-
-if inet_pton is None:
- # based on https://gist.github.com/nnemkin/4966028
- # this code only applies on Windows Python 2.7
- import ctypes
-
- class _sockaddr(ctypes.Structure):
- _fields_ = [("sa_family", ctypes.c_short),
- ("__pad1", ctypes.c_ushort),
- ("ipv4_addr", ctypes.c_byte * 4),
- ("ipv6_addr", ctypes.c_byte * 16),
- ("__pad2", ctypes.c_ulong)]
-
- WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA
- WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA
-
- def inet_pton(address_family, ip_string):
- addr = _sockaddr()
- ip_string = ip_string.encode('ascii')
- addr.sa_family = address_family
- addr_size = ctypes.c_int(ctypes.sizeof(addr))
-
- if WSAStringToAddressA(ip_string, address_family, None, ctypes.byref(addr), ctypes.byref(addr_size)) != 0:
- raise socket.error(ctypes.FormatError())
-
- if address_family == socket.AF_INET:
- return ctypes.string_at(addr.ipv4_addr, 4)
- if address_family == socket.AF_INET6:
- return ctypes.string_at(addr.ipv6_addr, 16)
- raise socket.error('unknown address family')
-
-
-PY2 = (sys.version_info[0] == 2)
-unicode = type(u'')
-try:
- unichr
-except NameError:
- unichr = chr # py3
-NoneType = type(None)
-
-
-# from boltons.typeutils
-def make_sentinel(name='_MISSING', var_name=None):
- """Creates and returns a new **instance** of a new class, suitable for
- usage as a "sentinel", a kind of singleton often used to indicate
- a value is missing when ``None`` is a valid input.
-
- Args:
- name (str): Name of the Sentinel
- var_name (str): Set this name to the name of the variable in
- its respective module enable pickleability.
-
- >>> make_sentinel(var_name='_MISSING')
- _MISSING
-
- The most common use cases here in boltons are as default values
- for optional function arguments, partly because of its
- less-confusing appearance in automatically generated
- documentation. Sentinels also function well as placeholders in queues
- and linked lists.
-
- .. note::
-
- By design, additional calls to ``make_sentinel`` with the same
- values will not produce equivalent objects.
-
- >>> make_sentinel('TEST') == make_sentinel('TEST')
- False
- >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST'))
- False
-
- """
- class Sentinel(object):
- def __init__(self):
- self.name = name
- self.var_name = var_name
-
- def __repr__(self):
- if self.var_name:
- return self.var_name
- return '%s(%r)' % (self.__class__.__name__, self.name)
- if var_name:
- def __reduce__(self):
- return self.var_name
-
- def __nonzero__(self):
- return False
-
- __bool__ = __nonzero__
-
- return Sentinel()
-
-
-_unspecified = _UNSET = make_sentinel('_UNSET')
-
-
-# RFC 3986 Section 2.3, Unreserved URI Characters
-# https://tools.ietf.org/html/rfc3986#section-2.3
-_UNRESERVED_CHARS = frozenset('~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- 'abcdefghijklmnopqrstuvwxyz')
-
-
-# URL parsing regex (based on RFC 3986 Appendix B, with modifications)
-_URL_RE = re.compile(r'^((?P[^:/?#]+):)?'
- r'((?P<_netloc_sep>//)'
- r'(?P[^/?#]*))?'
- r'(?P[^?#]*)'
- r'(\?(?P[^#]*))?'
- r'(#(?P.*))?$')
-_SCHEME_RE = re.compile(r'^[a-zA-Z0-9+-.]*$')
-_AUTHORITY_RE = re.compile(r'^(?:(?P[^@/?#]*)@)?'
- r'(?P'
- r'(?:\[(?P[^[\]/?#]*)\])'
- r'|(?P[^:/?#[\]]*)'
- r'|(?P.*?))?'
- r'(?::(?P.*))?$')
-
-
-_HEX_CHAR_MAP = dict([((a + b).encode('ascii'),
- unichr(int(a + b, 16)).encode('charmap'))
- for a in string.hexdigits for b in string.hexdigits])
-_ASCII_RE = re.compile('([\x00-\x7f]+)')
-
-# RFC 3986 section 2.2, Reserved Characters
-# https://tools.ietf.org/html/rfc3986#section-2.2
-_GEN_DELIMS = frozenset(u':/?#[]@')
-_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
-_ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS
-
-_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u'%')
-_USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE
-_PATH_SAFE = _USERINFO_SAFE | set(u':@')
-_PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE
-_SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(':')
-_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE
-_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u'/?')
-_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
-_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u'&+')
-_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE
-_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u'=')
-_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE
-
-
-def _make_decode_map(delims, allow_percent=False):
- ret = dict(_HEX_CHAR_MAP)
- if not allow_percent:
- delims = set(delims) | set([u'%'])
- for delim in delims:
- _hexord = '{0:02X}'.format(ord(delim)).encode('ascii')
- _hexord_lower = _hexord.lower()
- ret.pop(_hexord)
- if _hexord != _hexord_lower:
- ret.pop(_hexord_lower)
- return ret
-
-
-def _make_quote_map(safe_chars):
- ret = {}
- # v is included in the dict for py3 mostly, because bytestrings
- # are iterables of ints, of course!
- for i, v in zip(range(256), range(256)):
- c = chr(v)
- if c in safe_chars:
- ret[c] = ret[v] = c
- else:
- ret[c] = ret[v] = '%{0:02X}'.format(i)
- return ret
-
-
-_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
-_USERINFO_DECODE_MAP = _make_decode_map(_USERINFO_DELIMS)
-_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
-_SCHEMELESS_PATH_PART_QUOTE_MAP = _make_quote_map(_SCHEMELESS_PATH_SAFE)
-_PATH_DECODE_MAP = _make_decode_map(_PATH_DELIMS)
-_QUERY_KEY_QUOTE_MAP = _make_quote_map(_QUERY_KEY_SAFE)
-_QUERY_KEY_DECODE_MAP = _make_decode_map(_QUERY_KEY_DELIMS)
-_QUERY_VALUE_QUOTE_MAP = _make_quote_map(_QUERY_VALUE_SAFE)
-_QUERY_VALUE_DECODE_MAP = _make_decode_map(_QUERY_VALUE_DELIMS)
-_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
-_FRAGMENT_DECODE_MAP = _make_decode_map(_FRAGMENT_DELIMS)
-_UNRESERVED_QUOTE_MAP = _make_quote_map(_UNRESERVED_CHARS)
-_UNRESERVED_DECODE_MAP = dict([(k, v) for k, v in _HEX_CHAR_MAP.items()
- if v.decode('ascii', 'replace')
- in _UNRESERVED_CHARS])
-
-_ROOT_PATHS = frozenset(((), (u'',)))
-
-
-def _encode_reserved(text, maximal=True):
- """A very comprehensive percent encoding for encoding all
- delimiters. Used for arguments to DecodedURL, where a % means a
- percent sign, and not the character used by URLs for escaping
- bytes.
- """
- if maximal:
- bytestr = normalize('NFC', text).encode('utf8')
- return u''.join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr])
- return u''.join([_UNRESERVED_QUOTE_MAP[t] if t in _UNRESERVED_CHARS
- else t for t in text])
-
-
-def _encode_path_part(text, maximal=True):
- "Percent-encode a single segment of a URL path."
- if maximal:
- bytestr = normalize('NFC', text).encode('utf8')
- return u''.join([_PATH_PART_QUOTE_MAP[b] for b in bytestr])
- return u''.join([_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t
- for t in text])
-
-
-def _encode_schemeless_path_part(text, maximal=True):
- """Percent-encode the first segment of a URL path for a URL without a
- scheme specified.
- """
- if maximal:
- bytestr = normalize('NFC', text).encode('utf8')
- return u''.join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr])
- return u''.join([_SCHEMELESS_PATH_PART_QUOTE_MAP[t]
- if t in _SCHEMELESS_PATH_DELIMS else t for t in text])
-
-
-def _encode_path_parts(text_parts, rooted=False, has_scheme=True,
- has_authority=True, joined=True, maximal=True):
- """
- Percent-encode a tuple of path parts into a complete path.
-
- Setting *maximal* to False percent-encodes only the reserved
- characters that are syntactically necessary for serialization,
- preserving any IRI-style textual data.
-
- Leaving *maximal* set to its default True percent-encodes
- everything required to convert a portion of an IRI to a portion of
- a URI.
-
- RFC 3986 3.3:
-
- If a URI contains an authority component, then the path component
- must either be empty or begin with a slash ("/") character. If a URI
- does not contain an authority component, then the path cannot begin
- with two slash characters ("//"). In addition, a URI reference
- (Section 4.1) may be a relative-path reference, in which case the
- first path segment cannot contain a colon (":") character.
- """
- if not text_parts:
- return u'' if joined else text_parts
- if rooted:
- text_parts = (u'',) + text_parts
- # elif has_authority and text_parts:
- # raise Exception('see rfc above') # TODO: too late to fail like this?
- encoded_parts = []
- if has_scheme:
- encoded_parts = [_encode_path_part(part, maximal=maximal)
- if part else part for part in text_parts]
- else:
- encoded_parts = [_encode_schemeless_path_part(text_parts[0])]
- encoded_parts.extend([_encode_path_part(part, maximal=maximal)
- if part else part for part in text_parts[1:]])
- if joined:
- return u'/'.join(encoded_parts)
- return tuple(encoded_parts)
-
-
-def _encode_query_key(text, maximal=True):
- """
- Percent-encode a single query string key or value.
- """
- if maximal:
- bytestr = normalize('NFC', text).encode('utf8')
- return u''.join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr])
- return u''.join([_QUERY_KEY_QUOTE_MAP[t] if t in _QUERY_KEY_DELIMS else t
- for t in text])
-
-
-def _encode_query_value(text, maximal=True):
- """
- Percent-encode a single query string key or value.
- """
- if maximal:
- bytestr = normalize('NFC', text).encode('utf8')
- return u''.join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr])
- return u''.join([_QUERY_VALUE_QUOTE_MAP[t]
- if t in _QUERY_VALUE_DELIMS else t for t in text])
-
-
-def _encode_fragment_part(text, maximal=True):
- """Quote the fragment part of the URL. Fragments don't have
- subdelimiters, so the whole URL fragment can be passed.
- """
- if maximal:
- bytestr = normalize('NFC', text).encode('utf8')
- return u''.join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr])
- return u''.join([_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t
- for t in text])
-
-
-def _encode_userinfo_part(text, maximal=True):
- """Quote special characters in either the username or password
- section of the URL.
- """
- if maximal:
- bytestr = normalize('NFC', text).encode('utf8')
- return u''.join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr])
- return u''.join([_USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS
- else t for t in text])
-
-
-
-# This port list painstakingly curated by hand searching through
-# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
-# and
-# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
-SCHEME_PORT_MAP = {'acap': 674, 'afp': 548, 'dict': 2628, 'dns': 53,
- 'file': None, 'ftp': 21, 'git': 9418, 'gopher': 70,
- 'http': 80, 'https': 443, 'imap': 143, 'ipp': 631,
- 'ipps': 631, 'irc': 194, 'ircs': 6697, 'ldap': 389,
- 'ldaps': 636, 'mms': 1755, 'msrp': 2855, 'msrps': None,
- 'mtqp': 1038, 'nfs': 111, 'nntp': 119, 'nntps': 563,
- 'pop': 110, 'prospero': 1525, 'redis': 6379, 'rsync': 873,
- 'rtsp': 554, 'rtsps': 322, 'rtspu': 5005, 'sftp': 22,
- 'smb': 445, 'snmp': 161, 'ssh': 22, 'steam': None,
- 'svn': 3690, 'telnet': 23, 'ventrilo': 3784, 'vnc': 5900,
- 'wais': 210, 'ws': 80, 'wss': 443, 'xmpp': None}
-
-# This list of schemes that don't use authorities is also from the link above.
-NO_NETLOC_SCHEMES = set(['urn', 'about', 'bitcoin', 'blob', 'data', 'geo',
- 'magnet', 'mailto', 'news', 'pkcs11',
- 'sip', 'sips', 'tel'])
-# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
-
-
-def register_scheme(text, uses_netloc=True, default_port=None):
- """Registers new scheme information, resulting in correct port and
- slash behavior from the URL object. There are dozens of standard
- schemes preregistered, so this function is mostly meant for
- proprietary internal customizations or stopgaps on missing
- standards information. If a scheme seems to be missing, please
- `file an issue`_!
-
- Args:
- text (unicode): Text representing the scheme.
- (the 'http' in 'http://hatnote.com')
- uses_netloc (bool): Does the scheme support specifying a
- network host? For instance, "http" does, "mailto" does
- not. Defaults to True.
- default_port (int): The default port, if any, for netloc-using
- schemes.
-
- .. _file an issue: https://github.com/mahmoud/hyperlink/issues
-
- """
- text = text.lower()
- if default_port is not None:
- try:
- default_port = int(default_port)
- except (ValueError, TypeError):
- raise ValueError('default_port expected integer or None, not %r'
- % (default_port,))
-
- if uses_netloc is True:
- SCHEME_PORT_MAP[text] = default_port
- elif uses_netloc is False:
- if default_port is not None:
- raise ValueError('unexpected default port while specifying'
- ' non-netloc scheme: %r' % default_port)
- NO_NETLOC_SCHEMES.add(text)
- else:
- raise ValueError('uses_netloc expected bool, not: %r' % uses_netloc)
-
- return
-
-
-def scheme_uses_netloc(scheme, default=None):
- """Whether or not a URL uses :code:`:` or :code:`://` to separate the
- scheme from the rest of the URL depends on the scheme's own
- standard definition. There is no way to infer this behavior
- from other parts of the URL. A scheme either supports network
- locations or it does not.
-
- The URL type's approach to this is to check for explicitly
- registered schemes, with common schemes like HTTP
- preregistered. This is the same approach taken by
- :mod:`urlparse`.
-
- URL adds two additional heuristics if the scheme as a whole is
- not registered. First, it attempts to check the subpart of the
- scheme after the last ``+`` character. This adds intuitive
- behavior for schemes like ``git+ssh``. Second, if a URL with
- an unrecognized scheme is loaded, it will maintain the
- separator it sees.
- """
- if not scheme:
- return False
- scheme = scheme.lower()
- if scheme in SCHEME_PORT_MAP:
- return True
- if scheme in NO_NETLOC_SCHEMES:
- return False
- if scheme.split('+')[-1] in SCHEME_PORT_MAP:
- return True
- return default
-
-
-class URLParseError(ValueError):
- """Exception inheriting from :exc:`ValueError`, raised when failing to
- parse a URL. Mostly raised on invalid ports and IPv6 addresses.
- """
- pass
-
-
-def _optional(argument, default):
- if argument is _UNSET:
- return default
- else:
- return argument
-
-
-def _typecheck(name, value, *types):
- """
- Check that the given *value* is one of the given *types*, or raise an
- exception describing the problem using *name*.
- """
- if not types:
- raise ValueError('expected one or more types, maybe use _textcheck?')
- if not isinstance(value, types):
- raise TypeError("expected %s for %s, got %r"
- % (" or ".join([t.__name__ for t in types]),
- name, value))
- return value
-
-
-def _textcheck(name, value, delims=frozenset(), nullable=False):
- if not isinstance(value, unicode):
- if nullable and value is None:
- return value # used by query string values
- else:
- str_name = "unicode" if PY2 else "str"
- exp = str_name + ' or NoneType' if nullable else str_name
- raise TypeError('expected %s for %s, got %r' % (exp, name, value))
- if delims and set(value) & set(delims): # TODO: test caching into regexes
- raise ValueError('one or more reserved delimiters %s present in %s: %r'
- % (''.join(delims), name, value))
- return value
-
-
-def iter_pairs(iterable):
- """
- Iterate over the (key, value) pairs in ``iterable``.
-
- This handles dictionaries sensibly, and falls back to assuming the
- iterable yields (key, value) pairs. This behaviour is similar to
- what Python's ``dict()`` constructor does.
- """
- if isinstance(iterable, Mapping):
- iterable = iterable.items()
- return iter(iterable)
-
-
-def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False):
- return _percent_decode(text, normalize_case=normalize_case,
- encode_stray_percents=encode_stray_percents,
- _decode_map=_UNRESERVED_DECODE_MAP)
-
-
-def _decode_userinfo_part(text, normalize_case=False, encode_stray_percents=False):
- return _percent_decode(text, normalize_case=normalize_case,
- encode_stray_percents=encode_stray_percents,
- _decode_map=_USERINFO_DECODE_MAP)
-
-
-def _decode_path_part(text, normalize_case=False, encode_stray_percents=False):
- """
- >>> _decode_path_part(u'%61%77%2f%7a')
- u'aw%2fz'
- >>> _decode_path_part(u'%61%77%2f%7a', normalize_case=True)
- u'aw%2Fz'
- """
- return _percent_decode(text, normalize_case=normalize_case,
- encode_stray_percents=encode_stray_percents,
- _decode_map=_PATH_DECODE_MAP)
-
-
-def _decode_query_key(text, normalize_case=False, encode_stray_percents=False):
- return _percent_decode(text, normalize_case=normalize_case,
- encode_stray_percents=encode_stray_percents,
- _decode_map=_QUERY_KEY_DECODE_MAP)
-
-
-def _decode_query_value(text, normalize_case=False, encode_stray_percents=False):
- return _percent_decode(text, normalize_case=normalize_case,
- encode_stray_percents=encode_stray_percents,
- _decode_map=_QUERY_VALUE_DECODE_MAP)
-
-
-def _decode_fragment_part(text, normalize_case=False, encode_stray_percents=False):
- return _percent_decode(text, normalize_case=normalize_case,
- encode_stray_percents=encode_stray_percents,
- _decode_map=_FRAGMENT_DECODE_MAP)
-
-
-def _percent_decode(text, normalize_case=False, subencoding='utf-8',
- raise_subencoding_exc=False, encode_stray_percents=False,
- _decode_map=_HEX_CHAR_MAP):
- """Convert percent-encoded text characters to their normal,
- human-readable equivalents.
-
- All characters in the input text must be encodable by
- *subencoding*. All special characters underlying the values in the
- percent-encoding must be decodable as *subencoding*. If a
- non-*subencoding*-valid string is passed, the original text is
- returned with no changes applied.
-
- Only called by field-tailored variants, e.g.,
- :func:`_decode_path_part`, as every percent-encodable part of the
- URL has characters which should not be percent decoded.
-
- >>> _percent_decode(u'abc%20def')
- u'abc def'
-
- Args:
- text (unicode): Text with percent-encoding present.
- normalize_case (bool): Whether undecoded percent segments, such
- as encoded delimiters, should be uppercased, per RFC 3986
- Section 2.1. See :func:`_decode_path_part` for an example.
- subencoding (unicode): The name of the encoding underlying the
- percent-encoding. Pass `False` to get back raw bytes.
- raise_subencoding_exc (bool): Whether an error in decoding the bytes
- underlying the percent-decoding should be raised.
-
- Returns:
- unicode: The percent-decoded version of *text*, decoded by
- *subencoding*, unless `subencoding=False` which returns bytes.
-
- """
- try:
- quoted_bytes = text.encode('utf-8' if subencoding is False else subencoding)
- except UnicodeEncodeError:
- return text
-
- bits = quoted_bytes.split(b'%')
- if len(bits) == 1:
- return text
-
- res = [bits[0]]
- append = res.append
-
- for item in bits[1:]:
- hexpair, rest = item[:2], item[2:]
- try:
- append(_decode_map[hexpair])
- append(rest)
- except KeyError:
- pair_is_hex = hexpair in _HEX_CHAR_MAP
- if pair_is_hex or not encode_stray_percents:
- append(b'%')
- else:
- # if it's undecodable, treat as a real percent sign,
- # which is reserved (because it wasn't in the
- # context-aware _decode_map passed in), and should
- # stay in an encoded state.
- append(b'%25')
- if normalize_case and pair_is_hex:
- append(hexpair.upper())
- append(rest)
- else:
- append(item)
-
- unquoted_bytes = b''.join(res)
-
- if subencoding is False:
- return unquoted_bytes
- try:
- return unquoted_bytes.decode(subencoding)
- except UnicodeDecodeError:
- if raise_subencoding_exc:
- raise
- return text
-
-
-def _decode_host(host):
- """Decode a host from ASCII-encodable text to IDNA-decoded text. If
- the host text is not ASCII, it is returned unchanged, as it is
- presumed that it is already IDNA-decoded.
-
- Some technical details: _decode_host is built on top of the "idna"
- package, which has some quirks:
-
- Capital letters are not valid IDNA2008. The idna package will
- raise an exception like this on capital letters:
-
- > idna.core.InvalidCodepoint: Codepoint U+004B at position 1 ... not allowed
-
- However, if a segment of a host (i.e., something in
- url.host.split('.')) is already ASCII, idna doesn't perform its
- usual checks. In fact, for capital letters it automatically
- lowercases them.
-
- This check and some other functionality can be bypassed by passing
- uts46=True to idna.encode/decode. This allows a more permissive and
- convenient interface. So far it seems like the balanced approach.
-
- Example output (from idna==2.6):
-
- >> idna.encode(u'mahmöud.io')
- 'xn--mahmud-zxa.io'
- >> idna.encode(u'Mahmöud.io')
- Traceback (most recent call last):
- File "", line 1, in
- File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 355, in encode
- result.append(alabel(label))
- File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 276, in alabel
- check_label(label)
- File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 253, in check_label
- raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
- idna.core.InvalidCodepoint: Codepoint U+004D at position 1 of u'Mahm\xf6ud' not allowed
- >> idna.encode(u'Mahmoud.io')
- 'Mahmoud.io'
-
- # Similar behavior for decodes below
- >> idna.decode(u'Mahmoud.io')
- u'mahmoud.io
- >> idna.decode(u'Méhmoud.io', uts46=True)
- u'm\xe9hmoud.io'
- """
- if not host:
- return u''
- try:
- host_bytes = host.encode("ascii")
- except UnicodeEncodeError:
- host_text = host
- else:
- try:
- host_text = idna_decode(host_bytes, uts46=True)
- except ValueError:
- # only reached on "narrow" (UCS-2) Python builds <3.4, see #7
- # NOTE: not going to raise here, because there's no
- # ambiguity in the IDNA, and the host is still
- # technically usable
- host_text = host
- return host_text
-
-
-def _resolve_dot_segments(path):
- """Normalize the URL path by resolving segments of '.' and '..'. For
- more details, see `RFC 3986 section 5.2.4, Remove Dot Segments`_.
-
- Args:
- path (list): path segments in string form
-
- Returns:
- list: a new list of path segments with the '.' and '..' elements
- removed and resolved.
-
- .. _RFC 3986 section 5.2.4, Remove Dot Segments: https://tools.ietf.org/html/rfc3986#section-5.2.4
- """
- segs = []
-
- for seg in path:
- if seg == u'.':
- pass
- elif seg == u'..':
- if segs:
- segs.pop()
- else:
- segs.append(seg)
-
- if list(path[-1:]) in ([u'.'], [u'..']):
- segs.append(u'')
-
- return segs
-
-
-def parse_host(host):
- """Parse the host into a tuple of ``(family, host)``, where family
- is the appropriate :mod:`socket` module constant when the host is
- an IP address. Family is ``None`` when the host is not an IP.
-
- Will raise :class:`URLParseError` on invalid IPv6 constants.
-
- Returns:
- tuple: family (socket constant or None), host (string)
-
- >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
- True
- >>> parse_host('::1') == (socket.AF_INET6, '::1')
- True
- >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
- True
- """
- if not host:
- return None, u''
- if u':' in host:
- try:
- inet_pton(socket.AF_INET6, host)
- except socket.error as se:
- raise URLParseError('invalid IPv6 host: %r (%r)' % (host, se))
- except UnicodeEncodeError:
- pass # TODO: this can't be a real host right?
- else:
- family = socket.AF_INET6
- return family, host
- try:
- inet_pton(socket.AF_INET, host)
- except (socket.error, UnicodeEncodeError):
- family = None # not an IP
- else:
- family = socket.AF_INET
- return family, host
-
-
-class URL(object):
- """From blogs to billboards, URLs are so common, that it's easy to
- overlook their complexity and power. With hyperlink's
- :class:`URL` type, working with URLs doesn't have to be hard.
-
- URLs are made of many parts. Most of these parts are officially
- named in `RFC 3986`_ and this diagram may prove handy in identifying
- them::
-
- foo://user:pass@example.com:8042/over/there?name=ferret#nose
- \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/
- | | | | | | |
- scheme userinfo host port path query fragment
-
- While :meth:`~URL.from_text` is used for parsing whole URLs, the
- :class:`URL` constructor builds a URL from the individual
- components, like so::
-
- >>> from hyperlink import URL
- >>> url = URL(scheme=u'https', host=u'example.com', path=[u'hello', u'world'])
- >>> print(url.to_text())
- https://example.com/hello/world
-
- The constructor runs basic type checks. All strings are expected
- to be decoded (:class:`unicode` in Python 2). All arguments are
- optional, defaulting to appropriately empty values. A full list of
- constructor arguments is below.
-
- Args:
- scheme (unicode): The text name of the scheme.
- host (unicode): The host portion of the network location
- port (int): The port part of the network location. If
- ``None`` or no port is passed, the port will default to
- the default port of the scheme, if it is known. See the
- ``SCHEME_PORT_MAP`` and :func:`register_default_port`
- for more info.
- path (tuple): A tuple of strings representing the
- slash-separated parts of the path.
- query (tuple): The query parameters, as a dictionary or
- as an iterable of key-value pairs.
- fragment (unicode): The fragment part of the URL.
- rooted (bool): Whether or not the path begins with a slash.
- userinfo (unicode): The username or colon-separated
- username:password pair.
- uses_netloc (bool): Indicates whether two slashes appear
- between the scheme and the host (``http://eg.com`` vs
- ``mailto:e@g.com``). Set automatically based on scheme.
-
- All of these parts are also exposed as read-only attributes of
- URL instances, along with several useful methods.
-
- .. _RFC 3986: https://tools.ietf.org/html/rfc3986
- .. _RFC 3987: https://tools.ietf.org/html/rfc3987
- """
-
- def __init__(self, scheme=None, host=None, path=(), query=(), fragment=u'',
- port=None, rooted=None, userinfo=u'', uses_netloc=None):
- if host is not None and scheme is None:
- scheme = u'http' # TODO: why
- if port is None:
- port = SCHEME_PORT_MAP.get(scheme)
- if host and query and not path:
- # per RFC 3986 6.2.3, "a URI that uses the generic syntax
- # for authority with an empty path should be normalized to
- # a path of '/'."
- path = (u'',)
-
- # Now that we're done detecting whether they were passed, we can set
- # them to their defaults:
- if scheme is None:
- scheme = u''
- if host is None:
- host = u''
- if rooted is None:
- rooted = bool(host)
-
- # Set attributes.
- self._scheme = _textcheck("scheme", scheme)
- if self._scheme:
- if not _SCHEME_RE.match(self._scheme):
- raise ValueError('invalid scheme: %r. Only alphanumeric, "+",'
- ' "-", and "." allowed. Did you meant to call'
- ' %s.from_text()?'
- % (self._scheme, self.__class__.__name__))
-
- _, self._host = parse_host(_textcheck('host', host, '/?#@'))
- if isinstance(path, unicode):
- raise TypeError("expected iterable of text for path, not: %r"
- % (path,))
- self._path = tuple((_textcheck("path segment", segment, '/?#')
- for segment in path))
- self._query = tuple(
- (_textcheck("query parameter name", k, '&=#'),
- _textcheck("query parameter value", v, '', nullable=True))
- for k, v in iter_pairs(query))
- self._fragment = _textcheck("fragment", fragment)
- self._port = _typecheck("port", port, int, NoneType)
- self._rooted = _typecheck("rooted", rooted, bool)
- self._userinfo = _textcheck("userinfo", userinfo, '/?#@')
-
- uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc)
- self._uses_netloc = _typecheck("uses_netloc",
- uses_netloc, bool, NoneType)
-
- return
-
- def get_decoded_url(self, lazy=False):
- try:
- return self._decoded_url
- except AttributeError:
- self._decoded_url = DecodedURL(self, lazy=lazy)
- return self._decoded_url
-
- @property
- def scheme(self):
- """The scheme is a string, and the first part of an absolute URL, the
- part before the first colon, and the part which defines the
- semantics of the rest of the URL. Examples include "http",
- "https", "ssh", "file", "mailto", and many others. See
- :func:`~hyperlink.register_scheme()` for more info.
- """
- return self._scheme
-
- @property
- def host(self):
- """The host is a string, and the second standard part of an absolute
- URL. When present, a valid host must be a domain name, or an
- IP (v4 or v6). It occurs before the first slash, or the second
- colon, if a :attr:`~hyperlink.URL.port` is provided.
- """
- return self._host
-
- @property
- def port(self):
- """The port is an integer that is commonly used in connecting to the
- :attr:`host`, and almost never appears without it.
-
- When not present in the original URL, this attribute defaults
- to the scheme's default port. If the scheme's default port is
- not known, and the port is not provided, this attribute will
- be set to None.
-
- >>> URL.from_text(u'http://example.com/pa/th').port
- 80
- >>> URL.from_text(u'foo://example.com/pa/th').port
- >>> URL.from_text(u'foo://example.com:8042/pa/th').port
- 8042
-
- .. note::
-
- Per the standard, when the port is the same as the schemes
- default port, it will be omitted in the text URL.
-
- """
- return self._port
-
- @property
- def path(self):
- """A tuple of strings, created by splitting the slash-separated
- hierarchical path. Started by the first slash after the host,
- terminated by a "?", which indicates the start of the
- :attr:`~hyperlink.URL.query` string.
- """
- return self._path
-
- @property
- def query(self):
- """Tuple of pairs, created by splitting the ampersand-separated
- mapping of keys and optional values representing
- non-hierarchical data used to identify the resource. Keys are
- always strings. Values are strings when present, or None when
- missing.
-
- For more operations on the mapping, see
- :meth:`~hyperlink.URL.get()`, :meth:`~hyperlink.URL.add()`,
- :meth:`~hyperlink.URL.set()`, and
- :meth:`~hyperlink.URL.delete()`.
- """
- return self._query
-
- @property
- def fragment(self):
- """A string, the last part of the URL, indicated by the first "#"
- after the :attr:`~hyperlink.URL.path` or
- :attr:`~hyperlink.URL.query`. Enables indirect identification
- of a secondary resource, like an anchor within an HTML page.
-
- """
- return self._fragment
-
- @property
- def rooted(self):
- """Whether or not the path starts with a forward slash (``/``).
-
- This is taken from the terminology in the BNF grammar,
- specifically the "path-rootless", rule, since "absolute path"
- and "absolute URI" are somewhat ambiguous. :attr:`path` does
- not contain the implicit prefixed ``"/"`` since that is
- somewhat awkward to work with.
-
- """
- return self._rooted
-
- @property
- def userinfo(self):
- """The colon-separated string forming the username-password
- combination.
- """
- return self._userinfo
-
- @property
- def uses_netloc(self):
- """
- """
- return self._uses_netloc
-
- @property
- def user(self):
- """
- The user portion of :attr:`~hyperlink.URL.userinfo`.
- """
- return self.userinfo.split(u':')[0]
-
- def authority(self, with_password=False, **kw):
- """Compute and return the appropriate host/port/userinfo combination.
-
- >>> url = URL.from_text(u'http://user:pass@localhost:8080/a/b?x=y')
- >>> url.authority()
- u'user:@localhost:8080'
- >>> url.authority(with_password=True)
- u'user:pass@localhost:8080'
-
- Args:
- with_password (bool): Whether the return value of this
- method include the password in the URL, if it is
- set. Defaults to False.
-
- Returns:
- str: The authority (network location and user information) portion
- of the URL.
- """
- # first, a bit of twisted compat
- with_password = kw.pop('includeSecrets', with_password)
- if kw:
- raise TypeError('got unexpected keyword arguments: %r' % kw.keys())
- host = self.host
- if ':' in host:
- hostport = ['[' + host + ']']
- else:
- hostport = [self.host]
- if self.port != SCHEME_PORT_MAP.get(self.scheme):
- hostport.append(unicode(self.port))
- authority = []
- if self.userinfo:
- userinfo = self.userinfo
- if not with_password and u":" in userinfo:
- userinfo = userinfo[:userinfo.index(u":") + 1]
- authority.append(userinfo)
- authority.append(u":".join(hostport))
- return u"@".join(authority)
-
- def __eq__(self, other):
- if not isinstance(other, self.__class__):
- return NotImplemented
- for attr in ['scheme', 'userinfo', 'host', 'query',
- 'fragment', 'port', 'uses_netloc']:
- if getattr(self, attr) != getattr(other, attr):
- return False
- if self.path == other.path or (self.path in _ROOT_PATHS
- and other.path in _ROOT_PATHS):
- return True
- return False
-
- def __ne__(self, other):
- if not isinstance(other, self.__class__):
- return NotImplemented
- return not self.__eq__(other)
-
- def __hash__(self):
- return hash((self.__class__, self.scheme, self.userinfo, self.host,
- self.path, self.query, self.fragment, self.port,
- self.rooted, self.uses_netloc))
-
- @property
- def absolute(self):
- """Whether or not the URL is "absolute". Absolute URLs are complete
- enough to resolve to a network resource without being relative
- to a base URI.
-
- >>> URL.from_text(u'http://wikipedia.org/').absolute
- True
- >>> URL.from_text(u'?a=b&c=d').absolute
- False
-
- Absolute URLs must have both a scheme and a host set.
- """
- return bool(self.scheme and self.host)
-
- def replace(self, scheme=_UNSET, host=_UNSET, path=_UNSET, query=_UNSET,
- fragment=_UNSET, port=_UNSET, rooted=_UNSET, userinfo=_UNSET,
- uses_netloc=_UNSET):
- """:class:`URL` objects are immutable, which means that attributes
- are designed to be set only once, at construction. Instead of
- modifying an existing URL, one simply creates a copy with the
- desired changes.
-
- If any of the following arguments is omitted, it defaults to
- the value on the current URL.
-
- Args:
- scheme (unicode): The text name of the scheme.
- host (unicode): The host portion of the network location
- port (int): The port part of the network location.
- path (tuple): A tuple of strings representing the
- slash-separated parts of the path.
- query (tuple): The query parameters, as a tuple of
- key-value pairs.
- query (tuple): The query parameters, as a dictionary or
- as an iterable of key-value pairs.
- fragment (unicode): The fragment part of the URL.
- rooted (bool): Whether or not the path begins with a slash.
- userinfo (unicode): The username or colon-separated
- username:password pair.
- uses_netloc (bool): Indicates whether two slashes appear
- between the scheme and the host (``http://eg.com`` vs
- ``mailto:e@g.com``)
-
- Returns:
- URL: a copy of the current :class:`URL`, with new values for
- parameters passed.
-
- """
- return self.__class__(
- scheme=_optional(scheme, self.scheme),
- host=_optional(host, self.host),
- path=_optional(path, self.path),
- query=_optional(query, self.query),
- fragment=_optional(fragment, self.fragment),
- port=_optional(port, self.port),
- rooted=_optional(rooted, self.rooted),
- userinfo=_optional(userinfo, self.userinfo),
- uses_netloc=_optional(uses_netloc, self.uses_netloc)
- )
-
- @classmethod
- def from_text(cls, text):
- """Whereas the :class:`URL` constructor is useful for constructing
- URLs from parts, :meth:`~URL.from_text` supports parsing whole
- URLs from their string form::
-
- >>> URL.from_text(u'http://example.com')
- URL.from_text(u'http://example.com')
- >>> URL.from_text(u'?a=b&x=y')
- URL.from_text(u'?a=b&x=y')
-
- As you can see above, it's also used as the :func:`repr` of
- :class:`URL` objects. The natural counterpart to
- :func:`~URL.to_text()`. This method only accepts *text*, so be
- sure to decode those bytestrings.
-
- Args:
- text (unicode): A valid URL string.
-
- Returns:
- URL: The structured object version of the parsed string.
-
- .. note::
-
- Somewhat unexpectedly, URLs are a far more permissive
- format than most would assume. Many strings which don't
- look like URLs are still valid URLs. As a result, this
- method only raises :class:`URLParseError` on invalid port
- and IPv6 values in the host portion of the URL.
-
- """
- um = _URL_RE.match(_textcheck('text', text))
- try:
- gs = um.groupdict()
- except AttributeError:
- raise URLParseError('could not parse url: %r' % text)
-
- au_text = gs['authority'] or u''
- au_m = _AUTHORITY_RE.match(au_text)
- try:
- au_gs = au_m.groupdict()
- except AttributeError:
- raise URLParseError('invalid authority %r in url: %r'
- % (au_text, text))
- if au_gs['bad_host']:
- raise URLParseError('invalid host %r in url: %r'
- % (au_gs['bad_host'], text))
-
- userinfo = au_gs['userinfo'] or u''
-
- host = au_gs['ipv6_host'] or au_gs['plain_host']
- port = au_gs['port']
- if port is not None:
- try:
- port = int(port)
- except ValueError:
- if not port: # TODO: excessive?
- raise URLParseError('port must not be empty: %r' % au_text)
- raise URLParseError('expected integer for port, not %r' % port)
-
- scheme = gs['scheme'] or u''
- fragment = gs['fragment'] or u''
- uses_netloc = bool(gs['_netloc_sep'])
-
- if gs['path']:
- path = gs['path'].split(u"/")
- if not path[0]:
- path.pop(0)
- rooted = True
- else:
- rooted = False
- else:
- path = ()
- rooted = bool(au_text)
- if gs['query']:
- query = ((qe.split(u"=", 1) if u'=' in qe else (qe, None))
- for qe in gs['query'].split(u"&"))
- else:
- query = ()
- return cls(scheme, host, path, query, fragment, port,
- rooted, userinfo, uses_netloc)
-
- def normalize(self, scheme=True, host=True, path=True, query=True,
- fragment=True, userinfo=True, percents=True):
- """Return a new URL object with several standard normalizations
- applied:
-
- * Decode unreserved characters (`RFC 3986 2.3`_)
- * Uppercase remaining percent-encoded octets (`RFC 3986 2.1`_)
- * Convert scheme and host casing to lowercase (`RFC 3986 3.2.2`_)
- * Resolve any "." and ".." references in the path (`RFC 3986 6.2.2.3`_)
- * Ensure an ending slash on URLs with an empty path (`RFC 3986 6.2.3`_)
- * Encode any stray percent signs (`%`) in percent-encoded
- fields (path, query, fragment, userinfo) (`RFC 3986 2.4`_)
-
- All are applied by default, but normalizations can be disabled
- per-part by passing `False` for that part's corresponding
- name.
-
- Args:
- scheme (bool): Convert the scheme to lowercase
- host (bool): Convert the host to lowercase
- path (bool): Normalize the path (see above for details)
- query (bool): Normalize the query string
- fragment (bool): Normalize the fragment
- userinfo (bool): Normalize the userinfo
- percents (bool): Encode isolated percent signs
- for any percent-encoded fields which are being
- normalized (defaults to True).
-
- >>> url = URL.from_text(u'Http://example.COM/a/../b/./c%2f?%61%')
- >>> print(url.normalize().to_text())
- http://example.com/b/c%2F?a%25
-
- .. _RFC 3986 3.2.2: https://tools.ietf.org/html/rfc3986#section-3.2.2
- .. _RFC 3986 2.3: https://tools.ietf.org/html/rfc3986#section-2.3
- .. _RFC 3986 2.1: https://tools.ietf.org/html/rfc3986#section-2.1
- .. _RFC 3986 6.2.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.2.3
- .. _RFC 3986 6.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.3
- .. _RFC 3986 2.4: https://tools.ietf.org/html/rfc3986#section-2.4
-
- """
- kw = {}
- if scheme:
- kw['scheme'] = self.scheme.lower()
- if host:
- kw['host'] = self.host.lower()
- def _dec_unres(target):
- return _decode_unreserved(target, normalize_case=True,
- encode_stray_percents=percents)
- if path:
- if self.path:
- kw['path'] = [_dec_unres(p) for p in _resolve_dot_segments(self.path)]
- else:
- kw['path'] = (u'',)
- if query:
- kw['query'] = [(_dec_unres(k), _dec_unres(v) if v else v)
- for k, v in self.query]
- if fragment:
- kw['fragment'] = _dec_unres(self.fragment)
- if userinfo:
- kw['userinfo'] = u':'.join([_dec_unres(p)
- for p in self.userinfo.split(':', 1)])
-
- return self.replace(**kw)
-
- def child(self, *segments):
- """Make a new :class:`URL` where the given path segments are a child
- of this URL, preserving other parts of the URL, including the
- query string and fragment.
-
- For example::
-
- >>> url = URL.from_text(u'http://localhost/a/b?x=y')
- >>> child_url = url.child(u"c", u"d")
- >>> child_url.to_text()
- u'http://localhost/a/b/c/d?x=y'
-
- Args:
- segments (unicode): Additional parts to be joined and added to
- the path, like :func:`os.path.join`. Special characters
- in segments will be percent encoded.
-
- Returns:
- URL: A copy of the current URL with the extra path segments.
-
- """
- if not segments:
- return self
-
- segments = [_textcheck('path segment', s) for s in segments]
- new_segs = _encode_path_parts(segments, joined=False, maximal=False)
- new_path = self.path[:-1 if (self.path and self.path[-1] == u'')
- else None] + new_segs
- return self.replace(path=new_path)
-
- def sibling(self, segment):
- """Make a new :class:`URL` with a single path segment that is a
- sibling of this URL path.
-
- Args:
- segment (unicode): A single path segment.
-
- Returns:
- URL: A copy of the current URL with the last path segment
- replaced by *segment*. Special characters such as
- ``/?#`` will be percent encoded.
-
- """
- _textcheck('path segment', segment)
- new_path = self.path[:-1] + (_encode_path_part(segment),)
- return self.replace(path=new_path)
-
- def click(self, href=u''):
- """Resolve the given URL relative to this URL.
-
- The resulting URI should match what a web browser would
- generate if you visited the current URL and clicked on *href*.
-
- >>> url = URL.from_text(u'http://blog.hatnote.com/')
- >>> url.click(u'/post/155074058790').to_text()
- u'http://blog.hatnote.com/post/155074058790'
- >>> url = URL.from_text(u'http://localhost/a/b/c/')
- >>> url.click(u'../d/./e').to_text()
- u'http://localhost/a/b/d/e'
-
- Args:
- href (unicode): A string representing a clicked URL.
-
- Return:
- URL: A copy of the current URL with navigation logic applied.
-
- For more information, see `RFC 3986 section 5`_.
-
- .. _RFC 3986 section 5: https://tools.ietf.org/html/rfc3986#section-5
- """
- if href:
- if isinstance(href, URL):
- clicked = href
- else:
- # TODO: This error message is not completely accurate,
- # as URL objects are now also valid, but Twisted's
- # test suite (wrongly) relies on this exact message.
- _textcheck('relative URL', href)
- clicked = URL.from_text(href)
- if clicked.absolute:
- return clicked
- else:
- clicked = self
-
- query = clicked.query
- if clicked.scheme and not clicked.rooted:
- # Schemes with relative paths are not well-defined. RFC 3986 calls
- # them a "loophole in prior specifications" that should be avoided,
- # or supported only for backwards compatibility.
- raise NotImplementedError('absolute URI with rootless path: %r'
- % (href,))
- else:
- if clicked.rooted:
- path = clicked.path
- elif clicked.path:
- path = self.path[:-1] + clicked.path
- else:
- path = self.path
- if not query:
- query = self.query
- return self.replace(scheme=clicked.scheme or self.scheme,
- host=clicked.host or self.host,
- port=clicked.port or self.port,
- path=_resolve_dot_segments(path),
- query=query,
- fragment=clicked.fragment)
-
- def to_uri(self):
- u"""Make a new :class:`URL` instance with all non-ASCII characters
- appropriately percent-encoded. This is useful to do in preparation
- for sending a :class:`URL` over a network protocol.
-
- For example::
-
- >>> URL.from_text(u'https://ايران.com/foo⇧bar/').to_uri()
- URL.from_text(u'https://xn--mgba3a4fra.com/foo%E2%87%A7bar/')
-
- Returns:
- URL: A new instance with its path segments, query parameters, and
- hostname encoded, so that they are all in the standard
- US-ASCII range.
- """
- new_userinfo = u':'.join([_encode_userinfo_part(p) for p in
- self.userinfo.split(':', 1)])
- new_path = _encode_path_parts(self.path, has_scheme=bool(self.scheme),
- rooted=False, joined=False, maximal=True)
- new_host = self.host if not self.host else idna_encode(self.host, uts46=True).decode("ascii")
- return self.replace(
- userinfo=new_userinfo,
- host=new_host,
- path=new_path,
- query=tuple([(_encode_query_key(k, maximal=True),
- _encode_query_value(v, maximal=True)
- if v is not None else None)
- for k, v in self.query]),
- fragment=_encode_fragment_part(self.fragment, maximal=True)
- )
-
- def to_iri(self):
- u"""Make a new :class:`URL` instance with all but a few reserved
- characters decoded into human-readable format.
-
- Percent-encoded Unicode and IDNA-encoded hostnames are
- decoded, like so::
-
- >>> url = URL.from_text(u'https://xn--mgba3a4fra.example.com/foo%E2%87%A7bar/')
- >>> print(url.to_iri().to_text())
- https://ايران.example.com/foo⇧bar/
-
- .. note::
-
- As a general Python issue, "narrow" (UCS-2) builds of
- Python may not be able to fully decode certain URLs, and
- the in those cases, this method will return a best-effort,
- partially-decoded, URL which is still valid. This issue
- does not affect any Python builds 3.4+.
-
- Returns:
- URL: A new instance with its path segments, query parameters, and
- hostname decoded for display purposes.
- """
- new_userinfo = u':'.join([_decode_userinfo_part(p) for p in
- self.userinfo.split(':', 1)])
- host_text = _decode_host(self.host)
-
- return self.replace(userinfo=new_userinfo,
- host=host_text,
- path=[_decode_path_part(segment)
- for segment in self.path],
- query=[(_decode_query_key(k),
- _decode_query_value(v)
- if v is not None else None)
- for k, v in self.query],
- fragment=_decode_fragment_part(self.fragment))
-
- def to_text(self, with_password=False):
- """Render this URL to its textual representation.
-
- By default, the URL text will *not* include a password, if one
- is set. RFC 3986 considers using URLs to represent such
- sensitive information as deprecated. Quoting from RFC 3986,
- `section 3.2.1`:
-
- "Applications should not render as clear text any data after the
- first colon (":") character found within a userinfo subcomponent
- unless the data after the colon is the empty string (indicating no
- password)."
-
- Args:
- with_password (bool): Whether or not to include the
- password in the URL text. Defaults to False.
-
- Returns:
- str: The serialized textual representation of this URL,
- such as ``u"http://example.com/some/path?some=query"``.
-
- The natural counterpart to :class:`URL.from_text()`.
-
- .. _section 3.2.1: https://tools.ietf.org/html/rfc3986#section-3.2.1
- """
- scheme = self.scheme
- authority = self.authority(with_password)
- path = _encode_path_parts(self.path,
- rooted=self.rooted,
- has_scheme=bool(scheme),
- has_authority=bool(authority),
- maximal=False)
- query_parts = []
- for k, v in self.query:
- if v is None:
- query_parts.append(_encode_query_key(k, maximal=False))
- else:
- query_parts.append(u'='.join((_encode_query_key(k, maximal=False),
- _encode_query_value(v, maximal=False))))
- query_string = u'&'.join(query_parts)
-
- fragment = self.fragment
-
- parts = []
- _add = parts.append
- if scheme:
- _add(scheme)
- _add(':')
- if authority:
- _add('//')
- _add(authority)
- elif (scheme and path[:2] != '//' and self.uses_netloc):
- _add('//')
- if path:
- if scheme and authority and path[:1] != '/':
- _add('/') # relpaths with abs authorities auto get '/'
- _add(path)
- if query_string:
- _add('?')
- _add(query_string)
- if fragment:
- _add('#')
- _add(fragment)
- return u''.join(parts)
-
- def __repr__(self):
- """Convert this URL to an representation that shows all of its
- constituent parts, as well as being a valid argument to
- :func:`eval`.
- """
- return '%s.from_text(%r)' % (self.__class__.__name__, self.to_text())
-
- def _to_bytes(self):
- """
- Allows for direct usage of URL objects with libraries like
- requests, which automatically stringify URL parameters. See
- issue #49.
- """
- return self.to_uri().to_text().encode('ascii')
-
- if PY2:
- __str__ = _to_bytes
- __unicode__ = to_text
- else:
- __bytes__ = _to_bytes
- __str__ = to_text
-
- # # Begin Twisted Compat Code
- asURI = to_uri
- asIRI = to_iri
-
- @classmethod
- def fromText(cls, s):
- return cls.from_text(s)
-
- def asText(self, includeSecrets=False):
- return self.to_text(with_password=includeSecrets)
-
- def __dir__(self):
- try:
- ret = object.__dir__(self)
- except AttributeError:
- # object.__dir__ == AttributeError # pdw for py2
- ret = dir(self.__class__) + list(self.__dict__.keys())
- ret = sorted(set(ret) - set(['fromText', 'asURI', 'asIRI', 'asText']))
- return ret
-
- # # End Twisted Compat Code
-
- def add(self, name, value=None):
- """Make a new :class:`URL` instance with a given query argument,
- *name*, added to it with the value *value*, like so::
-
- >>> URL.from_text(u'https://example.com/?x=y').add(u'x')
- URL.from_text(u'https://example.com/?x=y&x')
- >>> URL.from_text(u'https://example.com/?x=y').add(u'x', u'z')
- URL.from_text(u'https://example.com/?x=y&x=z')
-
- Args:
- name (unicode): The name of the query parameter to add. The
- part before the ``=``.
- value (unicode): The value of the query parameter to add. The
- part after the ``=``. Defaults to ``None``, meaning no
- value.
-
- Returns:
- URL: A new :class:`URL` instance with the parameter added.
- """
- return self.replace(query=self.query + ((name, value),))
-
- def set(self, name, value=None):
- """Make a new :class:`URL` instance with the query parameter *name*
- set to *value*. All existing occurences, if any are replaced
- by the single name-value pair.
-
- >>> URL.from_text(u'https://example.com/?x=y').set(u'x')
- URL.from_text(u'https://example.com/?x')
- >>> URL.from_text(u'https://example.com/?x=y').set(u'x', u'z')
- URL.from_text(u'https://example.com/?x=z')
-
- Args:
- name (unicode): The name of the query parameter to set. The
- part before the ``=``.
- value (unicode): The value of the query parameter to set. The
- part after the ``=``. Defaults to ``None``, meaning no
- value.
-
- Returns:
- URL: A new :class:`URL` instance with the parameter set.
- """
- # Preserve the original position of the query key in the list
- q = [(k, v) for (k, v) in self.query if k != name]
- idx = next((i for (i, (k, v)) in enumerate(self.query)
- if k == name), -1)
- q[idx:idx] = [(name, value)]
- return self.replace(query=q)
-
- def get(self, name):
- """Get a list of values for the given query parameter, *name*::
-
- >>> url = URL.from_text(u'?x=1&x=2')
- >>> url.get('x')
- [u'1', u'2']
- >>> url.get('y')
- []
-
- If the given *name* is not set, an empty list is returned. A
- list is always returned, and this method raises no exceptions.
-
- Args:
- name (unicode): The name of the query parameter to get.
-
- Returns:
- list: A list of all the values associated with the key, in
- string form.
-
- """
- return [value for (key, value) in self.query if name == key]
-
- def remove(self, name, value=_UNSET, limit=None):
- """Make a new :class:`URL` instance with occurrences of the query
- parameter *name* removed, or, if *value* is set, parameters
- matching *name* and *value*. No exception is raised if the
- parameter is not already set.
-
- Args:
- name (unicode): The name of the query parameter to remove.
- value (unicode): Optional value to additionally filter
- on. Setting this removes query parameters which match
- both name and value.
- limit (int): Optional maximum number of parameters to remove.
-
- Returns:
- URL: A new :class:`URL` instance with the parameter removed.
- """
- if limit is None:
- if value is _UNSET:
- nq = [(k, v) for (k, v) in self.query if k != name]
- else:
- nq = [(k, v) for (k, v) in self.query if not (k == name and v == value)]
- else:
- nq, removed_count = [], 0
-
- for k, v in self.query:
- if k == name and (value is _UNSET or v == value) and removed_count < limit:
- removed_count += 1 # drop it
- else:
- nq.append((k, v)) # keep it
-
- return self.replace(query=nq)
-
-
-EncodedURL = URL # An alias better describing what the URL really is
-
-
-class DecodedURL(object):
- """DecodedURL is a type meant to act as a higher-level interface to
- the URL. It is the `unicode` to URL's `bytes`. `DecodedURL` has
- almost exactly the same API as `URL`, but everything going in and
- out is in its maximally decoded state. All percent decoding is
- handled automatically.
-
- Where applicable, a UTF-8 encoding is presumed. Be advised that
- some interactions can raise :exc:`UnicodeEncodeErrors` and
- :exc:`UnicodeDecodeErrors`, just like when working with
- bytestrings. Examples of such interactions include handling query
- strings encoding binary data, and paths containing segments with
- special characters encoded with codecs other than UTF-8.
-
- Args:
- url (URL): A :class:`URL` object to wrap.
- lazy (bool): Set to True to avoid pre-decode all parts of the
- URL to check for validity. Defaults to False.
-
- """
- def __init__(self, url, lazy=False):
- self._url = url
- if not lazy:
- # cache the following, while triggering any decoding
- # issues with decodable fields
- self.host, self.userinfo, self.path, self.query, self.fragment
- return
-
- @classmethod
- def from_text(cls, text, lazy=False):
- """\
- Make a `DecodedURL` instance from any text string containing a URL.
-
- Args:
- text (unicode): Text containing the URL
- lazy (bool): Whether to pre-decode all parts of the URL to
- check for validity. Defaults to True.
- """
- _url = URL.from_text(text)
- return cls(_url, lazy=lazy)
-
- @property
- def encoded_url(self):
- """Access the underlying :class:`URL` object, which has any special
- characters encoded.
- """
- return self._url
-
- def to_text(self, *a, **kw):
- "Passthrough to :meth:`~hyperlink.URL.to_text()`"
- return self._url.to_text(*a, **kw)
-
- def to_uri(self, *a, **kw):
- "Passthrough to :meth:`~hyperlink.URL.to_uri()`"
- return self._url.to_uri(*a, **kw)
-
- def to_iri(self, *a, **kw):
- "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
- return self._url.to_iri(*a, **kw)
-
- def click(self, href=u''):
- "Return a new DecodedURL wrapping the result of :meth:`~hyperlink.URL.click()`"
- if isinstance(href, DecodedURL):
- href = href._url
- return self.__class__(self._url.click(href=href))
-
- def sibling(self, segment):
- """Automatically encode any reserved characters in *segment* and
- return a new `DecodedURL` wrapping the result of
- :meth:`~hyperlink.URL.sibling()`
- """
- return self.__class__(self._url.sibling(_encode_reserved(segment)))
-
- def child(self, *segments):
- """Automatically encode any reserved characters in *segments* and
- return a new `DecodedURL` wrapping the result of
- :meth:`~hyperlink.URL.child()`.
- """
- if not segments:
- return self
- new_segs = [_encode_reserved(s) for s in segments]
- return self.__class__(self._url.child(*new_segs))
-
- def normalize(self, *a, **kw):
- "Return a new `DecodedURL` wrapping the result of :meth:`~hyperlink.URL.normalize()`"
- return self.__class__(self._url.normalize(*a, **kw))
-
- @property
- def absolute(self):
- return self._url.absolute
-
- @property
- def scheme(self):
- return self._url.scheme
-
- @property
- def host(self):
- return _decode_host(self._url.host)
-
- @property
- def port(self):
- return self._url.port
-
- @property
- def rooted(self):
- return self._url.rooted
-
- @property
- def path(self):
- try:
- return self._path
- except AttributeError:
- pass
- self._path = tuple([_percent_decode(p, raise_subencoding_exc=True)
- for p in self._url.path])
- return self._path
-
- @property
- def query(self):
- try:
- return self._query
- except AttributeError:
- pass
- _q = [tuple(_percent_decode(x, raise_subencoding_exc=True)
- if x is not None else None
- for x in (k, v))
- for k, v in self._url.query]
- self._query = tuple(_q)
- return self._query
-
- @property
- def fragment(self):
- try:
- return self._fragment
- except AttributeError:
- pass
- frag = self._url.fragment
- self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
- return self._fragment
-
- @property
- def userinfo(self):
- try:
- return self._userinfo
- except AttributeError:
- pass
- self._userinfo = tuple([_percent_decode(p, raise_subencoding_exc=True)
- for p in self._url.userinfo.split(':', 1)])
- return self._userinfo
-
- @property
- def user(self):
- return self.userinfo[0]
-
- @property
- def uses_netloc(self):
- return self._url.uses_netloc
-
- def replace(self, scheme=_UNSET, host=_UNSET, path=_UNSET, query=_UNSET,
- fragment=_UNSET, port=_UNSET, rooted=_UNSET, userinfo=_UNSET,
- uses_netloc=_UNSET):
- """While the signature is the same, this `replace()` differs a little
- from URL.replace. For instance, it accepts userinfo as a
- tuple, not as a string, handling the case of having a username
- containing a `:`. As with the rest of the methods on
- DecodedURL, if you pass a reserved character, it will be
- automatically encoded instead of an error being raised.
-
- """
- if path is not _UNSET:
- path = [_encode_reserved(p) for p in path]
- if query is not _UNSET:
- query = [[_encode_reserved(x)
- if x is not None else None
- for x in (k, v)]
- for k, v in iter_pairs(query)]
- if userinfo is not _UNSET:
- if len(userinfo) > 2:
- raise ValueError('userinfo expected sequence of ["user"] or'
- ' ["user", "password"], got %r' % userinfo)
- userinfo = u':'.join([_encode_reserved(p) for p in userinfo])
- new_url = self._url.replace(scheme=scheme,
- host=host,
- path=path,
- query=query,
- fragment=fragment,
- port=port,
- rooted=rooted,
- userinfo=userinfo,
- uses_netloc=uses_netloc)
- return self.__class__(url=new_url)
-
- def get(self, name):
- "Get the value of all query parameters whose name matches *name*"
- return [v for (k, v) in self.query if name == k]
-
- def add(self, name, value=None):
- "Return a new DecodedURL with the query parameter *name* and *value* added."
- return self.replace(query=self.query + ((name, value),))
-
- def set(self, name, value=None):
- "Return a new DecodedURL with query parameter *name* set to *value*"
- query = self.query
- q = [(k, v) for (k, v) in query if k != name]
- idx = next((i for (i, (k, v)) in enumerate(query) if k == name), -1)
- q[idx:idx] = [(name, value)]
- return self.replace(query=q)
-
- def remove(self, name, value=_UNSET, limit=None):
- """Return a new DecodedURL with query parameter *name* removed.
-
- Optionally also filter for *value*, as well as cap the number
- of parameters removed with *limit*.
- """
- if limit is None:
- if value is _UNSET:
- nq = [(k, v) for (k, v) in self.query if k != name]
- else:
- nq = [(k, v) for (k, v) in self.query if not (k == name and v == value)]
- else:
- nq, removed_count = [], 0
- for k, v in self.query:
- if k == name and (value is _UNSET or v == value) and removed_count < limit:
- removed_count += 1 # drop it
- else:
- nq.append((k, v)) # keep it
-
- return self.replace(query=nq)
-
- def __repr__(self):
- cn = self.__class__.__name__
- return '%s(url=%r)' % (cn, self._url)
-
- def __str__(self):
- # TODO: the underlying URL's __str__ needs to change to make
- # this work as the URL, see #55
- return str(self._url)
-
- def __eq__(self, other):
- if not isinstance(other, self.__class__):
- return NotImplemented
- return self.normalize().to_uri() == other.normalize().to_uri()
-
- def __ne__(self, other):
- if not isinstance(other, self.__class__):
- return NotImplemented
- return not self.__eq__(other)
-
- def __hash__(self):
- return hash((self.__class__, self.scheme, self.userinfo, self.host,
- self.path, self.query, self.fragment, self.port,
- self.rooted, self.uses_netloc))
-
- # # Begin Twisted Compat Code
- asURI = to_uri
- asIRI = to_iri
-
- @classmethod
- def fromText(cls, s, lazy=False):
- return cls.from_text(s, lazy=lazy)
-
- def asText(self, includeSecrets=False):
- return self.to_text(with_password=includeSecrets)
-
- def __dir__(self):
- try:
- ret = object.__dir__(self)
- except AttributeError:
- # object.__dir__ == AttributeError # pdw for py2
- ret = dir(self.__class__) + list(self.__dict__.keys())
- ret = sorted(set(ret) - set(['fromText', 'asURI', 'asIRI', 'asText']))
- return ret
-
- # # End Twisted Compat Code
-
-
-def parse(url, decoded=True, lazy=False):
- """Automatically turn text into a structured URL object.
-
- Args:
-
- decoded (bool): Whether or not to return a :class:`DecodedURL`,
- which automatically handles all
- encoding/decoding/quoting/unquoting for all the various
- accessors of parts of the URL, or an :class:`EncodedURL`,
- which has the same API, but requires handling of special
- characters for different parts of the URL.
-
- lazy (bool): In the case of `decoded=True`, this controls
- whether the URL is decoded immediately or as accessed. The
- default, `lazy=False`, checks all encoded parts of the URL
- for decodability.
- """
- enc_url = EncodedURL.from_text(url)
- if not decoded:
- return enc_url
- dec_url = DecodedURL(enc_url, lazy=lazy)
- return dec_url
diff --git a/hyperlink/test/__init__.py b/hyperlink/test/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/hyperlink/test/common.py b/hyperlink/test/common.py
deleted file mode 100644
index 28eba527..00000000
--- a/hyperlink/test/common.py
+++ /dev/null
@@ -1,58 +0,0 @@
-
-
-from unittest import TestCase
-
-
-class HyperlinkTestCase(TestCase):
- """This type mostly exists to provide a backwards-compatible
- assertRaises method for Python 2.6 testing.
- """
- def assertRaises(self, excClass, callableObj=None, *args, **kwargs):
- """Fail unless an exception of class excClass is raised
- by callableObj when invoked with arguments args and keyword
- arguments kwargs. If a different type of exception is
- raised, it will not be caught, and the test case will be
- deemed to have suffered an error, exactly as for an
- unexpected exception.
-
- If called with callableObj omitted or None, will return a
- context object used like this::
-
- with self.assertRaises(SomeException):
- do_something()
-
- The context manager keeps a reference to the exception as
- the 'exception' attribute. This allows you to inspect the
- exception after the assertion::
-
- with self.assertRaises(SomeException) as cm:
- do_something()
- the_exception = cm.exception
- self.assertEqual(the_exception.error_code, 3)
- """
- context = _AssertRaisesContext(excClass, self)
- if callableObj is None:
- return context
- with context:
- callableObj(*args, **kwargs)
-
-
-class _AssertRaisesContext(object):
- "A context manager used to implement HyperlinkTestCase.assertRaises."
-
- def __init__(self, expected, test_case):
- self.expected = expected
- self.failureException = test_case.failureException
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_value, tb):
- if exc_type is None:
- exc_name = self.expected.__name__
- raise self.failureException("%s not raised" % (exc_name,))
- if not issubclass(exc_type, self.expected):
- # let unexpected exceptions pass through
- return False
- self.exception = exc_value # store for later retrieval
- return True
diff --git a/hyperlink/test/test_decoded_url.py b/hyperlink/test/test_decoded_url.py
deleted file mode 100644
index 4e6f8b97..00000000
--- a/hyperlink/test/test_decoded_url.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import unicode_literals
-
-from .. import DecodedURL
-from .._url import _percent_decode
-from .common import HyperlinkTestCase
-
-BASIC_URL = 'http://example.com/#'
-TOTAL_URL = "https://%75%73%65%72:%00%00%00%00@xn--bcher-kva.ch:8080/a/nice%20nice/./path/?zot=23%25&zut#frég"
-
-
-class TestURL(HyperlinkTestCase):
-
- def test_durl_basic(self):
- bdurl = DecodedURL.from_text(BASIC_URL)
- assert bdurl.scheme == 'http'
- assert bdurl.host == 'example.com'
- assert bdurl.port == 80
- assert bdurl.path == ('',)
- assert bdurl.fragment == ''
-
- durl = DecodedURL.from_text(TOTAL_URL)
-
- assert durl.scheme == 'https'
- assert durl.host == 'bücher.ch'
- assert durl.port == 8080
- assert durl.path == ('a', 'nice nice', '.', 'path', '')
- assert durl.fragment == 'frég'
- assert durl.get('zot') == ['23%']
-
- assert durl.user == 'user'
- assert durl.userinfo == ('user', '\0\0\0\0')
-
- def test_passthroughs(self):
- # just basic tests for the methods that more or less pass straight
- # through to the underlying URL
-
- durl = DecodedURL.from_text(TOTAL_URL)
- assert durl.sibling('te%t').path[-1] == 'te%t'
- assert durl.child('../test2%').path[-1] == '../test2%'
- assert durl.child() == durl
- assert durl.child() is durl
- assert durl.click('/').path[-1] == ''
- assert durl.user == 'user'
-
- assert '.' in durl.path
- assert '.' not in durl.normalize().path
-
- assert durl.to_uri().fragment == 'fr%C3%A9g'
- assert ' ' in durl.to_iri().path[1]
-
- assert durl.to_text(with_password=True) == TOTAL_URL
-
- assert durl.absolute
- assert durl.rooted
-
- assert durl == durl.encoded_url.get_decoded_url()
-
- durl2 = DecodedURL.from_text(TOTAL_URL, lazy=True)
- assert durl2 == durl2.encoded_url.get_decoded_url(lazy=True)
-
- assert str(DecodedURL.from_text(BASIC_URL).child(' ')) == 'http://example.com/%20'
-
- assert not (durl == 1)
- assert durl != 1
-
- def test_repr(self):
- durl = DecodedURL.from_text(TOTAL_URL)
- assert repr(durl) == 'DecodedURL(url=' + repr(durl._url) + ')'
-
- def test_query_manipulation(self):
- durl = DecodedURL.from_text(TOTAL_URL)
-
- assert durl.get('zot') == ['23%']
- durl = durl.add(' ', 'space')
- assert durl.get(' ') == ['space']
- durl = durl.set(' ', 'spa%ed')
- assert durl.get(' ') == ['spa%ed']
-
- durl = DecodedURL(url=durl.to_uri())
- assert durl.get(' ') == ['spa%ed']
- durl = durl.remove(' ')
- assert durl.get(' ') == []
-
- durl = DecodedURL.from_text('/?%61rg=b&arg=c')
- assert durl.get('arg') == ['b', 'c']
-
- assert durl.set('arg', 'd').get('arg') == ['d']
-
- durl = DecodedURL.from_text(u"https://example.com/a/b/?fóó=1&bar=2&fóó=3")
- assert durl.remove("fóó") == DecodedURL.from_text("https://example.com/a/b/?bar=2")
- assert durl.remove("fóó", value="1") == DecodedURL.from_text("https://example.com/a/b/?bar=2&fóó=3")
- assert durl.remove("fóó", limit=1) == DecodedURL.from_text("https://example.com/a/b/?bar=2&fóó=3")
- assert durl.remove("fóó", value="1", limit=0) == DecodedURL.from_text("https://example.com/a/b/?fóó=1&bar=2&fóó=3")
-
- def test_equality_and_hashability(self):
- durl = DecodedURL.from_text(TOTAL_URL)
- durl2 = DecodedURL.from_text(TOTAL_URL)
- burl = DecodedURL.from_text(BASIC_URL)
- durl_uri = durl.to_uri()
-
- assert durl == durl
- assert durl == durl2
- assert durl != burl
- assert durl != None
- assert durl != durl._url
-
- durl_map = {}
- durl_map[durl] = durl
- durl_map[durl2] = durl2
-
- assert len(durl_map) == 1
-
- durl_map[burl] = burl
-
- assert len(durl_map) == 2
-
- durl_map[durl_uri] = durl_uri
-
- assert len(durl_map) == 3
-
- def test_replace_roundtrip(self):
- durl = DecodedURL.from_text(TOTAL_URL)
-
- durl2 = durl.replace(scheme=durl.scheme,
- host=durl.host,
- path=durl.path,
- query=durl.query,
- fragment=durl.fragment,
- port=durl.port,
- rooted=durl.rooted,
- userinfo=durl.userinfo,
- uses_netloc=durl.uses_netloc)
-
- assert durl == durl2
-
- def test_replace_userinfo(self):
- durl = DecodedURL.from_text(TOTAL_URL)
- with self.assertRaises(ValueError):
- durl.replace(userinfo=['user', 'pw', 'thiswillcauseafailure'])
- return
-
- def test_twisted_compat(self):
- durl = DecodedURL.from_text(TOTAL_URL)
-
- assert durl == DecodedURL.fromText(TOTAL_URL)
- assert 'to_text' in dir(durl)
- assert 'asText' not in dir(durl)
- assert durl.to_text() == durl.asText()
-
- def test_percent_decode_bytes(self):
- assert _percent_decode('%00', subencoding=False) == b'\0'
-
- def test_percent_decode_mixed(self):
- # See https://github.com/python-hyper/hyperlink/pull/59 for a
- # nice discussion of the possibilities
- assert _percent_decode('abcdé%C3%A9éfg') == 'abcdéééfg'
-
- # still allow percent encoding in the case of an error
- assert _percent_decode('abcdé%C3éfg') == 'abcdé%C3éfg'
-
- # ...unless explicitly told otherwise
- with self.assertRaises(UnicodeDecodeError):
- _percent_decode('abcdé%C3éfg', raise_subencoding_exc=True)
-
- # check that getting raw bytes works ok
- assert _percent_decode('a%00b', subencoding=False) == b'a\x00b'
-
- # when not encodable as subencoding
- assert _percent_decode('é%25é', subencoding='ascii') == 'é%25é'
-
- def test_click_decoded_url(self):
- durl = DecodedURL.from_text(TOTAL_URL)
- durl_dest = DecodedURL.from_text('/tëst')
-
- clicked = durl.click(durl_dest)
- assert clicked.host == durl.host
- assert clicked.path == durl_dest.path
- assert clicked.path == ('tëst',)
diff --git a/hyperlink/test/test_scheme_registration.py b/hyperlink/test/test_scheme_registration.py
deleted file mode 100644
index d344353c..00000000
--- a/hyperlink/test/test_scheme_registration.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-
-from .. import _url
-from .common import HyperlinkTestCase
-from .._url import register_scheme, URL
-
-
-class TestSchemeRegistration(HyperlinkTestCase):
-
- def setUp(self):
- self._orig_scheme_port_map = dict(_url.SCHEME_PORT_MAP)
- self._orig_no_netloc_schemes = set(_url.NO_NETLOC_SCHEMES)
-
- def tearDown(self):
- _url.SCHEME_PORT_MAP = self._orig_scheme_port_map
- _url.NO_NETLOC_SCHEMES = self._orig_no_netloc_schemes
-
- def test_register_scheme_basic(self):
- register_scheme('deltron', uses_netloc=True, default_port=3030)
-
- u1 = URL.from_text('deltron://example.com')
- assert u1.scheme == 'deltron'
- assert u1.port == 3030
- assert u1.uses_netloc is True
-
- # test netloc works even when the original gives no indication
- u2 = URL.from_text('deltron:')
- u2 = u2.replace(host='example.com')
- assert u2.to_text() == 'deltron://example.com'
-
- # test default port means no emission
- u3 = URL.from_text('deltron://example.com:3030')
- assert u3.to_text() == 'deltron://example.com'
-
- register_scheme('nonetron', default_port=3031)
- u4 = URL(scheme='nonetron')
- u4 = u4.replace(host='example.com')
- assert u4.to_text() == 'nonetron://example.com'
-
- def test_register_no_netloc_scheme(self):
- register_scheme('noloctron', uses_netloc=False)
- u4 = URL(scheme='noloctron')
- u4 = u4.replace(path=("example", "path"))
- assert u4.to_text() == 'noloctron:example/path'
-
- def test_register_no_netloc_with_port(self):
- with self.assertRaises(ValueError):
- register_scheme('badnetlocless', uses_netloc=False, default_port=7)
-
- def test_invalid_uses_netloc(self):
- with self.assertRaises(ValueError):
- register_scheme('badnetloc', uses_netloc=None)
- with self.assertRaises(ValueError):
- register_scheme('badnetloc', uses_netloc=object())
-
- def test_register_invalid_uses_netloc(self):
- with self.assertRaises(ValueError):
- register_scheme('lol', uses_netloc=lambda: 'nope')
-
- def test_register_invalid_port(self):
- with self.assertRaises(ValueError):
- register_scheme('nope', default_port=lambda: 'lol')
diff --git a/hyperlink/test/test_url.py b/hyperlink/test/test_url.py
deleted file mode 100644
index 09405857..00000000
--- a/hyperlink/test/test_url.py
+++ /dev/null
@@ -1,1210 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright (c) Twisted Matrix Laboratories.
-# See LICENSE for details.
-
-from __future__ import unicode_literals
-
-import sys
-import socket
-
-from .common import HyperlinkTestCase
-from .. import URL, URLParseError
-# automatically import the py27 windows implementation when appropriate
-from .. import _url
-from .._url import inet_pton, SCHEME_PORT_MAP, parse_host
-
-
-PY2 = (sys.version_info[0] == 2)
-unicode = type(u'')
-
-
-BASIC_URL = "http://www.foo.com/a/nice/path/?zot=23&zut"
-
-# Examples from RFC 3986 section 5.4, Reference Resolution Examples
-relativeLinkBaseForRFC3986 = 'http://a/b/c/d;p?q'
-relativeLinkTestsForRFC3986 = [
- # "Normal"
- # ('g:h', 'g:h'), # can't click on a scheme-having url without an abs path
- ('g', 'http://a/b/c/g'),
- ('./g', 'http://a/b/c/g'),
- ('g/', 'http://a/b/c/g/'),
- ('/g', 'http://a/g'),
- ('//g', 'http://g'),
- ('?y', 'http://a/b/c/d;p?y'),
- ('g?y', 'http://a/b/c/g?y'),
- ('#s', 'http://a/b/c/d;p?q#s'),
- ('g#s', 'http://a/b/c/g#s'),
- ('g?y#s', 'http://a/b/c/g?y#s'),
- (';x', 'http://a/b/c/;x'),
- ('g;x', 'http://a/b/c/g;x'),
- ('g;x?y#s', 'http://a/b/c/g;x?y#s'),
- ('', 'http://a/b/c/d;p?q'),
- ('.', 'http://a/b/c/'),
- ('./', 'http://a/b/c/'),
- ('..', 'http://a/b/'),
- ('../', 'http://a/b/'),
- ('../g', 'http://a/b/g'),
- ('../..', 'http://a/'),
- ('../../', 'http://a/'),
- ('../../g', 'http://a/g'),
-
- # Abnormal examples
- # ".." cannot be used to change the authority component of a URI.
- ('../../../g', 'http://a/g'),
- ('../../../../g', 'http://a/g'),
-
- # Only include "." and ".." when they are only part of a larger segment,
- # not by themselves.
- ('/./g', 'http://a/g'),
- ('/../g', 'http://a/g'),
- ('g.', 'http://a/b/c/g.'),
- ('.g', 'http://a/b/c/.g'),
- ('g..', 'http://a/b/c/g..'),
- ('..g', 'http://a/b/c/..g'),
- # Unnecessary or nonsensical forms of "." and "..".
- ('./../g', 'http://a/b/g'),
- ('./g/.', 'http://a/b/c/g/'),
- ('g/./h', 'http://a/b/c/g/h'),
- ('g/../h', 'http://a/b/c/h'),
- ('g;x=1/./y', 'http://a/b/c/g;x=1/y'),
- ('g;x=1/../y', 'http://a/b/c/y'),
- # Separating the reference's query and fragment components from the path.
- ('g?y/./x', 'http://a/b/c/g?y/./x'),
- ('g?y/../x', 'http://a/b/c/g?y/../x'),
- ('g#s/./x', 'http://a/b/c/g#s/./x'),
- ('g#s/../x', 'http://a/b/c/g#s/../x')
-]
-
-
-ROUNDTRIP_TESTS = (
- "http://localhost",
- "http://localhost/",
- "http://127.0.0.1/",
- "http://[::127.0.0.1]/",
- "http://[::1]/",
- "http://localhost/foo",
- "http://localhost/foo/",
- "http://localhost/foo!!bar/",
- "http://localhost/foo%20bar/",
- "http://localhost/foo%2Fbar/",
- "http://localhost/foo?n",
- "http://localhost/foo?n=v",
- "http://localhost/foo?n=/a/b",
- "http://example.com/foo!@$bar?b!@z=123",
- "http://localhost/asd?a=asd%20sdf/345",
- "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)",
- "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)",
- "?sslrootcert=/Users/glyph/Downloads/rds-ca-2015-root.pem&sslmode=verify",
-
- # from boltons.urlutils' tests
-
- 'http://googlewebsite.com/e-shops.aspx',
- 'http://example.com:8080/search?q=123&business=Nothing%20Special',
- 'http://hatnote.com:9000/?arg=1&arg=2&arg=3',
- 'https://xn--bcher-kva.ch',
- 'http://xn--ggbla1c4e.xn--ngbc5azd/',
- 'http://tools.ietf.org/html/rfc3986#section-3.4',
- # 'http://wiki:pedia@hatnote.com',
- 'ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz',
- 'http://[1080:0:0:0:8:800:200C:417A]/index.html',
- 'ssh://192.0.2.16:2222/',
- 'https://[::101.45.75.219]:80/?hi=bye',
- 'ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)',
- 'mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org',
- 'news:alt.rec.motorcycle',
- 'tel:+1-800-867-5309',
- 'urn:oasis:member:A00024:x',
- ('magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%'
- '20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&'
- 'tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&'
- 'tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337'),
-
- # percent-encoded delimiters in percent-encodable fields
-
- 'https://%3A@example.com/', # colon in username
- 'https://%40@example.com/', # at sign in username
- 'https://%2f@example.com/', # slash in username
- 'https://a:%3a@example.com/', # colon in password
- 'https://a:%40@example.com/', # at sign in password
- 'https://a:%2f@example.com/', # slash in password
- 'https://a:%3f@example.com/', # question mark in password
- 'https://example.com/%2F/', # slash in path
- 'https://example.com/%3F/', # question mark in path
- 'https://example.com/%23/', # hash in path
- 'https://example.com/?%23=b', # hash in query param name
- 'https://example.com/?%3D=b', # equals in query param name
- 'https://example.com/?%26=b', # ampersand in query param name
- 'https://example.com/?a=%23', # hash in query param value
- 'https://example.com/?a=%26', # ampersand in query param value
- 'https://example.com/?a=%3D', # equals in query param value
- # double-encoded percent sign in all percent-encodable positions:
- "http://(%2525):(%2525)@example.com/(%2525)/?(%2525)=(%2525)#(%2525)",
- # colon in first part of schemeless relative url
- 'first_seg_rel_path__colon%3Anotok/second_seg__colon%3Aok',
-)
-
-
-class TestURL(HyperlinkTestCase):
- """
- Tests for L{URL}.
- """
-
- def assertUnicoded(self, u):
- """
- The given L{URL}'s components should be L{unicode}.
-
- @param u: The L{URL} to test.
- """
- self.assertTrue(isinstance(u.scheme, unicode) or u.scheme is None,
- repr(u))
- self.assertTrue(isinstance(u.host, unicode) or u.host is None,
- repr(u))
- for seg in u.path:
- self.assertEqual(type(seg), unicode, repr(u))
- for (k, v) in u.query:
- self.assertEqual(type(seg), unicode, repr(u))
- self.assertTrue(v is None or isinstance(v, unicode), repr(u))
- self.assertEqual(type(u.fragment), unicode, repr(u))
-
- def assertURL(self, u, scheme, host, path, query,
- fragment, port, userinfo=''):
- """
- The given L{URL} should have the given components.
-
- @param u: The actual L{URL} to examine.
-
- @param scheme: The expected scheme.
-
- @param host: The expected host.
-
- @param path: The expected path.
-
- @param query: The expected query.
-
- @param fragment: The expected fragment.
-
- @param port: The expected port.
-
- @param userinfo: The expected userinfo.
- """
- actual = (u.scheme, u.host, u.path, u.query,
- u.fragment, u.port, u.userinfo)
- expected = (scheme, host, tuple(path), tuple(query),
- fragment, port, u.userinfo)
- self.assertEqual(actual, expected)
-
- def test_initDefaults(self):
- """
- L{URL} should have appropriate default values.
- """
- def check(u):
- self.assertUnicoded(u)
- self.assertURL(u, 'http', '', [], [], '', 80, '')
-
- check(URL('http', ''))
- check(URL('http', '', [], []))
- check(URL('http', '', [], [], ''))
-
- def test_init(self):
- """
- L{URL} should accept L{unicode} parameters.
- """
- u = URL('s', 'h', ['p'], [('k', 'v'), ('k', None)], 'f')
- self.assertUnicoded(u)
- self.assertURL(u, 's', 'h', ['p'], [('k', 'v'), ('k', None)],
- 'f', None)
-
- self.assertURL(URL('http', '\xe0', ['\xe9'],
- [('\u03bb', '\u03c0')], '\u22a5'),
- 'http', '\xe0', ['\xe9'],
- [('\u03bb', '\u03c0')], '\u22a5', 80)
-
- def test_initPercent(self):
- """
- L{URL} should accept (and not interpret) percent characters.
- """
- u = URL('s', '%68', ['%70'], [('%6B', '%76'), ('%6B', None)],
- '%66')
- self.assertUnicoded(u)
- self.assertURL(u,
- 's', '%68', ['%70'],
- [('%6B', '%76'), ('%6B', None)],
- '%66', None)
-
- def test_repr(self):
- """
- L{URL.__repr__} will display the canonical form of the URL, wrapped in
- a L{URL.from_text} invocation, so that it is C{eval}-able but still easy
- to read.
- """
- self.assertEqual(
- repr(URL(scheme='http', host='foo', path=['bar'],
- query=[('baz', None), ('k', 'v')],
- fragment='frob')),
- "URL.from_text(%s)" % (repr(u"http://foo/bar?baz&k=v#frob"),)
- )
-
- def test_from_text(self):
- """
- Round-tripping L{URL.from_text} with C{str} results in an equivalent
- URL.
- """
- urlpath = URL.from_text(BASIC_URL)
- self.assertEqual(BASIC_URL, urlpath.to_text())
-
- def test_roundtrip(self):
- """
- L{URL.to_text} should invert L{URL.from_text}.
- """
- for test in ROUNDTRIP_TESTS:
- result = URL.from_text(test).to_text(with_password=True)
- self.assertEqual(test, result)
-
- def test_roundtrip_double_iri(self):
- for test in ROUNDTRIP_TESTS:
- url = URL.from_text(test)
- iri = url.to_iri()
- double_iri = iri.to_iri()
- assert iri == double_iri
-
- iri_text = iri.to_text(with_password=True)
- double_iri_text = double_iri.to_text(with_password=True)
- assert iri_text == double_iri_text
- return
-
- def test_equality(self):
- """
- Two URLs decoded using L{URL.from_text} will be equal (C{==}) if they
- decoded same URL string, and unequal (C{!=}) if they decoded different
- strings.
- """
- urlpath = URL.from_text(BASIC_URL)
- self.assertEqual(urlpath, URL.from_text(BASIC_URL))
- self.assertNotEqual(
- urlpath,
- URL.from_text('ftp://www.anotherinvaliddomain.com/'
- 'foo/bar/baz/?zot=21&zut')
- )
-
- def test_fragmentEquality(self):
- """
- An URL created with the empty string for a fragment compares equal
- to an URL created with an unspecified fragment.
- """
- self.assertEqual(URL(fragment=''), URL())
- self.assertEqual(URL.from_text(u"http://localhost/#"),
- URL.from_text(u"http://localhost/"))
-
- def test_child(self):
- """
- L{URL.child} appends a new path segment, but does not affect the query
- or fragment.
- """
- urlpath = URL.from_text(BASIC_URL)
- self.assertEqual("http://www.foo.com/a/nice/path/gong?zot=23&zut",
- urlpath.child('gong').to_text())
- self.assertEqual("http://www.foo.com/a/nice/path/gong%2F?zot=23&zut",
- urlpath.child('gong/').to_text())
- self.assertEqual(
- "http://www.foo.com/a/nice/path/gong%2Fdouble?zot=23&zut",
- urlpath.child('gong/double').to_text()
- )
- self.assertEqual(
- "http://www.foo.com/a/nice/path/gong%2Fdouble%2F?zot=23&zut",
- urlpath.child('gong/double/').to_text()
- )
-
- def test_multiChild(self):
- """
- L{URL.child} receives multiple segments as C{*args} and appends each in
- turn.
- """
- url = URL.from_text('http://example.com/a/b')
- self.assertEqual(url.child('c', 'd', 'e').to_text(),
- 'http://example.com/a/b/c/d/e')
-
- def test_childInitRoot(self):
- """
- L{URL.child} of a L{URL} without a path produces a L{URL} with a single
- path segment.
- """
- childURL = URL(host=u"www.foo.com").child(u"c")
- self.assertTrue(childURL.rooted)
- self.assertEqual("http://www.foo.com/c", childURL.to_text())
-
- def test_emptyChild(self):
- """
- L{URL.child} without any new segments returns the original L{URL}.
- """
- url = URL(host=u"www.foo.com")
- self.assertEqual(url.child(), url)
-
- def test_sibling(self):
- """
- L{URL.sibling} of a L{URL} replaces the last path segment, but does not
- affect the query or fragment.
- """
- urlpath = URL.from_text(BASIC_URL)
- self.assertEqual(
- "http://www.foo.com/a/nice/path/sister?zot=23&zut",
- urlpath.sibling('sister').to_text()
- )
- # Use an url without trailing '/' to check child removal.
- url_text = "http://www.foo.com/a/nice/path?zot=23&zut"
- urlpath = URL.from_text(url_text)
- self.assertEqual(
- "http://www.foo.com/a/nice/sister?zot=23&zut",
- urlpath.sibling('sister').to_text()
- )
-
- def test_click(self):
- """
- L{URL.click} interprets the given string as a relative URI-reference
- and returns a new L{URL} interpreting C{self} as the base absolute URI.
- """
- urlpath = URL.from_text(BASIC_URL)
- # A null uri should be valid (return here).
- self.assertEqual("http://www.foo.com/a/nice/path/?zot=23&zut",
- urlpath.click("").to_text())
- # A simple relative path remove the query.
- self.assertEqual("http://www.foo.com/a/nice/path/click",
- urlpath.click("click").to_text())
- # An absolute path replace path and query.
- self.assertEqual("http://www.foo.com/click",
- urlpath.click("/click").to_text())
- # Replace just the query.
- self.assertEqual("http://www.foo.com/a/nice/path/?burp",
- urlpath.click("?burp").to_text())
- # One full url to another should not generate '//' between authority.
- # and path
- self.assertTrue("//foobar" not in
- urlpath.click('http://www.foo.com/foobar').to_text())
-
- # From a url with no query clicking a url with a query, the query
- # should be handled properly.
- u = URL.from_text('http://www.foo.com/me/noquery')
- self.assertEqual('http://www.foo.com/me/17?spam=158',
- u.click('/me/17?spam=158').to_text())
-
- # Check that everything from the path onward is removed when the click
- # link has no path.
- u = URL.from_text('http://localhost/foo?abc=def')
- self.assertEqual(u.click('http://www.python.org').to_text(),
- 'http://www.python.org')
-
- # https://twistedmatrix.com/trac/ticket/8184
- u = URL.from_text('http://hatnote.com/a/b/../c/./d/e/..')
- res = 'http://hatnote.com/a/c/d/'
- self.assertEqual(u.click('').to_text(), res)
-
- # test click default arg is same as empty string above
- self.assertEqual(u.click().to_text(), res)
-
- # test click on a URL instance
- u = URL.fromText('http://localhost/foo/?abc=def')
- u2 = URL.from_text('bar')
- u3 = u.click(u2)
- self.assertEqual(u3.to_text(), 'http://localhost/foo/bar')
-
- def test_clickRFC3986(self):
- """
- L{URL.click} should correctly resolve the examples in RFC 3986.
- """
- base = URL.from_text(relativeLinkBaseForRFC3986)
- for (ref, expected) in relativeLinkTestsForRFC3986:
- self.assertEqual(base.click(ref).to_text(), expected)
-
- def test_clickSchemeRelPath(self):
- """
- L{URL.click} should not accept schemes with relative paths.
- """
- base = URL.from_text(relativeLinkBaseForRFC3986)
- self.assertRaises(NotImplementedError, base.click, 'g:h')
- self.assertRaises(NotImplementedError, base.click, 'http:h')
-
- def test_cloneUnchanged(self):
- """
- Verify that L{URL.replace} doesn't change any of the arguments it
- is passed.
- """
- urlpath = URL.from_text('https://x:1/y?z=1#A')
- self.assertEqual(urlpath.replace(urlpath.scheme,
- urlpath.host,
- urlpath.path,
- urlpath.query,
- urlpath.fragment,
- urlpath.port),
- urlpath)
- self.assertEqual(urlpath.replace(), urlpath)
-
- def test_clickCollapse(self):
- """
- L{URL.click} collapses C{.} and C{..} according to RFC 3986 section
- 5.2.4.
- """
- tests = [
- ['http://localhost/', '.', 'http://localhost/'],
- ['http://localhost/', '..', 'http://localhost/'],
- ['http://localhost/a/b/c', '.', 'http://localhost/a/b/'],
- ['http://localhost/a/b/c', '..', 'http://localhost/a/'],
- ['http://localhost/a/b/c', './d/e', 'http://localhost/a/b/d/e'],
- ['http://localhost/a/b/c', '../d/e', 'http://localhost/a/d/e'],
- ['http://localhost/a/b/c', '/./d/e', 'http://localhost/d/e'],
- ['http://localhost/a/b/c', '/../d/e', 'http://localhost/d/e'],
- ['http://localhost/a/b/c/', '../../d/e/',
- 'http://localhost/a/d/e/'],
- ['http://localhost/a/./c', '../d/e', 'http://localhost/d/e'],
- ['http://localhost/a/./c/', '../d/e', 'http://localhost/a/d/e'],
- ['http://localhost/a/b/c/d', './e/../f/../g',
- 'http://localhost/a/b/c/g'],
- ['http://localhost/a/b/c', 'd//e', 'http://localhost/a/b/d//e'],
- ]
- for start, click, expected in tests:
- actual = URL.from_text(start).click(click).to_text()
- self.assertEqual(
- actual,
- expected,
- "{start}.click({click}) => {actual} not {expected}".format(
- start=start,
- click=repr(click),
- actual=actual,
- expected=expected,
- )
- )
-
- def test_queryAdd(self):
- """
- L{URL.add} adds query parameters.
- """
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?foo=bar",
- URL.from_text("http://www.foo.com/a/nice/path/")
- .add(u"foo", u"bar").to_text())
- self.assertEqual(
- "http://www.foo.com/?foo=bar",
- URL(host=u"www.foo.com").add(u"foo", u"bar")
- .to_text())
- urlpath = URL.from_text(BASIC_URL)
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot=23&zut&burp",
- urlpath.add(u"burp").to_text())
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx",
- urlpath.add(u"burp", u"xxx").to_text())
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zing",
- urlpath.add(u"burp", u"xxx").add(u"zing").to_text())
- # Note the inversion!
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot=23&zut&zing&burp=xxx",
- urlpath.add(u"zing").add(u"burp", u"xxx").to_text())
- # Note the two values for the same name.
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zot=32",
- urlpath.add(u"burp", u"xxx").add(u"zot", '32')
- .to_text())
-
- def test_querySet(self):
- """
- L{URL.set} replaces query parameters by name.
- """
- urlpath = URL.from_text(BASIC_URL)
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot=32&zut",
- urlpath.set(u"zot", '32').to_text())
- # Replace name without value with name/value and vice-versa.
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot&zut=itworked",
- urlpath.set(u"zot").set(u"zut", u"itworked").to_text()
- )
- # Q: what happens when the query has two values and we replace?
- # A: we replace both values with a single one
- self.assertEqual(
- "http://www.foo.com/a/nice/path/?zot=32&zut",
- urlpath.add(u"zot", u"xxx").set(u"zot", '32').to_text()
- )
-
- def test_queryRemove(self):
- """
- L{URL.remove} removes instances of a query parameter.
- """
- url = URL.from_text(u"https://example.com/a/b/?foo=1&bar=2&foo=3")
- self.assertEqual(
- url.remove(u"foo"),
- URL.from_text(u"https://example.com/a/b/?bar=2")
- )
-
- self.assertEqual(
- url.remove(name=u"foo", value=u"1"),
- URL.from_text(u"https://example.com/a/b/?bar=2&foo=3")
- )
-
- self.assertEqual(
- url.remove(name=u"foo", limit=1),
- URL.from_text(u"https://example.com/a/b/?bar=2&foo=3")
- )
-
- self.assertEqual(
- url.remove(name=u"foo", value=u"1", limit=0),
- URL.from_text(u"https://example.com/a/b/?foo=1&bar=2&foo=3")
- )
-
- def test_parseEqualSignInParamValue(self):
- """
- Every C{=}-sign after the first in a query parameter is simply included
- in the value of the parameter.
- """
- u = URL.from_text('http://localhost/?=x=x=x')
- self.assertEqual(u.get(''), ['x=x=x'])
- self.assertEqual(u.to_text(), 'http://localhost/?=x=x=x')
- u = URL.from_text('http://localhost/?foo=x=x=x&bar=y')
- self.assertEqual(u.query, (('foo', 'x=x=x'), ('bar', 'y')))
- self.assertEqual(u.to_text(), 'http://localhost/?foo=x=x=x&bar=y')
-
- u = URL.from_text('https://example.com/?argument=3&argument=4&operator=%3D')
- iri = u.to_iri()
- self.assertEqual(iri.get('operator'), ['='])
- # assert that the equals is not unnecessarily escaped
- self.assertEqual(iri.to_uri().get('operator'), ['='])
-
- def test_empty(self):
- """
- An empty L{URL} should serialize as the empty string.
- """
- self.assertEqual(URL().to_text(), '')
-
- def test_justQueryText(self):
- """
- An L{URL} with query text should serialize as just query text.
- """
- u = URL(query=[(u"hello", u"world")])
- self.assertEqual(u.to_text(), '?hello=world')
-
- def test_identicalEqual(self):
- """
- L{URL} compares equal to itself.
- """
- u = URL.from_text('http://localhost/')
- self.assertEqual(u, u)
-
- def test_similarEqual(self):
- """
- URLs with equivalent components should compare equal.
- """
- u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
- u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
- self.assertEqual(u1, u2)
-
- def test_differentNotEqual(self):
- """
- L{URL}s that refer to different resources are both unequal (C{!=}) and
- also not equal (not C{==}).
- """
- u1 = URL.from_text('http://localhost/a')
- u2 = URL.from_text('http://localhost/b')
- self.assertFalse(u1 == u2, "%r != %r" % (u1, u2))
- self.assertNotEqual(u1, u2)
-
- def test_otherTypesNotEqual(self):
- """
- L{URL} is not equal (C{==}) to other types.
- """
- u = URL.from_text('http://localhost/')
- self.assertFalse(u == 42, "URL must not equal a number.")
- self.assertFalse(u == object(), "URL must not equal an object.")
- self.assertNotEqual(u, 42)
- self.assertNotEqual(u, object())
-
- def test_identicalNotUnequal(self):
- """
- Identical L{URL}s are not unequal (C{!=}) to each other.
- """
- u = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
- self.assertFalse(u != u, "%r == itself" % u)
-
- def test_similarNotUnequal(self):
- """
- Structurally similar L{URL}s are not unequal (C{!=}) to each other.
- """
- u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
- u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f')
- self.assertFalse(u1 != u2, "%r == %r" % (u1, u2))
-
- def test_differentUnequal(self):
- """
- Structurally different L{URL}s are unequal (C{!=}) to each other.
- """
- u1 = URL.from_text('http://localhost/a')
- u2 = URL.from_text('http://localhost/b')
- self.assertTrue(u1 != u2, "%r == %r" % (u1, u2))
-
- def test_otherTypesUnequal(self):
- """
- L{URL} is unequal (C{!=}) to other types.
- """
- u = URL.from_text('http://localhost/')
- self.assertTrue(u != 42, "URL must differ from a number.")
- self.assertTrue(u != object(), "URL must be differ from an object.")
-
- def test_asURI(self):
- """
- L{URL.asURI} produces an URI which converts any URI unicode encoding
- into pure US-ASCII and returns a new L{URL}.
- """
- unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
- '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}'
- '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}='
- '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}'
- '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}')
- iri = URL.from_text(unicodey)
- uri = iri.asURI()
- self.assertEqual(iri.host, '\N{LATIN SMALL LETTER E WITH ACUTE}.com')
- self.assertEqual(iri.path[0],
- '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}')
- self.assertEqual(iri.to_text(), unicodey)
- expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
- actualURI = uri.to_text()
- self.assertEqual(actualURI, expectedURI,
- '%r != %r' % (actualURI, expectedURI))
-
- def test_asIRI(self):
- """
- L{URL.asIRI} decodes any percent-encoded text in the URI, making it
- more suitable for reading by humans, and returns a new L{URL}.
- """
- asciiish = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
- uri = URL.from_text(asciiish)
- iri = uri.asIRI()
- self.assertEqual(uri.host, 'xn--9ca.com')
- self.assertEqual(uri.path[0], '%C3%A9')
- self.assertEqual(uri.to_text(), asciiish)
- expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
- '\N{LATIN SMALL LETTER E WITH ACUTE}'
- '?\N{LATIN SMALL LETTER A WITH ACUTE}='
- '\N{LATIN SMALL LETTER I WITH ACUTE}'
- '#\N{LATIN SMALL LETTER U WITH ACUTE}')
- actualIRI = iri.to_text()
- self.assertEqual(actualIRI, expectedIRI,
- '%r != %r' % (actualIRI, expectedIRI))
-
- def test_badUTF8AsIRI(self):
- """
- Bad UTF-8 in a path segment, query parameter, or fragment results in
- that portion of the URI remaining percent-encoded in the IRI.
- """
- urlWithBinary = 'http://xn--9ca.com/%00%FF/%C3%A9'
- uri = URL.from_text(urlWithBinary)
- iri = uri.asIRI()
- expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
- '%00%FF/'
- '\N{LATIN SMALL LETTER E WITH ACUTE}')
- actualIRI = iri.to_text()
- self.assertEqual(actualIRI, expectedIRI,
- '%r != %r' % (actualIRI, expectedIRI))
-
- def test_alreadyIRIAsIRI(self):
- """
- A L{URL} composed of non-ASCII text will result in non-ASCII text.
- """
- unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/'
- '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}'
- '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}='
- '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}'
- '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}')
- iri = URL.from_text(unicodey)
- alsoIRI = iri.asIRI()
- self.assertEqual(alsoIRI.to_text(), unicodey)
-
- def test_alreadyURIAsURI(self):
- """
- A L{URL} composed of encoded text will remain encoded.
- """
- expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA'
- uri = URL.from_text(expectedURI)
- actualURI = uri.asURI().to_text()
- self.assertEqual(actualURI, expectedURI)
-
- def test_userinfo(self):
- """
- L{URL.from_text} will parse the C{userinfo} portion of the URI
- separately from the host and port.
- """
- url = URL.from_text(
- 'http://someuser:somepassword@example.com/some-segment@ignore'
- )
- self.assertEqual(url.authority(True),
- 'someuser:somepassword@example.com')
- self.assertEqual(url.authority(False), 'someuser:@example.com')
- self.assertEqual(url.userinfo, 'someuser:somepassword')
- self.assertEqual(url.user, 'someuser')
- self.assertEqual(url.to_text(),
- 'http://someuser:@example.com/some-segment@ignore')
- self.assertEqual(
- url.replace(userinfo=u"someuser").to_text(),
- 'http://someuser@example.com/some-segment@ignore'
- )
-
- def test_portText(self):
- """
- L{URL.from_text} parses custom port numbers as integers.
- """
- portURL = URL.from_text(u"http://www.example.com:8080/")
- self.assertEqual(portURL.port, 8080)
- self.assertEqual(portURL.to_text(), u"http://www.example.com:8080/")
-
- def test_mailto(self):
- """
- Although L{URL} instances are mainly for dealing with HTTP, other
- schemes (such as C{mailto:}) should work as well. For example,
- L{URL.from_text}/L{URL.to_text} round-trips cleanly for a C{mailto:} URL
- representing an email address.
- """
- self.assertEqual(URL.from_text(u"mailto:user@example.com").to_text(),
- u"mailto:user@example.com")
-
- def test_queryIterable(self):
- """
- When a L{URL} is created with a C{query} argument, the C{query}
- argument is converted into an N-tuple of 2-tuples, sensibly
- handling dictionaries.
- """
- expected = (('alpha', 'beta'),)
- url = URL(query=[['alpha', 'beta']])
- self.assertEqual(url.query, expected)
- url = URL(query={'alpha': 'beta'})
- self.assertEqual(url.query, expected)
-
- def test_pathIterable(self):
- """
- When a L{URL} is created with a C{path} argument, the C{path} is
- converted into a tuple.
- """
- url = URL(path=['hello', 'world'])
- self.assertEqual(url.path, ('hello', 'world'))
-
- def test_invalidArguments(self):
- """
- Passing an argument of the wrong type to any of the constructor
- arguments of L{URL} will raise a descriptive L{TypeError}.
-
- L{URL} typechecks very aggressively to ensure that its constitutent
- parts are all properly immutable and to prevent confusing errors when
- bad data crops up in a method call long after the code that called the
- constructor is off the stack.
- """
- class Unexpected(object):
- def __str__(self):
- return "wrong"
-
- def __repr__(self):
- return ""
-
- defaultExpectation = "unicode" if bytes is str else "str"
-
- def assertRaised(raised, expectation, name):
- self.assertEqual(str(raised.exception),
- "expected {0} for {1}, got {2}".format(
- expectation,
- name, ""))
-
- def check(param, expectation=defaultExpectation):
- with self.assertRaises(TypeError) as raised:
- URL(**{param: Unexpected()})
-
- assertRaised(raised, expectation, param)
-
- check("scheme")
- check("host")
- check("fragment")
- check("rooted", "bool")
- check("userinfo")
- check("port", "int or NoneType")
-
- with self.assertRaises(TypeError) as raised:
- URL(path=[Unexpected()])
-
- assertRaised(raised, defaultExpectation, "path segment")
-
- with self.assertRaises(TypeError) as raised:
- URL(query=[(u"name", Unexpected())])
-
- assertRaised(raised, defaultExpectation + " or NoneType",
- "query parameter value")
-
- with self.assertRaises(TypeError) as raised:
- URL(query=[(Unexpected(), u"value")])
-
- assertRaised(raised, defaultExpectation, "query parameter name")
- # No custom error message for this one, just want to make sure
- # non-2-tuples don't get through.
-
- with self.assertRaises(TypeError):
- URL(query=[Unexpected()])
-
- with self.assertRaises(ValueError):
- URL(query=[('k', 'v', 'vv')])
-
- with self.assertRaises(ValueError):
- URL(query=[('k',)])
-
- url = URL.from_text("https://valid.example.com/")
- with self.assertRaises(TypeError) as raised:
- url.child(Unexpected())
- assertRaised(raised, defaultExpectation, "path segment")
- with self.assertRaises(TypeError) as raised:
- url.sibling(Unexpected())
- assertRaised(raised, defaultExpectation, "path segment")
- with self.assertRaises(TypeError) as raised:
- url.click(Unexpected())
- assertRaised(raised, defaultExpectation, "relative URL")
-
- def test_technicallyTextIsIterableBut(self):
- """
- Technically, L{str} (or L{unicode}, as appropriate) is iterable, but
- C{URL(path="foo")} resulting in C{URL.from_text("f/o/o")} is never what
- you want.
- """
- with self.assertRaises(TypeError) as raised:
- URL(path='foo')
- self.assertEqual(
- str(raised.exception),
- "expected iterable of text for path, not: {0}"
- .format(repr('foo'))
- )
-
- def test_netloc(self):
- url = URL(scheme='https')
- self.assertEqual(url.uses_netloc, True)
-
- url = URL(scheme='git+https')
- self.assertEqual(url.uses_netloc, True)
-
- url = URL(scheme='mailto')
- self.assertEqual(url.uses_netloc, False)
-
- url = URL(scheme='ztp')
- self.assertEqual(url.uses_netloc, None)
-
- url = URL.from_text('ztp://test.com')
- self.assertEqual(url.uses_netloc, True)
-
- url = URL.from_text('ztp:test:com')
- self.assertEqual(url.uses_netloc, False)
-
- def test_ipv6_with_port(self):
- t = 'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:80/'
- url = URL.from_text(t)
- assert url.host == '2001:0db8:85a3:0000:0000:8a2e:0370:7334'
- assert url.port == 80
- assert SCHEME_PORT_MAP[url.scheme] != url.port
-
- def test_basic(self):
- text = 'https://user:pass@example.com/path/to/here?k=v#nice'
- url = URL.from_text(text)
- assert url.scheme == 'https'
- assert url.userinfo == 'user:pass'
- assert url.host == 'example.com'
- assert url.path == ('path', 'to', 'here')
- assert url.fragment == 'nice'
-
- text = 'https://user:pass@127.0.0.1/path/to/here?k=v#nice'
- url = URL.from_text(text)
- assert url.scheme == 'https'
- assert url.userinfo == 'user:pass'
- assert url.host == '127.0.0.1'
- assert url.path == ('path', 'to', 'here')
-
- text = 'https://user:pass@[::1]/path/to/here?k=v#nice'
- url = URL.from_text(text)
- assert url.scheme == 'https'
- assert url.userinfo == 'user:pass'
- assert url.host == '::1'
- assert url.path == ('path', 'to', 'here')
-
- def test_invalid_url(self):
- self.assertRaises(URLParseError, URL.from_text, '#\n\n')
-
- def test_invalid_authority_url(self):
- self.assertRaises(URLParseError, URL.from_text, 'http://abc:\n\n/#')
-
- def test_invalid_ipv6(self):
- invalid_ipv6_ips = ['2001::0234:C1ab::A0:aabc:003F',
- '2001::1::3F',
- ':',
- '::::',
- '::256.0.0.1']
- for ip in invalid_ipv6_ips:
- url_text = 'http://[' + ip + ']'
- self.assertRaises(socket.error, inet_pton,
- socket.AF_INET6, ip)
- self.assertRaises(URLParseError, URL.from_text, url_text)
-
- def test_invalid_port(self):
- self.assertRaises(URLParseError, URL.from_text, 'ftp://portmouth:smash')
- self.assertRaises(ValueError, URL.from_text,
- 'http://reader.googlewebsite.com:neverforget')
-
- def test_idna(self):
- u1 = URL.from_text('http://bücher.ch')
- self.assertEquals(u1.host, 'bücher.ch')
- self.assertEquals(u1.to_text(), 'http://bücher.ch')
- self.assertEquals(u1.to_uri().to_text(), 'http://xn--bcher-kva.ch')
-
- u2 = URL.from_text('https://xn--bcher-kva.ch')
- self.assertEquals(u2.host, 'xn--bcher-kva.ch')
- self.assertEquals(u2.to_text(), 'https://xn--bcher-kva.ch')
- self.assertEquals(u2.to_iri().to_text(), u'https://bücher.ch')
-
- def test_netloc_slashes(self):
- # basic sanity checks
- url = URL.from_text('mailto:mahmoud@hatnote.com')
- self.assertEquals(url.scheme, 'mailto')
- self.assertEquals(url.to_text(), 'mailto:mahmoud@hatnote.com')
-
- url = URL.from_text('http://hatnote.com')
- self.assertEquals(url.scheme, 'http')
- self.assertEquals(url.to_text(), 'http://hatnote.com')
-
- # test that unrecognized schemes stay consistent with '//'
- url = URL.from_text('newscheme:a:b:c')
- self.assertEquals(url.scheme, 'newscheme')
- self.assertEquals(url.to_text(), 'newscheme:a:b:c')
-
- url = URL.from_text('newerscheme://a/b/c')
- self.assertEquals(url.scheme, 'newerscheme')
- self.assertEquals(url.to_text(), 'newerscheme://a/b/c')
-
- # test that reasonable guesses are made
- url = URL.from_text('git+ftp://gitstub.biz/glyph/lefkowitz')
- self.assertEquals(url.scheme, 'git+ftp')
- self.assertEquals(url.to_text(),
- 'git+ftp://gitstub.biz/glyph/lefkowitz')
-
- url = URL.from_text('what+mailto:freerealestate@enotuniq.org')
- self.assertEquals(url.scheme, 'what+mailto')
- self.assertEquals(url.to_text(),
- 'what+mailto:freerealestate@enotuniq.org')
-
- url = URL(scheme='ztp', path=('x', 'y', 'z'), rooted=True)
- self.assertEquals(url.to_text(), 'ztp:/x/y/z')
-
- # also works when the input doesn't include '//'
- url = URL(scheme='git+ftp', path=('x', 'y', 'z' ,''),
- rooted=True, uses_netloc=True)
- # broken bc urlunsplit
- self.assertEquals(url.to_text(), 'git+ftp:///x/y/z/')
-
- # really why would this ever come up but ok
- url = URL.from_text('file:///path/to/heck')
- url2 = url.replace(scheme='mailto')
- self.assertEquals(url2.to_text(), 'mailto:/path/to/heck')
-
- url_text = 'unregisteredscheme:///a/b/c'
- url = URL.from_text(url_text)
- no_netloc_url = url.replace(uses_netloc=False)
- self.assertEquals(no_netloc_url.to_text(), 'unregisteredscheme:/a/b/c')
- netloc_url = url.replace(uses_netloc=True)
- self.assertEquals(netloc_url.to_text(), url_text)
-
- return
-
- def test_wrong_constructor(self):
- with self.assertRaises(ValueError):
- # whole URL not allowed
- URL(BASIC_URL)
- with self.assertRaises(ValueError):
- # explicitly bad scheme not allowed
- URL('HTTP_____more_like_imHoTTeP')
-
- def test_encoded_userinfo(self):
- url = URL.from_text('http://user:pass@example.com')
- assert url.userinfo == 'user:pass'
- url = url.replace(userinfo='us%20her:pass')
- iri = url.to_iri()
- assert iri.to_text(with_password=True) == 'http://us her:pass@example.com'
- assert iri.to_text(with_password=False) == 'http://us her:@example.com'
- assert iri.to_uri().to_text(with_password=True) == 'http://us%20her:pass@example.com'
-
- def test_hash(self):
- url_map = {}
- url1 = URL.from_text('http://blog.hatnote.com/ask?utm_source=geocity')
- assert hash(url1) == hash(url1) # sanity
-
- url_map[url1] = 1
-
- url2 = URL.from_text('http://blog.hatnote.com/ask')
- url2 = url2.set('utm_source', 'geocity')
-
- url_map[url2] = 2
-
- assert len(url_map) == 1
- assert list(url_map.values()) == [2]
-
- assert hash(URL()) == hash(URL()) # slightly more sanity
-
- def test_dir(self):
- url = URL()
- res = dir(url)
-
- assert len(res) > 15
- # twisted compat
- assert 'fromText' not in res
- assert 'asText' not in res
- assert 'asURI' not in res
- assert 'asIRI' not in res
-
- def test_twisted_compat(self):
- url = URL.fromText(u'http://example.com/a%20té%C3%A9st')
- assert url.asText() == 'http://example.com/a%20té%C3%A9st'
- assert url.asURI().asText() == 'http://example.com/a%20t%C3%A9%C3%A9st'
- # TODO: assert url.asIRI().asText() == u'http://example.com/a%20téést'
-
- def test_set_ordering(self):
- # TODO
- url = URL.from_text('http://example.com/?a=b&c')
- url = url.set(u'x', u'x')
- url = url.add(u'x', u'y')
- assert url.to_text() == u'http://example.com/?a=b&x=x&c&x=y'
- # Would expect:
- # assert url.to_text() == u'http://example.com/?a=b&c&x=x&x=y'
-
- def test_schemeless_path(self):
- "See issue #4"
- u1 = URL.from_text("urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob")
- u2 = URL.from_text(u1.to_text())
- assert u1 == u2 # sanity testing roundtripping
-
- u3 = URL.from_text(u1.to_iri().to_text())
- assert u1 == u3
- assert u2 == u3
-
- # test that colons are ok past the first segment
- u4 = URL.from_text("first-segment/urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob")
- u5 = u4.to_iri()
- assert u5.to_text() == u'first-segment/urn:ietf:wg:oauth:2.0:oob'
-
- u6 = URL.from_text(u5.to_text()).to_uri()
- assert u5 == u6 # colons stay decoded bc they're not in the first seg
-
- def test_emoji_domain(self):
- "See issue #7, affecting only narrow builds (2.6-3.3)"
- url = URL.from_text('https://xn--vi8hiv.ws')
- iri = url.to_iri()
- iri.to_text()
- # as long as we don't get ValueErrors, we're good
-
- def test_delim_in_param(self):
- "Per issue #6 and #8"
- self.assertRaises(ValueError, URL, scheme=u'http', host=u'a/c')
- self.assertRaises(ValueError, URL, path=(u"?",))
- self.assertRaises(ValueError, URL, path=(u"#",))
- self.assertRaises(ValueError, URL, query=((u"&", "test")))
-
- def test_empty_paths_eq(self):
- u1 = URL.from_text('http://example.com/')
- u2 = URL.from_text('http://example.com')
-
- assert u1 == u2
-
- u1 = URL.from_text('http://example.com')
- u2 = URL.from_text('http://example.com')
-
- assert u1 == u2
-
- u1 = URL.from_text('http://example.com')
- u2 = URL.from_text('http://example.com/')
-
- assert u1 == u2
-
- u1 = URL.from_text('http://example.com/')
- u2 = URL.from_text('http://example.com/')
-
- assert u1 == u2
-
- def test_from_text_type(self):
- assert URL.from_text(u'#ok').fragment == u'ok' # sanity
- self.assertRaises(TypeError, URL.from_text, b'bytes://x.y.z')
- self.assertRaises(TypeError, URL.from_text, object())
-
- def test_from_text_bad_authority(self):
- # bad ipv6 brackets
- self.assertRaises(URLParseError, URL.from_text, 'http://[::1/')
- self.assertRaises(URLParseError, URL.from_text, 'http://::1]/')
- self.assertRaises(URLParseError, URL.from_text, 'http://[[::1]/')
- self.assertRaises(URLParseError, URL.from_text, 'http://[::1]]/')
-
- # empty port
- self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1:')
- # non-integer port
- self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1:hi')
- # extra port colon (makes for an invalid host)
- self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1::80')
-
- def test_normalize(self):
- url = URL.from_text('HTTP://Example.com/A%61/./../A%61?B%62=C%63#D%64')
- assert url.get('Bb') == []
- assert url.get('B%62') == ['C%63']
- assert len(url.path) == 4
-
- # test that most expected normalizations happen
- norm_url = url.normalize()
-
- assert norm_url.scheme == 'http'
- assert norm_url.host == 'example.com'
- assert norm_url.path == ('Aa',)
- assert norm_url.get('Bb') == ['Cc']
- assert norm_url.fragment == 'Dd'
- assert norm_url.to_text() == 'http://example.com/Aa?Bb=Cc#Dd'
-
- # test that flags work
- noop_norm_url = url.normalize(scheme=False, host=False,
- path=False, query=False, fragment=False)
- assert noop_norm_url == url
-
- # test that empty paths get at least one slash
- slashless_url = URL.from_text('http://example.io')
- slashful_url = slashless_url.normalize()
- assert slashful_url.to_text() == 'http://example.io/'
-
- # test case normalization for percent encoding
- delimited_url = URL.from_text('/a%2fb/cd%3f?k%3d=v%23#test')
- norm_delimited_url = delimited_url.normalize()
- assert norm_delimited_url.to_text() == '/a%2Fb/cd%3F?k%3D=v%23#test'
-
- # test invalid percent encoding during normalize
- assert URL(path=('', '%te%sts')).normalize(percents=False).to_text() == '/%te%sts'
- assert URL(path=('', '%te%sts')).normalize().to_text() == '/%25te%25sts'
-
- percenty_url = URL(scheme='ftp', path=['%%%', '%a%b'], query=[('%', '%%')], fragment='%', userinfo='%:%')
-
- assert percenty_url.to_text(with_password=True) == 'ftp://%:%@/%%%/%a%b?%=%%#%'
- assert percenty_url.normalize().to_text(with_password=True) == 'ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25'
-
- def test_str(self):
- # see also issue #49
- text = u'http://example.com/á/y%20a%20y/?b=%25'
- url = URL.from_text(text)
- assert unicode(url) == text
- assert bytes(url) == b'http://example.com/%C3%A1/y%20a%20y/?b=%25'
-
- if PY2:
- assert isinstance(str(url), bytes)
- assert isinstance(unicode(url), unicode)
- else:
- assert isinstance(str(url), unicode)
- assert isinstance(bytes(url), bytes)
-
- def test_idna_corners(self):
- text = u'http://abé.com/'
- url = URL.from_text(text)
- assert url.to_iri().host == u'abé.com'
- assert url.to_uri().host == u'xn--ab-cja.com'
-
- url = URL.from_text("http://ドメイン.テスト.co.jp#test")
- assert url.to_iri().host == u'ドメイン.テスト.co.jp'
- assert url.to_uri().host == u'xn--eckwd4c7c.xn--zckzah.co.jp'
-
- assert url.to_uri().get_decoded_url().host == u'ドメイン.テスト.co.jp'
-
- assert URL.from_text('http://Example.com').to_uri().get_decoded_url().host == 'example.com'
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..e7efe6ae
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,10 @@
+[build-system]
+
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+
+[tool.black]
+
+line-length = 80
+target-version = ["py27"]
diff --git a/requirements-test.txt b/requirements-test.txt
deleted file mode 100644
index 0e9a261b..00000000
--- a/requirements-test.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-coverage==4.4.1
-idna==2.5
-pytest==2.9.2
-pytest-cov==2.3.0
-tox==2.6.0
diff --git a/setup.py b/setup.py
index 71cd4b18..f057fb8a 100644
--- a/setup.py
+++ b/setup.py
@@ -2,46 +2,56 @@
are you've used several just to read this text.
Hyperlink is a featureful, pure-Python implementation of the URL, with
-an emphasis on correctness. BSD licensed.
+an emphasis on correctness. MIT licensed.
See the docs at http://hyperlink.readthedocs.io.
"""
-from setuptools import setup
-
-
-__author__ = 'Mahmoud Hashemi and Glyph Lefkowitz'
-__version__ = '19.0.1dev'
-__contact__ = 'mahmoud@hatnote.com'
-__url__ = 'https://github.com/python-hyper/hyperlink'
-__license__ = 'MIT'
-
-
-setup(name='hyperlink',
- version=__version__,
- description="A featureful, immutable, and correct URL for Python.",
- long_description=__doc__,
- author=__author__,
- author_email=__contact__,
- url=__url__,
- packages=['hyperlink', 'hyperlink.test'],
- include_package_data=True,
- zip_safe=False,
- license=__license__,
- platforms='any',
- install_requires=['idna>=2.5'],
- classifiers=[
- 'Topic :: Utilities',
- 'Intended Audience :: Developers',
- 'Topic :: Software Development :: Libraries',
- 'Development Status :: 5 - Production/Stable',
- 'Programming Language :: Python :: 2.6',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: Implementation :: PyPy', ]
- )
+from setuptools import find_packages, setup
+
+
+__author__ = "Mahmoud Hashemi and Glyph Lefkowitz"
+__version__ = "21.0.1dev"
+__contact__ = "mahmoud@hatnote.com"
+__url__ = "https://github.com/python-hyper/hyperlink"
+__license__ = "MIT"
+
+
+setup(
+ name="hyperlink",
+ version=__version__,
+ description="A featureful, immutable, and correct URL for Python.",
+ long_description=__doc__,
+ author=__author__,
+ author_email=__contact__,
+ url=__url__,
+ packages=find_packages(where="src"),
+ package_dir={"": "src"},
+ package_data=dict(hyperlink=["py.typed", "idna-tables-properties.csv.gz"]),
+ zip_safe=False,
+ license=__license__,
+ platforms="any",
+ install_requires=["idna>=2.5", 'typing ; python_version<"3.5"'],
+ python_requires=">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*",
+ classifiers=[
+ "Topic :: Utilities",
+ "Intended Audience :: Developers",
+ "Topic :: Software Development :: Libraries",
+ "Development Status :: 5 - Production/Stable",
+ "Programming Language :: Python :: 2",
+ "Programming Language :: Python :: 2.6",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: Implementation :: PyPy",
+ "License :: OSI Approved :: MIT License",
+ ],
+)
"""
A brief checklist for release:
diff --git a/src/hyperlink/__init__.py b/src/hyperlink/__init__.py
new file mode 100644
index 00000000..f680b01a
--- /dev/null
+++ b/src/hyperlink/__init__.py
@@ -0,0 +1,17 @@
+from ._url import (
+ parse,
+ register_scheme,
+ URL,
+ EncodedURL,
+ DecodedURL,
+ URLParseError,
+)
+
+__all__ = (
+ "parse",
+ "register_scheme",
+ "URL",
+ "EncodedURL",
+ "DecodedURL",
+ "URLParseError",
+)
diff --git a/src/hyperlink/_socket.py b/src/hyperlink/_socket.py
new file mode 100644
index 00000000..3bcf8970
--- /dev/null
+++ b/src/hyperlink/_socket.py
@@ -0,0 +1,53 @@
+try:
+ from socket import inet_pton
+except ImportError:
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING: # pragma: no cover
+ pass
+ else:
+ # based on https://gist.github.com/nnemkin/4966028
+ # this code only applies on Windows Python 2.7
+ import ctypes
+ import socket
+
+ class SockAddr(ctypes.Structure):
+ _fields_ = [
+ ("sa_family", ctypes.c_short),
+ ("__pad1", ctypes.c_ushort),
+ ("ipv4_addr", ctypes.c_byte * 4),
+ ("ipv6_addr", ctypes.c_byte * 16),
+ ("__pad2", ctypes.c_ulong),
+ ]
+
+ WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA
+ WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA
+
+ def inet_pton(address_family, ip_string):
+ # type: (int, str) -> bytes
+ addr = SockAddr()
+ ip_string_bytes = ip_string.encode("ascii")
+ addr.sa_family = address_family
+ addr_size = ctypes.c_int(ctypes.sizeof(addr))
+
+ try:
+ attribute, size = {
+ socket.AF_INET: ("ipv4_addr", 4),
+ socket.AF_INET6: ("ipv6_addr", 16),
+ }[address_family]
+ except KeyError:
+ raise socket.error("unknown address family")
+
+ if (
+ WSAStringToAddressA(
+ ip_string_bytes,
+ address_family,
+ None,
+ ctypes.byref(addr),
+ ctypes.byref(addr_size),
+ )
+ != 0
+ ):
+ raise socket.error(ctypes.FormatError())
+
+ return ctypes.string_at(getattr(addr, attribute), size)
diff --git a/src/hyperlink/_url.py b/src/hyperlink/_url.py
new file mode 100644
index 00000000..8797b5cc
--- /dev/null
+++ b/src/hyperlink/_url.py
@@ -0,0 +1,2472 @@
+# -*- coding: utf-8 -*-
+u"""Hyperlink provides Pythonic URL parsing, construction, and rendering.
+
+Usage is straightforward::
+
+ >>> import hyperlink
+ >>> url = hyperlink.parse(u'http://github.com/mahmoud/hyperlink?utm_source=docs')
+ >>> url.host
+ u'github.com'
+ >>> secure_url = url.replace(scheme=u'https')
+ >>> secure_url.get('utm_source')[0]
+ u'docs'
+
+Hyperlink's API centers on the :class:`DecodedURL` type, which wraps
+the lower-level :class:`URL`, both of which can be returned by the
+:func:`parse()` convenience function.
+
+""" # noqa: E501
+
+import re
+import sys
+import string
+import socket
+from socket import AF_INET, AF_INET6
+
+try:
+ from socket import AddressFamily
+except ImportError:
+ AddressFamily = int # type: ignore[assignment,misc]
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Mapping,
+ Optional,
+ Sequence,
+ Text,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+ cast,
+ TYPE_CHECKING,
+ overload,
+)
+from unicodedata import normalize
+from ._socket import inet_pton
+
+try:
+ from collections.abc import Mapping as MappingABC
+except ImportError: # Python 2
+ from collections import Mapping as MappingABC
+
+from idna import encode as idna_encode, decode as idna_decode
+
+
+PY2 = sys.version_info[0] == 2
+try:
+ unichr
+except NameError: # Py3
+ unichr = chr # type: Callable[[int], Text]
+NoneType = type(None) # type: Type[None]
+QueryPairs = Tuple[Tuple[Text, Optional[Text]], ...] # internal representation
+QueryParameters = Union[
+ Mapping[Text, Optional[Text]],
+ QueryPairs,
+ Iterable[Tuple[Text, Optional[Text]]],
+]
+T = TypeVar("T")
+# Literal is not available in all pythons so we only bring it in for mypy.
+if TYPE_CHECKING:
+ from typing import Literal
+
+
+# from boltons.typeutils
+def make_sentinel(name="_MISSING", var_name=""):
+ # type: (str, str) -> object
+ """Creates and returns a new **instance** of a new class, suitable for
+ usage as a "sentinel", a kind of singleton often used to indicate
+ a value is missing when ``None`` is a valid input.
+
+ Args:
+ name: Name of the Sentinel
+ var_name: Set this name to the name of the variable in its respective
+ module enable pickle-ability.
+
+ >>> make_sentinel(var_name='_MISSING')
+ _MISSING
+
+ The most common use cases here in boltons are as default values
+ for optional function arguments, partly because of its
+ less-confusing appearance in automatically generated
+ documentation. Sentinels also function well as placeholders in queues
+ and linked lists.
+
+ .. note::
+
+ By design, additional calls to ``make_sentinel`` with the same
+ values will not produce equivalent objects.
+
+ >>> make_sentinel('TEST') == make_sentinel('TEST')
+ False
+ >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST'))
+ False
+ """
+
+ class Sentinel(object):
+ def __init__(self):
+ # type: () -> None
+ self.name = name
+ self.var_name = var_name
+
+ def __repr__(self):
+ # type: () -> str
+ if self.var_name:
+ return self.var_name
+ return "%s(%r)" % (self.__class__.__name__, self.name)
+
+ if var_name:
+ # superclass type hints don't allow str return type, but it is
+ # allowed in the docs, hence the ignore[override] below
+ def __reduce__(self):
+ # type: () -> str
+ return self.var_name
+
+ def __nonzero__(self):
+ # type: () -> bool
+ return False
+
+ __bool__ = __nonzero__
+
+ return Sentinel()
+
+
+_unspecified = _UNSET = make_sentinel("_UNSET") # type: Any
+
+
+# RFC 3986 Section 2.3, Unreserved URI Characters
+# https://tools.ietf.org/html/rfc3986#section-2.3
+_UNRESERVED_CHARS = frozenset(
+ "~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
+)
+
+
+# URL parsing regex (based on RFC 3986 Appendix B, with modifications)
+_URL_RE = re.compile(
+ r"^((?P<scheme>[^:/?#]+):)?"
+ r"((?P<_netloc_sep>//)"
+ r"(?P<authority>[^/?#]*))?"
+ r"(?P<path>[^?#]*)"
+ r"(\?(?P<query>[^#]*))?"
+ r"(#(?P<fragment>.*))?$"
+)
+_SCHEME_RE = re.compile(r"^[a-zA-Z0-9+-.]*$")
+_AUTHORITY_RE = re.compile(
+ r"^(?:(?P<userinfo>[^@/?#]*)@)?"
+ r"(?P<host>"
+ r"(?:\[(?P<ipv6_host>[^[\]/?#]*)\])"
+ r"|(?P<plain_host>[^:/?#[\]]*)"
+ r"|(?P<bad_host>.*?))?"
+ r"(?::(?P<port>.*))?$"
+)
+
+
+_HEX_CHAR_MAP = dict(
+ [
+ ((a + b).encode("ascii"), unichr(int(a + b, 16)).encode("charmap"))
+ for a in string.hexdigits
+ for b in string.hexdigits
+ ]
+)
+_ASCII_RE = re.compile("([\x00-\x7f]+)")
+
+# RFC 3986 section 2.2, Reserved Characters
+# https://tools.ietf.org/html/rfc3986#section-2.2
+_GEN_DELIMS = frozenset(u":/?#[]@")
+_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
+_ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS
+
+_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u"%")
+_USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE
+_PATH_SAFE = _USERINFO_SAFE | set(u":@")
+_PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE
+_SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(":")
+_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE
+_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?")
+_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
+_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&")
+_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE
+_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u"=")
+_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE
+
+
+def _make_decode_map(delims, allow_percent=False):
+ # type: (Iterable[Text], bool) -> Mapping[bytes, bytes]
+ ret = dict(_HEX_CHAR_MAP)
+ if not allow_percent:
+ delims = set(delims) | set([u"%"])
+ for delim in delims:
+ _hexord = "{0:02X}".format(ord(delim)).encode("ascii")
+ _hexord_lower = _hexord.lower()
+ ret.pop(_hexord)
+ if _hexord != _hexord_lower:
+ ret.pop(_hexord_lower)
+ return ret
+
+
+def _make_quote_map(safe_chars):
+ # type: (Iterable[Text]) -> Mapping[Union[int, Text], Text]
+ ret = {} # type: Dict[Union[int, Text], Text]
+ # v is included in the dict for py3 mostly, because bytestrings
+ # are iterables of ints, of course!
+ for i, v in zip(range(256), range(256)):
+ c = chr(v)
+ if c in safe_chars:
+ ret[c] = ret[v] = c
+ else:
+ ret[c] = ret[v] = "%{0:02X}".format(i)
+ return ret
+
+
+_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
+_USERINFO_DECODE_MAP = _make_decode_map(_USERINFO_DELIMS)
+_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
+_SCHEMELESS_PATH_PART_QUOTE_MAP = _make_quote_map(_SCHEMELESS_PATH_SAFE)
+_PATH_DECODE_MAP = _make_decode_map(_PATH_DELIMS)
+_QUERY_KEY_QUOTE_MAP = _make_quote_map(_QUERY_KEY_SAFE)
+_QUERY_KEY_DECODE_MAP = _make_decode_map(_QUERY_KEY_DELIMS)
+_QUERY_VALUE_QUOTE_MAP = _make_quote_map(_QUERY_VALUE_SAFE)
+_QUERY_VALUE_DECODE_MAP = _make_decode_map(_QUERY_VALUE_DELIMS | set("+"))
+_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
+_FRAGMENT_DECODE_MAP = _make_decode_map(_FRAGMENT_DELIMS)
+_UNRESERVED_QUOTE_MAP = _make_quote_map(_UNRESERVED_CHARS)
+_UNRESERVED_DECODE_MAP = dict(
+ [
+ (k, v)
+ for k, v in _HEX_CHAR_MAP.items()
+ if v.decode("ascii", "replace") in _UNRESERVED_CHARS
+ ]
+)
+
+_ROOT_PATHS = frozenset(((), (u"",)))
+
+
+def _encode_reserved(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """A very comprehensive percent encoding for encoding all
+ delimiters. Used for arguments to DecodedURL, where a % means a
+ percent sign, and not the character used by URLs for escaping
+ bytes.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _UNRESERVED_QUOTE_MAP[t] if t in _UNRESERVED_CHARS else t
+ for t in text
+ ]
+ )
+
+
+def _encode_path_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ "Percent-encode a single segment of a URL path."
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_PATH_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t for t in text]
+ )
+
+
+def _encode_schemeless_path_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Percent-encode the first segment of a URL path for a URL without a
+ scheme specified.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _SCHEMELESS_PATH_PART_QUOTE_MAP[t]
+ if t in _SCHEMELESS_PATH_DELIMS
+ else t
+ for t in text
+ ]
+ )
+
+
+def _encode_path_parts(
+ text_parts, # type: Sequence[Text]
+ rooted=False, # type: bool
+ has_scheme=True, # type: bool
+ has_authority=True, # type: bool
+ maximal=True, # type: bool
+):
+ # type: (...) -> Sequence[Text]
+ """
+ Percent-encode a tuple of path parts into a complete path.
+
+ Setting *maximal* to False percent-encodes only the reserved
+ characters that are syntactically necessary for serialization,
+ preserving any IRI-style textual data.
+
+ Leaving *maximal* set to its default True percent-encodes
+ everything required to convert a portion of an IRI to a portion of
+ a URI.
+
+ RFC 3986 3.3:
+
+ If a URI contains an authority component, then the path component
+ must either be empty or begin with a slash ("/") character. If a URI
+ does not contain an authority component, then the path cannot begin
+ with two slash characters ("//"). In addition, a URI reference
+ (Section 4.1) may be a relative-path reference, in which case the
+ first path segment cannot contain a colon (":") character.
+ """
+ if not text_parts:
+ return ()
+ if rooted:
+ text_parts = (u"",) + tuple(text_parts)
+ # elif has_authority and text_parts:
+ # raise Exception('see rfc above') # TODO: too late to fail like this?
+ encoded_parts = [] # type: List[Text]
+ if has_scheme:
+ encoded_parts = [
+ _encode_path_part(part, maximal=maximal) if part else part
+ for part in text_parts
+ ]
+ else:
+ encoded_parts = [_encode_schemeless_path_part(text_parts[0])]
+ encoded_parts.extend(
+ [
+ _encode_path_part(part, maximal=maximal) if part else part
+ for part in text_parts[1:]
+ ]
+ )
+ return tuple(encoded_parts)
+
+
+def _encode_query_key(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """
+ Percent-encode a single query string key or value.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_QUERY_KEY_QUOTE_MAP[t] if t in _QUERY_KEY_DELIMS else t for t in text]
+ )
+
+
+def _encode_query_value(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """
+ Percent-encode a single query string key or value.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _QUERY_VALUE_QUOTE_MAP[t] if t in _QUERY_VALUE_DELIMS else t
+ for t in text
+ ]
+ )
+
+
+def _encode_fragment_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Quote the fragment part of the URL. Fragments don't have
+ subdelimiters, so the whole URL fragment can be passed.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t for t in text]
+ )
+
+
+def _encode_userinfo_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Quote special characters in either the username or password
+ section of the URL.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS else t
+ for t in text
+ ]
+ )
+
+
+# This port list painstakingly curated by hand searching through
+# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+# and
+# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
+SCHEME_PORT_MAP = {
+ "acap": 674,
+ "afp": 548,
+ "dict": 2628,
+ "dns": 53,
+ "file": None,
+ "ftp": 21,
+ "git": 9418,
+ "gopher": 70,
+ "http": 80,
+ "https": 443,
+ "imap": 143,
+ "ipp": 631,
+ "ipps": 631,
+ "irc": 194,
+ "ircs": 6697,
+ "ldap": 389,
+ "ldaps": 636,
+ "mms": 1755,
+ "msrp": 2855,
+ "msrps": None,
+ "mtqp": 1038,
+ "nfs": 111,
+ "nntp": 119,
+ "nntps": 563,
+ "pop": 110,
+ "prospero": 1525,
+ "redis": 6379,
+ "rsync": 873,
+ "rtsp": 554,
+ "rtsps": 322,
+ "rtspu": 5005,
+ "sftp": 22,
+ "smb": 445,
+ "snmp": 161,
+ "ssh": 22,
+ "steam": None,
+ "svn": 3690,
+ "telnet": 23,
+ "ventrilo": 3784,
+ "vnc": 5900,
+ "wais": 210,
+ "ws": 80,
+ "wss": 443,
+ "xmpp": None,
+}
+
+# This list of schemes that don't use authorities is also from the link above.
+NO_NETLOC_SCHEMES = set(
+ [
+ "urn",
+ "about",
+ "bitcoin",
+ "blob",
+ "data",
+ "geo",
+ "magnet",
+ "mailto",
+ "news",
+ "pkcs11",
+ "sip",
+ "sips",
+ "tel",
+ ]
+)
+# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
+
+NO_QUERY_PLUS_SCHEMES = set()
+
+
+def register_scheme(
+ text, uses_netloc=True, default_port=None, query_plus_is_space=True
+):
+ # type: (Text, bool, Optional[int], bool) -> None
+ """Registers new scheme information, resulting in correct port and
+ slash behavior from the URL object. There are dozens of standard
+ schemes preregistered, so this function is mostly meant for
+ proprietary internal customizations or stopgaps on missing
+ standards information. If a scheme seems to be missing, please
+ `file an issue`_!
+
+ Args:
+ text: A string representation of the scheme.
+ (the 'http' in 'http://hatnote.com')
+ uses_netloc: Does the scheme support specifying a
+ network host? For instance, "http" does, "mailto" does
+ not. Defaults to True.
+ default_port: The default port, if any, for
+ netloc-using schemes.
+ query_plus_is_space: If true, a "+" in the query string should be
+ decoded as a space by DecodedURL.
+
+ .. _file an issue: https://github.com/mahmoud/hyperlink/issues
+ """
+ text = text.lower()
+ if default_port is not None:
+ try:
+ default_port = int(default_port)
+ except (ValueError, TypeError):
+ raise ValueError(
+ "default_port expected integer or None, not %r"
+ % (default_port,)
+ )
+
+ if uses_netloc is True:
+ SCHEME_PORT_MAP[text] = default_port
+ elif uses_netloc is False:
+ if default_port is not None:
+ raise ValueError(
+ "unexpected default port while specifying"
+ " non-netloc scheme: %r" % default_port
+ )
+ NO_NETLOC_SCHEMES.add(text)
+ else:
+ raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)
+
+ if not query_plus_is_space:
+ NO_QUERY_PLUS_SCHEMES.add(text)
+
+ return
+
+
+def scheme_uses_netloc(scheme, default=None):
+ # type: (Text, Optional[bool]) -> Optional[bool]
+ """Whether or not a URL uses :code:`:` or :code:`://` to separate the
+ scheme from the rest of the URL depends on the scheme's own
+ standard definition. There is no way to infer this behavior
+ from other parts of the URL. A scheme either supports network
+ locations or it does not.
+
+ The URL type's approach to this is to check for explicitly
+ registered schemes, with common schemes like HTTP
+ preregistered. This is the same approach taken by
+ :mod:`urlparse`.
+
+ URL adds two additional heuristics if the scheme as a whole is
+ not registered. First, it attempts to check the subpart of the
+ scheme after the last ``+`` character. This adds intuitive
+ behavior for schemes like ``git+ssh``. Second, if a URL with
+ an unrecognized scheme is loaded, it will maintain the
+ separator it sees.
+ """
+ if not scheme:
+ return False
+ scheme = scheme.lower()
+ if scheme in SCHEME_PORT_MAP:
+ return True
+ if scheme in NO_NETLOC_SCHEMES:
+ return False
+ if scheme.split("+")[-1] in SCHEME_PORT_MAP:
+ return True
+ return default
+
+
+class URLParseError(ValueError):
+ """Exception inheriting from :exc:`ValueError`, raised when failing to
+ parse a URL. Mostly raised on invalid ports and IPv6 addresses.
+ """
+
+ pass
+
+
+def _optional(argument, default):
+ # type: (Any, Any) -> Any
+ if argument is _UNSET:
+ return default
+ else:
+ return argument
+
+
+def _typecheck(name, value, *types):
+ # type: (Text, T, Type[Any]) -> T
+ """
+ Check that the given *value* is one of the given *types*, or raise an
+ exception describing the problem using *name*.
+ """
+ if not types:
+ raise ValueError("expected one or more types, maybe use _textcheck?")
+ if not isinstance(value, types):
+ raise TypeError(
+ "expected %s for %s, got %r"
+ % (" or ".join([t.__name__ for t in types]), name, value)
+ )
+ return value
+
+
+def _textcheck(name, value, delims=frozenset(), nullable=False):
+ # type: (Text, T, Iterable[Text], bool) -> T
+ if not isinstance(value, Text):
+ if nullable and value is None:
+ # used by query string values
+ return value # type: ignore[unreachable]
+ else:
+ str_name = "unicode" if PY2 else "str"
+ exp = str_name + " or NoneType" if nullable else str_name
+ raise TypeError("expected %s for %s, got %r" % (exp, name, value))
+ if delims and set(value) & set(delims): # TODO: test caching into regexes
+ raise ValueError(
+ "one or more reserved delimiters %s present in %s: %r"
+ % ("".join(delims), name, value)
+ )
+ return value # type: ignore[return-value] # T vs. Text
+
+
+def iter_pairs(iterable):
+ # type: (Iterable[Any]) -> Iterator[Any]
+ """
+ Iterate over the (key, value) pairs in ``iterable``.
+
+ This handles dictionaries sensibly, and falls back to assuming the
+ iterable yields (key, value) pairs. This behaviour is similar to
+ what Python's ``dict()`` constructor does.
+ """
+ if isinstance(iterable, MappingABC):
+ iterable = iterable.items()
+ return iter(iterable)
+
+
+def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_UNRESERVED_DECODE_MAP,
+ )
+
+
+def _decode_userinfo_part(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_USERINFO_DECODE_MAP,
+ )
+
+
+def _decode_path_part(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ """
+ >>> _decode_path_part(u'%61%77%2f%7a')
+ u'aw%2fz'
+ >>> _decode_path_part(u'%61%77%2f%7a', normalize_case=True)
+ u'aw%2Fz'
+ """
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_PATH_DECODE_MAP,
+ )
+
+
+def _decode_query_key(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_QUERY_KEY_DECODE_MAP,
+ )
+
+
+def _decode_query_value(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_QUERY_VALUE_DECODE_MAP,
+ )
+
+
+def _decode_fragment_part(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_FRAGMENT_DECODE_MAP,
+ )
+
+
+def _percent_decode(
+ text, # type: Text
+ normalize_case=False, # type: bool
+ subencoding="utf-8", # type: Text
+ raise_subencoding_exc=False, # type: bool
+ encode_stray_percents=False, # type: bool
+ _decode_map=_HEX_CHAR_MAP, # type: Mapping[bytes, bytes]
+):
+ # type: (...) -> Text
+ """Convert percent-encoded text characters to their normal,
+ human-readable equivalents.
+
+ All characters in the input text must be encodable by
+ *subencoding*. All special characters underlying the values in the
+ percent-encoding must be decodable as *subencoding*. If a
+ non-*subencoding*-valid string is passed, the original text is
+ returned with no changes applied.
+
+ Only called by field-tailored variants, e.g.,
+ :func:`_decode_path_part`, as every percent-encodable part of the
+ URL has characters which should not be percent decoded.
+
+ >>> _percent_decode(u'abc%20def')
+ u'abc def'
+
+ Args:
+ text: Text with percent-encoding present.
+ normalize_case: Whether undecoded percent segments, such as encoded
+ delimiters, should be uppercased, per RFC 3986 Section 2.1.
+ See :func:`_decode_path_part` for an example.
+ subencoding: The name of the encoding underlying the percent-encoding.
+ raise_subencoding_exc: Whether an error in decoding the bytes
+ underlying the percent-decoding should be raised.
+
+ Returns:
+ Text: The percent-decoded version of *text*, decoded by *subencoding*.
+ """
+ try:
+ quoted_bytes = text.encode(subencoding)
+ except UnicodeEncodeError:
+ return text
+
+ bits = quoted_bytes.split(b"%")
+ if len(bits) == 1:
+ return text
+
+ res = [bits[0]]
+ append = res.append
+
+ for item in bits[1:]:
+ hexpair, rest = item[:2], item[2:]
+ try:
+ append(_decode_map[hexpair])
+ append(rest)
+ except KeyError:
+ pair_is_hex = hexpair in _HEX_CHAR_MAP
+ if pair_is_hex or not encode_stray_percents:
+ append(b"%")
+ else:
+ # if it's undecodable, treat as a real percent sign,
+ # which is reserved (because it wasn't in the
+ # context-aware _decode_map passed in), and should
+ # stay in an encoded state.
+ append(b"%25")
+ if normalize_case and pair_is_hex:
+ append(hexpair.upper())
+ append(rest)
+ else:
+ append(item)
+
+ unquoted_bytes = b"".join(res)
+
+ try:
+ return unquoted_bytes.decode(subencoding)
+ except UnicodeDecodeError:
+ if raise_subencoding_exc:
+ raise
+ return text
+
+
+def _decode_host(host):
+ # type: (Text) -> Text
+ """Decode a host from ASCII-encodable text to IDNA-decoded text. If
+ the host text is not ASCII, it is returned unchanged, as it is
+ presumed that it is already IDNA-decoded.
+
+ Some technical details: _decode_host is built on top of the "idna"
+ package, which has some quirks:
+
+ Capital letters are not valid IDNA2008. The idna package will
+ raise an exception like this on capital letters:
+
+ > idna.core.InvalidCodepoint: Codepoint U+004B at position 1 ... not allowed
+
+ However, if a segment of a host (i.e., something in
+ url.host.split('.')) is already ASCII, idna doesn't perform its
+ usual checks. In fact, for capital letters it automatically
+ lowercases them.
+
+ This check and some other functionality can be bypassed by passing
+ uts46=True to idna.encode/decode. This allows a more permissive and
+ convenient interface. So far it seems like the balanced approach.
+
+ Example output (from idna==2.6):
+
+ >> idna.encode(u'mahmöud.io')
+ 'xn--mahmud-zxa.io'
+ >> idna.encode(u'Mahmöud.io')
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 355, in encode
+ result.append(alabel(label))
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 276, in alabel
+ check_label(label)
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 253, in check_label
+ raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
+ idna.core.InvalidCodepoint: Codepoint U+004D at position 1 of u'Mahm\xf6ud' not allowed
+ >> idna.encode(u'Mahmoud.io')
+ 'Mahmoud.io'
+
+ # Similar behavior for decodes below
+ >> idna.decode(u'Mahmoud.io')
+ u'mahmoud.io'
+ >> idna.decode(u'Méhmoud.io', uts46=True)
+ u'm\xe9hmoud.io'
+ """ # noqa: E501
+ if not host:
+ return u""
+ try:
+ host_bytes = host.encode("ascii")
+ except UnicodeEncodeError:
+ host_text = host
+ else:
+ try:
+ host_text = idna_decode(host_bytes, uts46=True)
+ except ValueError:
+ # only reached on "narrow" (UCS-2) Python builds <3.4, see #7
+ # NOTE: not going to raise here, because there's no
+ # ambiguity in the IDNA, and the host is still
+ # technically usable
+ host_text = host
+ return host_text
+
+
+def _resolve_dot_segments(path):
+ # type: (Sequence[Text]) -> Sequence[Text]
+ """Normalize the URL path by resolving segments of '.' and '..'. For
+ more details, see `RFC 3986 section 5.2.4, Remove Dot Segments`_.
+
+ Args:
+ path: sequence of path segments in text form
+
+ Returns:
+ A new sequence of path segments with the '.' and '..' elements removed
+ and resolved.
+
+ .. _RFC 3986 section 5.2.4, Remove Dot Segments: https://tools.ietf.org/html/rfc3986#section-5.2.4
+ """ # noqa: E501
+ segs = [] # type: List[Text]
+
+ for seg in path:
+ if seg == u".":
+ pass
+ elif seg == u"..":
+ if segs:
+ segs.pop()
+ else:
+ segs.append(seg)
+
+ if list(path[-1:]) in ([u"."], [u".."]):
+ segs.append(u"")
+
+ return segs
+
+
+def parse_host(host):
+ # type: (Text) -> Tuple[Optional[AddressFamily], Text]
+ """Parse the host into a tuple of ``(family, host)``, where family
+ is the appropriate :mod:`socket` module constant when the host is
+ an IP address. Family is ``None`` when the host is not an IP.
+
+ Will raise :class:`URLParseError` on invalid IPv6 constants.
+
+ Returns:
+ family (socket constant or None), host (string)
+
+ >>> import socket
+ >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
+ True
+ >>> parse_host('::1') == (socket.AF_INET6, '::1')
+ True
+ >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
+ True
+ """
+ if not host:
+ return None, u""
+
+ if u":" in host:
+ try:
+ inet_pton(AF_INET6, host)
+ except socket.error as se:
+ raise URLParseError("invalid IPv6 host: %r (%r)" % (host, se))
+ except UnicodeEncodeError:
+ pass # TODO: this can't be a real host right?
+ else:
+ family = AF_INET6 # type: Optional[AddressFamily]
+ else:
+ try:
+ inet_pton(AF_INET, host)
+ except (socket.error, UnicodeEncodeError):
+ family = None # not an IP
+ else:
+ family = AF_INET
+
+ return family, host
+
+
+class URL(object):
+ r"""From blogs to billboards, URLs are so common, that it's easy to
+ overlook their complexity and power. With hyperlink's
+ :class:`URL` type, working with URLs doesn't have to be hard.
+
+ URLs are made of many parts. Most of these parts are officially
+ named in `RFC 3986`_ and this diagram may prove handy in identifying
+ them::
+
+ foo://user:pass@example.com:8042/over/there?name=ferret#nose
+ \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/
+ | | | | | | |
+ scheme userinfo host port path query fragment
+
+ While :meth:`~URL.from_text` is used for parsing whole URLs, the
+ :class:`URL` constructor builds a URL from the individual
+ components, like so::
+
+ >>> from hyperlink import URL
+ >>> url = URL(scheme=u'https', host=u'example.com', path=[u'hello', u'world'])
+ >>> print(url.to_text())
+ https://example.com/hello/world
+
+ The constructor runs basic type checks. All strings are expected
+ to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All
+ arguments are optional, defaulting to appropriately empty values. A full
+ list of constructor arguments is below.
+
+ Args:
+ scheme: The text name of the scheme.
+ host: The host portion of the network location
+ port: The port part of the network location. If ``None`` or no port is
+ passed, the port will default to the default port of the scheme, if
+ it is known. See the ``SCHEME_PORT_MAP`` and
+ :func:`register_default_port` for more info.
+ path: A tuple of strings representing the slash-separated parts of the
+ path, each percent-encoded.
        query: The query parameters, as a dictionary or as a sequence of
            percent-encoded key-value pairs.
+ fragment: The fragment part of the URL.
+ rooted: A rooted URL is one which indicates an absolute path.
+ This is True on any URL that includes a host, or any relative URL
+ that starts with a slash.
+ userinfo: The username or colon-separated username:password pair.
+ uses_netloc: Indicates whether ``://`` (the "netloc separator") will
+ appear to separate the scheme from the *path* in cases where no
+ host is present.
+ Setting this to ``True`` is a non-spec-compliant affordance for the
+ common practice of having URIs that are *not* URLs (cannot have a
+ 'host' part) but nevertheless use the common ``://`` idiom that
+ most people associate with URLs; e.g. ``message:`` URIs like
+ ``message://message-id`` being equivalent to ``message:message-id``.
+ This may be inferred based on the scheme depending on whether
+ :func:`register_scheme` has been used to register the scheme and
+ should not be passed directly unless you know the scheme works like
+ this and you know it has not been registered.
+
+ All of these parts are also exposed as read-only attributes of :class:`URL`
+ instances, along with several useful methods.
+
+ .. _RFC 3986: https://tools.ietf.org/html/rfc3986
+ .. _RFC 3987: https://tools.ietf.org/html/rfc3987
+ """ # noqa: E501
+
    def __init__(
        self,
        scheme=None,  # type: Optional[Text]
        host=None,  # type: Optional[Text]
        path=(),  # type: Iterable[Text]
        query=(),  # type: QueryParameters
        fragment=u"",  # type: Text
        port=None,  # type: Optional[int]
        rooted=None,  # type: Optional[bool]
        userinfo=u"",  # type: Text
        uses_netloc=None,  # type: Optional[bool]
    ):
        # type: (...) -> None
        # A host with no scheme defaults to "http".
        # NOTE(review): this convenience default predates review -- confirm
        # callers rely on it before changing.
        if host is not None and scheme is None:
            scheme = u"http"  # TODO: why
        # No explicit port: fall back to the scheme's well-known port, if any.
        if port is None and scheme is not None:
            port = SCHEME_PORT_MAP.get(scheme)
        if host and query and not path:
            # per RFC 3986 6.2.3, "a URI that uses the generic syntax
            # for authority with an empty path should be normalized to
            # a path of '/'."
            path = (u"",)

        # Now that we're done detecting whether they were passed, we can set
        # them to their defaults:
        if scheme is None:
            scheme = u""
        if host is None:
            host = u""
        if rooted is None:
            rooted = bool(host)

        # Set attributes.
        self._scheme = _textcheck("scheme", scheme)
        if self._scheme:
            if not _SCHEME_RE.match(self._scheme):
                raise ValueError(
                    'invalid scheme: %r. Only alphanumeric, "+",'
                    ' "-", and "." allowed. Did you meant to call'
                    " %s.from_text()?" % (self._scheme, self.__class__.__name__)
                )

        # parse_host validates the host (raising URLParseError on a bad
        # IPv6 literal); the address family it reports is discarded here.
        _, self._host = parse_host(_textcheck("host", host, "/?#@"))
        if isinstance(path, Text):
            raise TypeError(
                "expected iterable of text for path, not: %r" % (path,)
            )
        self._path = tuple(
            (_textcheck("path segment", segment, "/?#") for segment in path)
        )
        self._query = tuple(
            (
                _textcheck("query parameter name", k, "&=#"),
                _textcheck("query parameter value", v, "", nullable=True),
            )
            for k, v in iter_pairs(query)
        )
        self._fragment = _textcheck("fragment", fragment)
        self._port = _typecheck("port", port, int, NoneType)
        self._rooted = _typecheck("rooted", rooted, bool)
        self._userinfo = _textcheck("userinfo", userinfo, "/?#@")

        # When unspecified, infer netloc usage from the scheme registry.
        if uses_netloc is None:
            uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc)
        self._uses_netloc = _typecheck(
            "uses_netloc", uses_netloc, bool, NoneType
        )
        # A non-default port alone is enough to imply an authority section.
        will_have_authority = self._host or (
            self._port and self._port != SCHEME_PORT_MAP.get(scheme)
        )
        if will_have_authority:
            # fixup for rooted consistency; if there's any 'authority'
            # represented in the textual URL, then the path must be rooted, and
            # we're definitely using a netloc (there must be a ://).
            self._rooted = True
            self._uses_netloc = True
        if (not self._rooted) and self.path[:1] == (u"",):
            self._rooted = True
            self._path = self._path[1:]
        if not will_have_authority and self._path and not self._rooted:
            # If, after fixing up the path, there *is* a path and it *isn't*
            # rooted, then we are definitely not using a netloc; if we did, it
            # would make the path (erroneously) look like a hostname.
            self._uses_netloc = False
+
+ def get_decoded_url(self, lazy=False):
+ # type: (bool) -> DecodedURL
+ try:
+ return self._decoded_url
+ except AttributeError:
+ self._decoded_url = DecodedURL(self, lazy=lazy) # type: DecodedURL
+ return self._decoded_url
+
    @property
    def scheme(self):
        # type: () -> Text
        """The scheme is a string, and the first part of an absolute URL, the
        part before the first colon, and the part which defines the
        semantics of the rest of the URL. Examples include "http",
        "https", "ssh", "file", "mailto", and many others. See
        :func:`~hyperlink.register_scheme()` for more info.
        """
        return self._scheme

    @property
    def host(self):
        # type: () -> Text
        """The host is a string, and the second standard part of an absolute
        URL. When present, a valid host must be a domain name, or an
        IP (v4 or v6). It occurs before the first slash, or the second
        colon, if a :attr:`~hyperlink.URL.port` is provided.
        """
        return self._host

    @property
    def port(self):
        # type: () -> Optional[int]
        """The port is an integer that is commonly used in connecting to the
        :attr:`host`, and almost never appears without it.

        When not present in the original URL, this attribute defaults
        to the scheme's default port. If the scheme's default port is
        not known, and the port is not provided, this attribute will
        be set to None.

        >>> URL.from_text(u'http://example.com/pa/th').port
        80
        >>> URL.from_text(u'foo://example.com/pa/th').port
        >>> URL.from_text(u'foo://example.com:8042/pa/th').port
        8042

        .. note::

           Per the standard, when the port is the same as the scheme's
           default port, it will be omitted in the text URL.
        """
        return self._port

    @property
    def path(self):
        # type: () -> Sequence[Text]
        """A tuple of strings, created by splitting the slash-separated
        hierarchical path. Started by the first slash after the host,
        terminated by a "?", which indicates the start of the
        :attr:`~hyperlink.URL.query` string.
        """
        return self._path

    @property
    def query(self):
        # type: () -> QueryPairs
        """Tuple of pairs, created by splitting the ampersand-separated
        mapping of keys and optional values representing
        non-hierarchical data used to identify the resource. Keys are
        always strings. Values are strings when present, or None when
        missing.

        For more operations on the mapping, see
        :meth:`~hyperlink.URL.get()`, :meth:`~hyperlink.URL.add()`,
        :meth:`~hyperlink.URL.set()`, and
        :meth:`~hyperlink.URL.delete()`.
        """
        return self._query

    @property
    def fragment(self):
        # type: () -> Text
        """A string, the last part of the URL, indicated by the first "#"
        after the :attr:`~hyperlink.URL.path` or
        :attr:`~hyperlink.URL.query`. Enables indirect identification
        of a secondary resource, like an anchor within an HTML page.
        """
        return self._fragment

    @property
    def rooted(self):
        # type: () -> bool
        """Whether or not the path starts with a forward slash (``/``).

        This is taken from the terminology in the BNF grammar,
        specifically the "path-rootless" rule, since "absolute path"
        and "absolute URI" are somewhat ambiguous. :attr:`path` does
        not contain the implicit prefixed ``"/"`` since that is
        somewhat awkward to work with.
        """
        return self._rooted

    @property
    def userinfo(self):
        # type: () -> Text
        """The colon-separated string forming the username-password
        combination.
        """
        return self._userinfo

    @property
    def uses_netloc(self):
        # type: () -> Optional[bool]
        """
        Indicates whether ``://`` (the "netloc separator") will appear to
        separate the scheme from the *path* in cases where no host is present.
        """
        return self._uses_netloc

    @property
    def user(self):
        # type: () -> Text
        """
        The user portion of :attr:`~hyperlink.URL.userinfo`: everything
        before the first colon (or the whole userinfo when no colon is
        present).
        """
        return self.userinfo.split(u":")[0]
+
    def authority(self, with_password=False, **kw):
        # type: (bool, Any) -> Text
        """Compute and return the appropriate host/port/userinfo combination.

        >>> url = URL.from_text(u'http://user:pass@localhost:8080/a/b?x=y')
        >>> url.authority()
        u'user:@localhost:8080'
        >>> url.authority(with_password=True)
        u'user:pass@localhost:8080'

        Args:
            with_password: Whether the return value of this method include the
                password in the URL, if it is set.
                Defaults to False.

        Returns:
            Text: The authority (network location and user information) portion
                of the URL.
        """
        # first, a bit of twisted compat
        with_password = kw.pop("includeSecrets", with_password)
        if kw:
            raise TypeError("got unexpected keyword arguments: %r" % kw.keys())
        host = self.host
        if ":" in host:
            # an IPv6 literal must be bracketed in the authority
            hostport = ["[" + host + "]"]
        else:
            hostport = [self.host]
        # only render the port when it differs from the scheme's default
        if self.port != SCHEME_PORT_MAP.get(self.scheme):
            hostport.append(Text(self.port))
        authority = []
        if self.userinfo:
            userinfo = self.userinfo
            # truncate just past the first colon so the password itself is
            # not rendered (cf. RFC 3986 section 3.2.1)
            if not with_password and u":" in userinfo:
                userinfo = userinfo[: userinfo.index(u":") + 1]
            authority.append(userinfo)
        authority.append(u":".join(hostport))
        return u"@".join(authority)
+
    def __eq__(self, other):
        # type: (Any) -> bool
        """Two URLs compare equal when every component matches. Paths get
        one special case: two paths are also considered equal when both
        are members of ``_ROOT_PATHS`` (presumably the empty path and the
        root path -- confirm against that constant's definition).
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        for attr in [
            "scheme",
            "userinfo",
            "host",
            "query",
            "fragment",
            "port",
            "uses_netloc",
            "rooted",
        ]:
            if getattr(self, attr) != getattr(other, attr):
                return False
        if self.path == other.path or (
            self.path in _ROOT_PATHS and other.path in _ROOT_PATHS
        ):
            return True
        return False
+
+ def __ne__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ # type: () -> int
+ return hash(
+ (
+ self.__class__,
+ self.scheme,
+ self.userinfo,
+ self.host,
+ self.path,
+ self.query,
+ self.fragment,
+ self.port,
+ self.rooted,
+ self.uses_netloc,
+ )
+ )
+
    @property
    def absolute(self):
        # type: () -> bool
        """Whether or not the URL is "absolute". Absolute URLs are complete
        enough to resolve to a network resource without being relative
        to a base URI.

        >>> URL.from_text(u'http://wikipedia.org/').absolute
        True
        >>> URL.from_text(u'?a=b&c=d').absolute
        False

        Absolute URLs must have both a scheme and a host set.
        """
        # both components are required; either alone is a relative reference
        return bool(self.scheme and self.host)
+
    def replace(
        self,
        scheme=_UNSET,  # type: Optional[Text]
        host=_UNSET,  # type: Optional[Text]
        path=_UNSET,  # type: Iterable[Text]
        query=_UNSET,  # type: QueryParameters
        fragment=_UNSET,  # type: Text
        port=_UNSET,  # type: Optional[int]
        rooted=_UNSET,  # type: Optional[bool]
        userinfo=_UNSET,  # type: Text
        uses_netloc=_UNSET,  # type: Optional[bool]
    ):
        # type: (...) -> URL
        """:class:`URL` objects are immutable, which means that attributes
        are designed to be set only once, at construction. Instead of
        modifying an existing URL, one simply creates a copy with the
        desired changes.

        If any of the following arguments is omitted, it defaults to
        the value on the current URL.

        Args:
            scheme: The text name of the scheme.
            host: The host portion of the network location.
            path: A tuple of strings representing the slash-separated parts of
                the path.
            query: The query parameters, as a dictionary or as a sequence of
                key-value pairs.
            fragment: The fragment part of the URL.
            port: The port part of the network location.
            rooted: Whether or not the path begins with a slash.
            userinfo: The username or colon-separated username:password pair.
            uses_netloc: Indicates whether ``://`` (the "netloc separator")
                will appear to separate the scheme from the *path* in cases
                where no host is present.
                Setting this to ``True`` is a non-spec-compliant affordance for
                the common practice of having URIs that are *not* URLs (cannot
                have a 'host' part) but nevertheless use the common ``://``
                idiom that most people associate with URLs; e.g. ``message:``
                URIs like ``message://message-id`` being equivalent to
                ``message:message-id``.
                This may be inferred based on the scheme depending on whether
                :func:`register_scheme` has been used to register the scheme
                and should not be passed directly unless you know the scheme
                works like this and you know it has not been registered.

        Returns:
            URL: A copy of the current :class:`URL`, with new values for
                parameters passed.
        """
        if scheme is not _UNSET and scheme != self.scheme:
            # when changing schemes, reset the explicit uses_netloc preference
            # to honor the new scheme.
            # NOTE(review): this also discards a uses_netloc value passed
            # explicitly to this very call -- confirm that is intended.
            uses_netloc = None
        return self.__class__(
            scheme=_optional(scheme, self.scheme),
            host=_optional(host, self.host),
            path=_optional(path, self.path),
            query=_optional(query, self.query),
            fragment=_optional(fragment, self.fragment),
            port=_optional(port, self.port),
            rooted=_optional(rooted, self.rooted),
            userinfo=_optional(userinfo, self.userinfo),
            uses_netloc=_optional(uses_netloc, self.uses_netloc),
        )
+
    @classmethod
    def from_text(cls, text):
        # type: (Text) -> URL
        """Whereas the :class:`URL` constructor is useful for constructing
        URLs from parts, :meth:`~URL.from_text` supports parsing whole
        URLs from their string form::

            >>> URL.from_text(u'http://example.com')
            URL.from_text(u'http://example.com')
            >>> URL.from_text(u'?a=b&x=y')
            URL.from_text(u'?a=b&x=y')

        As you can see above, it's also used as the :func:`repr` of
        :class:`URL` objects. The natural counterpart to
        :func:`~URL.to_text()`. This method only accepts *text*, so be
        sure to decode those bytestrings.

        Args:
            text: A valid URL string.

        Returns:
            URL: The structured object version of the parsed string.

        .. note::

            Somewhat unexpectedly, URLs are a far more permissive
            format than most would assume. Many strings which don't
            look like URLs are still valid URLs. As a result, this
            method only raises :class:`URLParseError` on invalid port
            and IPv6 values in the host portion of the URL.
        """
        # Split the URL into its top-level pieces; group names used below:
        # scheme, authority, path, query, fragment, _netloc_sep.
        um = _URL_RE.match(_textcheck("text", text))
        if um is None:
            raise URLParseError("could not parse url: %r" % text)
        gs = um.groupdict()

        # Break the authority down into userinfo, host, and port.
        au_text = gs["authority"] or u""
        au_m = _AUTHORITY_RE.match(au_text)
        if au_m is None:
            raise URLParseError(
                "invalid authority %r in url: %r" % (au_text, text)
            )
        au_gs = au_m.groupdict()
        if au_gs["bad_host"]:
            raise URLParseError(
                "invalid host %r in url: %r" % (au_gs["bad_host"], text)
            )

        userinfo = au_gs["userinfo"] or u""

        host = au_gs["ipv6_host"] or au_gs["plain_host"]
        port = au_gs["port"]
        if port is not None:
            try:
                port = int(port)  # type: ignore[assignment] # FIXME, see below
            except ValueError:
                if not port:  # TODO: excessive?
                    raise URLParseError("port must not be empty: %r" % au_text)
                raise URLParseError("expected integer for port, not %r" % port)

        scheme = gs["scheme"] or u""
        fragment = gs["fragment"] or u""
        uses_netloc = bool(gs["_netloc_sep"])

        if gs["path"]:
            path = tuple(gs["path"].split(u"/"))
            if not path[0]:
                # a leading empty segment means the text began with "/"
                path = path[1:]
                rooted = True
            else:
                rooted = False
        else:
            # no path at all; an authority alone still implies rootedness
            path = ()
            rooted = bool(au_text)
        # split "a=1&b" style query text into (key, value-or-None) pairs
        if gs["query"]:
            query = tuple(
                (
                    qe.split(u"=", 1)  # type: ignore[misc]
                    if u"=" in qe
                    else (qe, None)
                )
                for qe in gs["query"].split(u"&")
            )  # type: QueryPairs
        else:
            query = ()
        return cls(
            scheme,
            host,
            path,
            query,
            fragment,
            port,  # type: ignore[arg-type] # FIXME, see above
            rooted,
            userinfo,
            uses_netloc,
        )
+
    def normalize(
        self,
        scheme=True,
        host=True,
        path=True,
        query=True,
        fragment=True,
        userinfo=True,
        percents=True,
    ):
        # type: (bool, bool, bool, bool, bool, bool, bool) -> URL
        """Return a new URL object with several standard normalizations
        applied:

        * Decode unreserved characters (`RFC 3986 2.3`_)
        * Uppercase remaining percent-encoded octets (`RFC 3986 2.1`_)
        * Convert scheme and host casing to lowercase (`RFC 3986 3.2.2`_)
        * Resolve any "." and ".." references in the path (`RFC 3986 6.2.2.3`_)
        * Ensure an ending slash on URLs with an empty path (`RFC 3986 6.2.3`_)
        * Encode any stray percent signs (`%`) in percent-encoded
          fields (path, query, fragment, userinfo) (`RFC 3986 2.4`_)

        All are applied by default, but normalizations can be disabled
        per-part by passing `False` for that part's corresponding
        name.

        Args:
            scheme: Convert the scheme to lowercase
            host: Convert the host to lowercase
            path: Normalize the path (see above for details)
            query: Normalize the query string
            fragment: Normalize the fragment
            userinfo: Normalize the userinfo
            percents: Encode isolated percent signs for any percent-encoded
                fields which are being normalized (defaults to `True`).

        >>> url = URL.from_text(u'Http://example.COM/a/../b/./c%2f?%61%')
        >>> print(url.normalize().to_text())
        http://example.com/b/c%2F?a%25

        .. _RFC 3986 3.2.2: https://tools.ietf.org/html/rfc3986#section-3.2.2
        .. _RFC 3986 2.3: https://tools.ietf.org/html/rfc3986#section-2.3
        .. _RFC 3986 2.1: https://tools.ietf.org/html/rfc3986#section-2.1
        .. _RFC 3986 6.2.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.2.3
        .. _RFC 3986 6.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.3
        .. _RFC 3986 2.4: https://tools.ietf.org/html/rfc3986#section-2.4
        """  # noqa: E501
        # collect only the requested replacements, then apply them in one go
        kw = {}  # type: Dict[str, Any]
        if scheme:
            kw["scheme"] = self.scheme.lower()
        if host:
            kw["host"] = self.host.lower()

        # shared decode helper honoring the *percents* flag
        def _dec_unres(target):
            # type: (Text) -> Text
            return _decode_unreserved(
                target, normalize_case=True, encode_stray_percents=percents
            )

        if path:
            if self.path:
                kw["path"] = [
                    _dec_unres(p) for p in _resolve_dot_segments(self.path)
                ]
            else:
                # empty path normalizes to "/" per RFC 3986 6.2.3
                kw["path"] = (u"",)
        if query:
            kw["query"] = [
                (_dec_unres(k), _dec_unres(v) if v else v)
                for k, v in self.query
            ]
        if fragment:
            kw["fragment"] = _dec_unres(self.fragment)
        if userinfo:
            # decode user and password separately; keep the ":" separator
            kw["userinfo"] = u":".join(
                [_dec_unres(p) for p in self.userinfo.split(":", 1)]
            )

        return self.replace(**kw)
+
+ def child(self, *segments):
+ # type: (Text) -> URL
+ """Make a new :class:`URL` where the given path segments are a child
+ of this URL, preserving other parts of the URL, including the
+ query string and fragment.
+
+ For example::
+
+ >>> url = URL.from_text(u'http://localhost/a/b?x=y')
+ >>> child_url = url.child(u"c", u"d")
+ >>> child_url.to_text()
+ u'http://localhost/a/b/c/d?x=y'
+
+ Args:
+ segments: Additional parts to be joined and added to the path, like
+ :func:`os.path.join`. Special characters in segments will be
+ percent encoded.
+
+ Returns:
+ URL: A copy of the current URL with the extra path segments.
+ """
+ if not segments:
+ return self
+
+ segments = [ # type: ignore[assignment] # variable is tuple
+ _textcheck("path segment", s) for s in segments
+ ]
+ new_path = tuple(self.path)
+ if self.path and self.path[-1] == u"":
+ new_path = new_path[:-1]
+ new_path += tuple(_encode_path_parts(segments, maximal=False))
+ return self.replace(path=new_path)
+
+ def sibling(self, segment):
+ # type: (Text) -> URL
+ """Make a new :class:`URL` with a single path segment that is a
+ sibling of this URL path.
+
+ Args:
+ segment: A single path segment.
+
+ Returns:
+ URL: A copy of the current URL with the last path segment
+ replaced by *segment*. Special characters such as
+ ``/?#`` will be percent encoded.
+ """
+ _textcheck("path segment", segment)
+ new_path = tuple(self.path)[:-1] + (_encode_path_part(segment),)
+ return self.replace(path=new_path)
+
    def click(self, href=u""):
        # type: (Union[Text, URL]) -> URL
        """Resolve the given URL relative to this URL.

        The resulting URI should match what a web browser would
        generate if you visited the current URL and clicked on *href*.

        >>> url = URL.from_text(u'http://blog.hatnote.com/')
        >>> url.click(u'/post/155074058790').to_text()
        u'http://blog.hatnote.com/post/155074058790'
        >>> url = URL.from_text(u'http://localhost/a/b/c/')
        >>> url.click(u'../d/./e').to_text()
        u'http://localhost/a/b/d/e'

        Args (Text):
            href: A string representing a clicked URL.

        Return:
            A copy of the current URL with navigation logic applied.

        For more information, see `RFC 3986 section 5`_.

        .. _RFC 3986 section 5: https://tools.ietf.org/html/rfc3986#section-5
        """
        if href:
            if isinstance(href, URL):
                clicked = href
            else:
                # TODO: This error message is not completely accurate,
                # as URL objects are now also valid, but Twisted's
                # test suite (wrongly) relies on this exact message.
                _textcheck("relative URL", href)
                clicked = URL.from_text(href)
            # an absolute href replaces this URL entirely
            if clicked.absolute:
                return clicked
        else:
            # an empty href "clicks" to the current URL itself
            clicked = self

        query = clicked.query
        if clicked.scheme and not clicked.rooted:
            # Schemes with relative paths are not well-defined. RFC 3986 calls
            # them a "loophole in prior specifications" that should be avoided,
            # or supported only for backwards compatibility.
            raise NotImplementedError(
                "absolute URI with rootless path: %r" % (href,)
            )
        else:
            if clicked.rooted:
                # rooted hrefs replace the whole path
                path = clicked.path
            elif clicked.path:
                # relative hrefs replace only the last path segment
                path = tuple(self.path)[:-1] + tuple(clicked.path)
            else:
                path = self.path
                if not query:
                    query = self.query
        return self.replace(
            scheme=clicked.scheme or self.scheme,
            host=clicked.host or self.host,
            port=clicked.port or self.port,
            path=_resolve_dot_segments(path),
            query=query,
            fragment=clicked.fragment,
        )
+
    def to_uri(self):
        # type: () -> URL
        u"""Make a new :class:`URL` instance with all non-ASCII characters
        appropriately percent-encoded. This is useful to do in preparation
        for sending a :class:`URL` over a network protocol.

        For example::

            >>> URL.from_text(u'https://ايران.com/foo⇧bar/').to_uri()
            URL.from_text(u'https://xn--mgba3a4fra.com/foo%E2%87%A7bar/')

        Returns:
            URL: A new instance with its path segments, query parameters, and
                hostname encoded, so that they are all in the standard
                US-ASCII range.
        """
        # user and password are encoded separately, keeping the ":" intact
        new_userinfo = u":".join(
            [_encode_userinfo_part(p) for p in self.userinfo.split(":", 1)]
        )
        new_path = _encode_path_parts(
            self.path, has_scheme=bool(self.scheme), rooted=False, maximal=True
        )
        # non-empty hosts are IDNA-encoded (with UTS #46 mapping) to ASCII
        new_host = (
            self.host
            if not self.host
            else idna_encode(self.host, uts46=True).decode("ascii")
        )
        return self.replace(
            userinfo=new_userinfo,
            host=new_host,
            path=new_path,
            query=tuple(
                [
                    (
                        _encode_query_key(k, maximal=True),
                        _encode_query_value(v, maximal=True)
                        if v is not None
                        else None,
                    )
                    for k, v in self.query
                ]
            ),
            fragment=_encode_fragment_part(self.fragment, maximal=True),
        )
+
    def to_iri(self):
        # type: () -> URL
        u"""Make a new :class:`URL` instance with all but a few reserved
        characters decoded into human-readable format.

        Percent-encoded Unicode and IDNA-encoded hostnames are
        decoded, like so::

            >>> url = URL.from_text(u'https://xn--mgba3a4fra.example.com/foo%E2%87%A7bar/')
            >>> print(url.to_iri().to_text())
            https://ايران.example.com/foo⇧bar/

        .. note::

            As a general Python issue, "narrow" (UCS-2) builds of
            Python may not be able to fully decode certain URLs, and
            in those cases, this method will return a best-effort,
            partially-decoded, URL which is still valid. This issue
            does not affect any Python builds 3.4+.

        Returns:
            URL: A new instance with its path segments, query parameters, and
                hostname decoded for display purposes.
        """  # noqa: E501
        # decode user and password separately, preserving the ":" separator
        new_userinfo = u":".join(
            [_decode_userinfo_part(p) for p in self.userinfo.split(":", 1)]
        )
        host_text = _decode_host(self.host)

        return self.replace(
            userinfo=new_userinfo,
            host=host_text,
            path=[_decode_path_part(segment) for segment in self.path],
            query=tuple(
                (
                    _decode_query_key(k),
                    _decode_query_value(v) if v is not None else None,
                )
                for k, v in self.query
            ),
            fragment=_decode_fragment_part(self.fragment),
        )
+
    def to_text(self, with_password=False):
        # type: (bool) -> Text
        """Render this URL to its textual representation.

        By default, the URL text will *not* include a password, if one
        is set. RFC 3986 considers using URLs to represent such
        sensitive information as deprecated. Quoting from RFC 3986,
        `section 3.2.1`:

            "Applications should not render as clear text any data after the
            first colon (":") character found within a userinfo subcomponent
            unless the data after the colon is the empty string (indicating no
            password)."

        Args (bool):
            with_password: Whether or not to include the password in the URL
                text. Defaults to False.

        Returns:
            Text: The serialized textual representation of this URL, such as
                ``u"http://example.com/some/path?some=query"``.

        The natural counterpart to :class:`URL.from_text()`.

        .. _section 3.2.1: https://tools.ietf.org/html/rfc3986#section-3.2.1
        """
        scheme = self.scheme
        authority = self.authority(with_password)
        path = "/".join(
            _encode_path_parts(
                self.path,
                rooted=self.rooted,
                has_scheme=bool(scheme),
                has_authority=bool(authority),
                maximal=False,
            )
        )
        # render each query pair as "k" (no value) or "k=v"
        query_parts = []
        for k, v in self.query:
            if v is None:
                query_parts.append(_encode_query_key(k, maximal=False))
            else:
                query_parts.append(
                    u"=".join(
                        (
                            _encode_query_key(k, maximal=False),
                            _encode_query_value(v, maximal=False),
                        )
                    )
                )
        query_string = u"&".join(query_parts)

        fragment = self.fragment

        # assemble the final text piece by piece
        parts = []  # type: List[Text]
        _add = parts.append
        if scheme:
            _add(scheme)
            _add(":")
        if authority:
            _add("//")
            _add(authority)
        elif scheme and path[:2] != "//" and self.uses_netloc:
            # no authority, but the scheme still expects "://" (see
            # the uses_netloc property)
            _add("//")
        if path:
            if scheme and authority and path[:1] != "/":
                _add("/")  # relpaths with abs authorities auto get '/'
            _add(path)
        if query_string:
            _add("?")
            _add(query_string)
        if fragment:
            _add("#")
            _add(fragment)
        return u"".join(parts)
+
+ def __repr__(self):
+ # type: () -> str
+ """Convert this URL to an representation that shows all of its
+ constituent parts, as well as being a valid argument to
+ :func:`eval`.
+ """
+ return "%s.from_text(%r)" % (self.__class__.__name__, self.to_text())
+
    def _to_bytes(self):
        # type: () -> bytes
        """
        Allows for direct usage of URL objects with libraries like
        requests, which automatically stringify URL parameters. See
        issue #49.
        """
        return self.to_uri().to_text().encode("ascii")

    # On Python 2, str() must produce bytes (with unicode() for text); on
    # Python 3, str() is text and bytes() goes through _to_bytes.
    if PY2:
        __str__ = _to_bytes
        __unicode__ = to_text
    else:
        __bytes__ = _to_bytes
        __str__ = to_text
+
    # # Begin Twisted Compat Code
    asURI = to_uri
    asIRI = to_iri

    @classmethod
    def fromText(cls, s):
        # type: (Text) -> URL
        # Twisted-style alias for from_text().
        return cls.from_text(s)

    def asText(self, includeSecrets=False):
        # type: (bool) -> Text
        # Twisted-style alias for to_text().
        return self.to_text(with_password=includeSecrets)

    def __dir__(self):
        # type: () -> Sequence[Text]
        try:
            ret = object.__dir__(self)
        except AttributeError:
            # object.__dir__ == AttributeError  # pdw for py2
            ret = dir(self.__class__) + list(self.__dict__.keys())
        # hide the Twisted compat aliases from dir() listings
        ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
        return ret

    # # End Twisted Compat Code
+
+ def add(self, name, value=None):
+ # type: (Text, Optional[Text]) -> URL
+ """Make a new :class:`URL` instance with a given query argument,
+ *name*, added to it with the value *value*, like so::
+
+ >>> URL.from_text(u'https://example.com/?x=y').add(u'x')
+ URL.from_text(u'https://example.com/?x=y&x')
+ >>> URL.from_text(u'https://example.com/?x=y').add(u'x', u'z')
+ URL.from_text(u'https://example.com/?x=y&x=z')
+
+ Args:
+ name: The name of the query parameter to add.
+ The part before the ``=``.
+ value: The value of the query parameter to add.
+ The part after the ``=``.
+ Defaults to ``None``, meaning no value.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter added.
+ """
+ return self.replace(query=self.query + ((name, value),))
+
+ def set(self, name, value=None):
+ # type: (Text, Optional[Text]) -> URL
+ """Make a new :class:`URL` instance with the query parameter *name*
+ set to *value*. All existing occurences, if any are replaced
+ by the single name-value pair.
+
+ >>> URL.from_text(u'https://example.com/?x=y').set(u'x')
+ URL.from_text(u'https://example.com/?x')
+ >>> URL.from_text(u'https://example.com/?x=y').set(u'x', u'z')
+ URL.from_text(u'https://example.com/?x=z')
+
+ Args:
+ name: The name of the query parameter to set.
+ The part before the ``=``.
+ value: The value of the query parameter to set.
+ The part after the ``=``.
+ Defaults to ``None``, meaning no value.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter set.
+ """
+ # Preserve the original position of the query key in the list
+ q = [(k, v) for (k, v) in self.query if k != name]
+ idx = next(
+ (i for (i, (k, v)) in enumerate(self.query) if k == name), -1
+ )
+ q[idx:idx] = [(name, value)]
+ return self.replace(query=q)
+
+ def get(self, name):
+ # type: (Text) -> List[Optional[Text]]
+ """Get a list of values for the given query parameter, *name*::
+
+ >>> url = URL.from_text(u'?x=1&x=2')
+ >>> url.get('x')
+ [u'1', u'2']
+ >>> url.get('y')
+ []
+
+ If the given *name* is not set, an empty list is returned. A
+ list is always returned, and this method raises no exceptions.
+
+ Args:
+ name: The name of the query parameter to get.
+
+ Returns:
+ List[Optional[Text]]: A list of all the values associated with the
+ key, in string form.
+ """
+ return [value for (key, value) in self.query if name == key]
+
+ def remove(
+ self,
+ name, # type: Text
+ value=_UNSET, # type: Text
+ limit=None, # type: Optional[int]
+ ):
+ # type: (...) -> URL
+ """Make a new :class:`URL` instance with occurrences of the query
+ parameter *name* removed, or, if *value* is set, parameters
+ matching *name* and *value*. No exception is raised if the
+ parameter is not already set.
+
+ Args:
+ name: The name of the query parameter to remove.
+ value: Optional value to additionally filter on.
+ Setting this removes query parameters which match both name
+ and value.
+ limit: Optional maximum number of parameters to remove.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter removed.
+ """
+ if limit is None:
+ if value is _UNSET:
+ nq = [(k, v) for (k, v) in self.query if k != name]
+ else:
+ nq = [
+ (k, v)
+ for (k, v) in self.query
+ if not (k == name and v == value)
+ ]
+ else:
+ nq, removed_count = [], 0
+
+ for k, v in self.query:
+ if (
+ k == name
+ and (value is _UNSET or v == value)
+ and removed_count < limit
+ ):
+ removed_count += 1 # drop it
+ else:
+ nq.append((k, v)) # keep it
+
+ return self.replace(query=nq)
+
+
EncodedURL = URL  # An alias better describing what the URL really is

_EMPTY_URL = URL()  # shared default for DecodedURL(), avoiding per-call allocation
+
+
+def _replace_plus(text):
+ # type: (Text) -> Text
+ return text.replace("+", "%20")
+
+
+def _no_op(text):
+ # type: (Text) -> Text
+ return text
+
+
+class DecodedURL(object):
+ """
+ :class:`DecodedURL` is a type designed to act as a higher-level
+ interface to :class:`URL` and the recommended type for most
+ operations. By analogy, :class:`DecodedURL` is the
+ :class:`unicode` to URL's :class:`bytes`.
+
+ :class:`DecodedURL` automatically handles encoding and decoding
+ all its components, such that all inputs and outputs are in a
+ maximally-decoded state. Note that this means, for some special
+ cases, a URL may not "roundtrip" character-for-character, but this
+ is considered a good tradeoff for the safety of automatic
+ encoding.
+
+ Otherwise, :class:`DecodedURL` has almost exactly the same API as
+ :class:`URL`.
+
+ Where applicable, a UTF-8 encoding is presumed. Be advised that
+ some interactions can raise :exc:`UnicodeEncodeError` and
+ :exc:`UnicodeDecodeError` exceptions, just like when working with
+ bytestrings. Examples of such interactions include handling query
+ strings encoding binary data, and paths containing segments with
+ special characters encoded with codecs other than UTF-8.
+
+ Args:
+ url: A :class:`URL` object to wrap.
+ lazy: Set to True to avoid pre-decoding all parts of the URL to
+ check for validity.
+ Defaults to False.
+ query_plus_is_space: + characters in the query string should be treated
+ as spaces when decoding. If unspecified, the default is taken from
+ the scheme.
+
+ .. note::
+
+ The :class:`DecodedURL` initializer takes a :class:`URL` object,
+ not URL components, like :class:`URL`. To programmatically
+ construct a :class:`DecodedURL`, you can use this pattern:
+
+ >>> print(DecodedURL().replace(scheme=u'https',
+ ... host=u'pypi.org', path=(u'projects', u'hyperlink')).to_text())
+ https://pypi.org/projects/hyperlink
+
+ .. versionadded:: 18.0.0
+ """
+
+ def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
+ # type: (URL, bool, Optional[bool]) -> None
+ self._url = url
+ if query_plus_is_space is None:
+ query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
+ self._query_plus_is_space = query_plus_is_space
+ if not lazy:
+ # cache the following, while triggering any decoding
+ # issues with decodable fields
+ self.host, self.userinfo, self.path, self.query, self.fragment
+ return
+
+ @classmethod
+ def from_text(cls, text, lazy=False, query_plus_is_space=None):
+ # type: (Text, bool, Optional[bool]) -> DecodedURL
+ """\
+ Make a `DecodedURL` instance from any text string containing a URL.
+
+ Args:
+ text: Text containing the URL
+ lazy: Set to True to skip pre-decoding all parts of the URL to
+ check for validity.
+ Defaults to False.
+ """
+ _url = URL.from_text(text)
+ return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)
+
+ @property
+ def encoded_url(self):
+ # type: () -> URL
+ """Access the underlying :class:`URL` object, which has any special
+ characters encoded.
+ """
+ return self._url
+
+ def to_text(self, with_password=False):
+ # type: (bool) -> Text
+ "Passthrough to :meth:`~hyperlink.URL.to_text()`"
+ return self._url.to_text(with_password)
+
+ def to_uri(self):
+ # type: () -> URL
+ "Passthrough to :meth:`~hyperlink.URL.to_uri()`"
+ return self._url.to_uri()
+
+ def to_iri(self):
+ # type: () -> URL
+ "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
+ return self._url.to_iri()
+
+ def _clone(self, url):
+ # type: (URL) -> DecodedURL
+ return self.__class__(
+ url,
+ # TODO: propagate laziness?
+ query_plus_is_space=self._query_plus_is_space,
+ )
+
+ def click(self, href=u""):
+ # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
+ """Return a new DecodedURL wrapping the result of
+ :meth:`~hyperlink.URL.click()`
+ """
+ if isinstance(href, DecodedURL):
+ href = href._url
+ return self._clone(
+ self._url.click(href=href),
+ )
+
+ def sibling(self, segment):
+ # type: (Text) -> DecodedURL
+ """Automatically encode any reserved characters in *segment* and
+ return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.sibling()`
+ """
+ return self._clone(
+ self._url.sibling(_encode_reserved(segment)),
+ )
+
+ def child(self, *segments):
+ # type: (Text) -> DecodedURL
+ """Automatically encode any reserved characters in *segments* and
+ return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.child()`.
+ """
+ if not segments:
+ return self
+ new_segs = [_encode_reserved(s) for s in segments]
+ return self._clone(self._url.child(*new_segs))
+
+ def normalize(
+ self,
+ scheme=True,
+ host=True,
+ path=True,
+ query=True,
+ fragment=True,
+ userinfo=True,
+ percents=True,
+ ):
+ # type: (bool, bool, bool, bool, bool, bool, bool) -> DecodedURL
+ """Return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.normalize()`
+ """
+ return self._clone(
+ self._url.normalize(
+ scheme, host, path, query, fragment, userinfo, percents
+ )
+ )
+
+ @property
+ def absolute(self):
+ # type: () -> bool
+ return self._url.absolute
+
+ @property
+ def scheme(self):
+ # type: () -> Text
+ return self._url.scheme
+
+ @property
+ def host(self):
+ # type: () -> Text
+ return _decode_host(self._url.host)
+
+ @property
+ def port(self):
+ # type: () -> Optional[int]
+ return self._url.port
+
+ @property
+ def rooted(self):
+ # type: () -> bool
+ return self._url.rooted
+
+ @property
+ def path(self):
+ # type: () -> Sequence[Text]
+ if not hasattr(self, "_path"):
+ self._path = tuple(
+ [
+ _percent_decode(p, raise_subencoding_exc=True)
+ for p in self._url.path
+ ]
+ )
+ return self._path
+
+ @property
+ def query(self):
+ # type: () -> QueryPairs
+ if not hasattr(self, "_query"):
+ if self._query_plus_is_space:
+ predecode = _replace_plus
+ else:
+ predecode = _no_op
+
+ self._query = cast(
+ QueryPairs,
+ tuple(
+ tuple(
+ _percent_decode(
+ predecode(x), raise_subencoding_exc=True
+ )
+ if x is not None
+ else None
+ for x in (k, v)
+ )
+ for k, v in self._url.query
+ ),
+ )
+ return self._query
+
+ @property
+ def fragment(self):
+ # type: () -> Text
+ if not hasattr(self, "_fragment"):
+ frag = self._url.fragment
+ self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
+ return self._fragment
+
+ @property
+ def userinfo(self):
+ # type: () -> Union[Tuple[str], Tuple[str, str]]
+ if not hasattr(self, "_userinfo"):
+ self._userinfo = cast(
+ Union[Tuple[str], Tuple[str, str]],
+ tuple(
+ tuple(
+ _percent_decode(p, raise_subencoding_exc=True)
+ for p in self._url.userinfo.split(":", 1)
+ )
+ ),
+ )
+ return self._userinfo
+
+ @property
+ def user(self):
+ # type: () -> Text
+ return self.userinfo[0]
+
+ @property
+ def uses_netloc(self):
+ # type: () -> Optional[bool]
+ return self._url.uses_netloc
+
+ def replace(
+ self,
+ scheme=_UNSET, # type: Optional[Text]
+ host=_UNSET, # type: Optional[Text]
+ path=_UNSET, # type: Iterable[Text]
+ query=_UNSET, # type: QueryParameters
+ fragment=_UNSET, # type: Text
+ port=_UNSET, # type: Optional[int]
+ rooted=_UNSET, # type: Optional[bool]
+ userinfo=_UNSET, # type: Union[Tuple[str], Tuple[str, str]]
+ uses_netloc=_UNSET, # type: Optional[bool]
+ ):
+ # type: (...) -> DecodedURL
+ """While the signature is the same, this `replace()` differs a little
+ from URL.replace. For instance, it accepts userinfo as a
+ tuple, not as a string, handling the case of having a username
+ containing a `:`. As with the rest of the methods on
+ DecodedURL, if you pass a reserved character, it will be
+ automatically encoded instead of an error being raised.
+ """
+ if path is not _UNSET:
+ path = tuple(_encode_reserved(p) for p in path)
+ if query is not _UNSET:
+ query = cast(
+ QueryPairs,
+ tuple(
+ tuple(
+ _encode_reserved(x) if x is not None else None
+ for x in (k, v)
+ )
+ for k, v in iter_pairs(query)
+ ),
+ )
+ if userinfo is not _UNSET:
+ if len(userinfo) > 2:
+ raise ValueError(
+ 'userinfo expected sequence of ["user"] or'
+ ' ["user", "password"], got %r' % (userinfo,)
+ )
+ userinfo_text = u":".join([_encode_reserved(p) for p in userinfo])
+ else:
+ userinfo_text = _UNSET
+ new_url = self._url.replace(
+ scheme=scheme,
+ host=host,
+ path=path,
+ query=query,
+ fragment=fragment,
+ port=port,
+ rooted=rooted,
+ userinfo=userinfo_text,
+ uses_netloc=uses_netloc,
+ )
+ return self._clone(url=new_url)
+
+ def get(self, name):
+ # type: (Text) -> List[Optional[Text]]
+ "Get the value of all query parameters whose name matches *name*"
+ return [v for (k, v) in self.query if name == k]
+
+ def add(self, name, value=None):
+ # type: (Text, Optional[Text]) -> DecodedURL
+ """Return a new DecodedURL with the query parameter *name* and *value*
+ added."""
+ return self.replace(query=self.query + ((name, value),))
+
+ def set(self, name, value=None):
+ # type: (Text, Optional[Text]) -> DecodedURL
+ "Return a new DecodedURL with query parameter *name* set to *value*"
+ query = self.query
+ q = [(k, v) for (k, v) in query if k != name]
+ idx = next((i for (i, (k, v)) in enumerate(query) if k == name), -1)
+ q[idx:idx] = [(name, value)]
+ return self.replace(query=q)
+
+ def remove(
+ self,
+ name, # type: Text
+ value=_UNSET, # type: Text
+ limit=None, # type: Optional[int]
+ ):
+ # type: (...) -> DecodedURL
+ """Return a new DecodedURL with query parameter *name* removed.
+
+ Optionally also filter for *value*, as well as cap the number
+ of parameters removed with *limit*.
+ """
+ if limit is None:
+ if value is _UNSET:
+ nq = [(k, v) for (k, v) in self.query if k != name]
+ else:
+ nq = [
+ (k, v)
+ for (k, v) in self.query
+ if not (k == name and v == value)
+ ]
+ else:
+ nq, removed_count = [], 0
+ for k, v in self.query:
+ if (
+ k == name
+ and (value is _UNSET or v == value)
+ and removed_count < limit
+ ):
+ removed_count += 1 # drop it
+ else:
+ nq.append((k, v)) # keep it
+
+ return self.replace(query=nq)
+
+ def __repr__(self):
+ # type: () -> str
+ cn = self.__class__.__name__
+ return "%s(url=%r)" % (cn, self._url)
+
+ def __str__(self):
+ # type: () -> str
+ # TODO: the underlying URL's __str__ needs to change to make
+ # this work as the URL, see #55
+ return str(self._url)
+
+ def __eq__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return self.normalize().to_uri() == other.normalize().to_uri()
+
+ def __ne__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ # type: () -> int
+ return hash(
+ (
+ self.__class__,
+ self.scheme,
+ self.userinfo,
+ self.host,
+ self.path,
+ self.query,
+ self.fragment,
+ self.port,
+ self.rooted,
+ self.uses_netloc,
+ )
+ )
+
+ # # Begin Twisted Compat Code
+ asURI = to_uri
+ asIRI = to_iri
+
+ @classmethod
+ def fromText(cls, s, lazy=False):
+ # type: (Text, bool) -> DecodedURL
+ return cls.from_text(s, lazy=lazy)
+
+ def asText(self, includeSecrets=False):
+ # type: (bool) -> Text
+ return self.to_text(with_password=includeSecrets)
+
+ def __dir__(self):
+ # type: () -> Sequence[Text]
+ try:
+ ret = object.__dir__(self)
+ except AttributeError:
+ # object.__dir__ does not exist on Python 2, so fall back
+ ret = dir(self.__class__) + list(self.__dict__.keys())
+ ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
+ return ret
+
+ # # End Twisted Compat Code
+
+
+# Add some overloads so that parse gives a better return value.
+@overload
+def parse(url, decoded, lazy=False):
+ # type: (Text, Literal[False], bool) -> URL
+ """Passing decoded=False returns URL."""
+
+
+@overload
+def parse(url, decoded=True, lazy=False):
+ # type: (Text, Literal[True], bool) -> DecodedURL
+ """Passing decoded=True (or the default value) returns DecodedURL."""
+
+
+@overload
+def parse(url, decoded=True, lazy=False):
+ # type: (Text, bool, bool) -> Union[URL, DecodedURL]
+ """If decoded is not a literal we don't know the return type."""
+
+
+def parse(url, decoded=True, lazy=False):
+ # type: (Text, bool, bool) -> Union[URL, DecodedURL]
+ """
+ Automatically turn text into a structured URL object.
+
+ >>> url = parse(u"https://github.com/python-hyper/hyperlink")
+ >>> print(url.to_text())
+ https://github.com/python-hyper/hyperlink
+
+ Args:
+ url: A text string representation of a URL.
+
+ decoded: Whether or not to return a :class:`DecodedURL`,
+ which automatically handles all
+ encoding/decoding/quoting/unquoting for all the various
+ accessors of parts of the URL, or a :class:`URL`,
+ which has the same API, but requires handling of special
+ characters for different parts of the URL.
+
+ lazy: In the case of `decoded=True`, this controls
+ whether the URL is decoded immediately or as accessed. The
+ default, `lazy=False`, checks all encoded parts of the URL
+ for decodability.
+
+ .. versionadded:: 18.0.0
+ """
+ enc_url = EncodedURL.from_text(url)
+ if not decoded:
+ return enc_url
+ dec_url = DecodedURL(enc_url, lazy=lazy)
+ return dec_url
diff --git a/src/hyperlink/hypothesis.py b/src/hyperlink/hypothesis.py
new file mode 100644
index 00000000..4ab987eb
--- /dev/null
+++ b/src/hyperlink/hypothesis.py
@@ -0,0 +1,321 @@
+# -*- coding: utf-8 -*-
+"""
+Hypothesis strategies.
+"""
+from __future__ import absolute_import
+
+try:
+ import hypothesis
+
+ del hypothesis
+except ImportError:
+ from typing import Tuple
+
+ __all__ = () # type: Tuple[str, ...]
+else:
+ from csv import reader as csv_reader
+ from os.path import dirname, join
+ from string import ascii_letters, digits
+ from sys import maxunicode
+ from typing import (
+ Callable,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Text,
+ TypeVar,
+ cast,
+ )
+ from gzip import open as open_gzip
+
+ from . import DecodedURL, EncodedURL
+
+ from hypothesis import assume
+ from hypothesis.strategies import (
+ composite,
+ integers,
+ lists,
+ sampled_from,
+ text,
+ )
+
+ from idna import IDNAError, check_label, encode as idna_encode
+
+ __all__ = (
+ "decoded_urls",
+ "encoded_urls",
+ "hostname_labels",
+ "hostnames",
+ "idna_text",
+ "paths",
+ "port_numbers",
+ )
+
+ T = TypeVar("T")
+ DrawCallable = Callable[[Callable[..., T]], T]
+
+ try:
+ unichr
+ except NameError: # Py3
+ unichr = chr # type: Callable[[int], Text]
+
+ def idna_characters():
+ # type: () -> Text
+ """
+ Returns a string containing IDNA characters.
+ """
+ global _idnaCharacters
+
+ if not _idnaCharacters:
+ result = []
+
+ # Data source "IDNA Derived Properties":
+ # https://www.iana.org/assignments/idna-tables-6.3.0/
+ # idna-tables-6.3.0.xhtml#idna-tables-properties
+ dataFileName = join(
+ dirname(__file__), "idna-tables-properties.csv.gz"
+ )
+ with open_gzip(dataFileName) as dataFile:
+ reader = csv_reader(
+ (line.decode("utf-8") for line in dataFile),
+ delimiter=",",
+ )
+ next(reader) # Skip header row
+ for row in reader:
+ codes, prop, description = row
+
+ if prop != "PVALID":
+ # CONTEXTO or CONTEXTJ are also allowed, but they come
+ # with rules, so we're punting on those here.
+ # See: https://tools.ietf.org/html/rfc5892
+ continue
+
+ startEnd = row[0].split("-", 1)
+ if len(startEnd) == 1:
+ # No end of range given; use start
+ startEnd.append(startEnd[0])
+ start, end = (int(i, 16) for i in startEnd)
+
+ for i in range(start, end + 1):
+ if i > maxunicode: # Happens using Py2 on Windows
+ break
+ result.append(unichr(i))
+
+ _idnaCharacters = u"".join(result)
+
+ return _idnaCharacters
+
+ _idnaCharacters = "" # type: Text
+
+ @composite
+ def idna_text(draw, min_size=1, max_size=None):
+ # type: (DrawCallable, int, Optional[int]) -> Text
+ """
+ A strategy which generates IDNA-encodable text.
+
+ @param min_size: The minimum number of characters in the text.
+ Must be at least 1.
+
+ @param max_size: The maximum number of characters in the text.
+ Use C{None} for an unbounded size.
+ """
+ alphabet = idna_characters()
+
+ assert min_size >= 1
+
+ if max_size is not None:
+ assert max_size >= 1
+
+ result = cast(
+ Text,
+ draw(text(min_size=min_size, max_size=max_size, alphabet=alphabet)),
+ )
+
+ # FIXME: There should be a more efficient way to ensure we produce
+ # valid IDNA text.
+ try:
+ idna_encode(result)
+ except IDNAError:
+ assume(False)
+
+ return result
+
+ @composite
+ def port_numbers(draw, allow_zero=False):
+ # type: (DrawCallable, bool) -> int
+ """
+ A strategy which generates port numbers.
+
+ @param allow_zero: Whether to allow port C{0} as a possible value.
+ """
+ if allow_zero:
+ min_value = 0
+ else:
+ min_value = 1
+
+ return cast(int, draw(integers(min_value=min_value, max_value=65535)))
+
+ @composite
+ def hostname_labels(draw, allow_idn=True):
+ # type: (DrawCallable, bool) -> Text
+ """
+ A strategy which generates host name labels.
+
+ @param allow_idn: Whether to allow non-ASCII characters as allowed by
+ internationalized domain names (IDNs).
+ """
+ if allow_idn:
+ label = cast(Text, draw(idna_text(min_size=1, max_size=63)))
+
+ try:
+ label.encode("ascii")
+ except UnicodeEncodeError:
+ # If the label doesn't encode to ASCII, then we need to check
+ # the length of the label after encoding to punycode and adding
+ # the xn-- prefix.
+ while len(label.encode("punycode")) > 63 - len("xn--"):
+ # Rather than bombing out, just trim from the end until it
+ # is short enough, so hypothesis doesn't have to generate
+ # new data.
+ label = label[:-1]
+
+ else:
+ label = cast(
+ Text,
+ draw(
+ text(
+ min_size=1,
+ max_size=63,
+ alphabet=Text(ascii_letters + digits + u"-"),
+ )
+ ),
+ )
+
+ # Filter invalid labels.
+ # It would be better to reliably avoid generation of bogus labels in
+ # the first place, but it's hard...
+ try:
+ check_label(label)
+ except UnicodeError: # pragma: no cover (not always drawn)
+ assume(False)
+
+ return label
+
+ @composite
+ def hostnames(draw, allow_leading_digit=True, allow_idn=True):
+ # type: (DrawCallable, bool, bool) -> Text
+ """
+ A strategy which generates host names.
+
+ @param allow_leading_digit: Whether to allow a leading digit in host
+ names; they were not allowed prior to RFC 1123.
+
+ @param allow_idn: Whether to allow non-ASCII characters as allowed by
+ internationalized domain names (IDNs).
+ """
+ # Draw first label, filtering out labels with leading digits if needed
+ labels = [
+ cast(
+ Text,
+ draw(
+ hostname_labels(allow_idn=allow_idn).filter(
+ lambda l: (
+ True if allow_leading_digit else l[0] not in digits
+ )
+ )
+ ),
+ )
+ ]
+ # Draw remaining labels
+ labels += cast(
+ List[Text],
+ draw(
+ lists(
+ hostname_labels(allow_idn=allow_idn),
+ min_size=1,
+ max_size=4,
+ )
+ ),
+ )
+
+ # Trim off labels until the total host name length fits in 252
+ # characters. This avoids having to filter the data.
+ while sum(len(label) for label in labels) + len(labels) - 1 > 252:
+ labels = labels[:-1]
+
+ return u".".join(labels)
+
+ def path_characters():
+ # type: () -> str
+ """
+ Returns a string containing valid URL path characters.
+ """
+ global _path_characters
+
+ if _path_characters is None:
+
+ def chars():
+ # type: () -> Iterable[Text]
+ for i in range(maxunicode):
+ c = unichr(i)
+
+ # Exclude reserved characters
+ if c in "#/?":
+ continue
+
+ # Exclude anything not UTF-8 compatible
+ try:
+ c.encode("utf-8")
+ except UnicodeEncodeError:
+ continue
+
+ yield c
+
+ _path_characters = "".join(chars())
+
+ return _path_characters
+
+ _path_characters = None # type: Optional[str]
+
+ @composite
+ def paths(draw):
+ # type: (DrawCallable) -> Sequence[Text]
+ return cast(
+ List[Text],
+ draw(
+ lists(text(min_size=1, alphabet=path_characters()), max_size=10)
+ ),
+ )
+
+ @composite
+ def encoded_urls(draw):
+ # type: (DrawCallable) -> EncodedURL
+ """
+ A strategy which generates L{EncodedURL}s.
+ Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
+ protocol-friendly URI.
+ """
+ port = cast(Optional[int], draw(port_numbers(allow_zero=True)))
+ host = cast(Text, draw(hostnames()))
+ path = cast(Sequence[Text], draw(paths()))
+
+ if port == 0:
+ port = None
+
+ return EncodedURL(
+ scheme=cast(Text, draw(sampled_from((u"http", u"https")))),
+ host=host,
+ port=port,
+ path=path,
+ )
+
+ @composite
+ def decoded_urls(draw):
+ # type: (DrawCallable) -> DecodedURL
+ """
+ A strategy which generates L{DecodedURL}s.
+ Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
+ protocol-friendly URI.
+ """
+ return DecodedURL(draw(encoded_urls()))
diff --git a/src/hyperlink/idna-tables-properties.csv.gz b/src/hyperlink/idna-tables-properties.csv.gz
new file mode 100644
index 00000000..48e9f067
Binary files /dev/null and b/src/hyperlink/idna-tables-properties.csv.gz differ
diff --git a/src/hyperlink/py.typed b/src/hyperlink/py.typed
new file mode 100644
index 00000000..d2dfd5e4
--- /dev/null
+++ b/src/hyperlink/py.typed
@@ -0,0 +1 @@
+# See: https://www.python.org/dev/peps/pep-0561/
diff --git a/src/hyperlink/test/__init__.py b/src/hyperlink/test/__init__.py
new file mode 100644
index 00000000..e10ca70f
--- /dev/null
+++ b/src/hyperlink/test/__init__.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for hyperlink
+"""
+
+__all__ = ()
+
+
+def _init_hypothesis():
+ # type: () -> None
+ from os import environ
+
+ if "CI" in environ:
+ try:
+ from hypothesis import HealthCheck, settings
+ except ImportError:
+ return
+
+ settings.register_profile(
+ "patience",
+ settings(
+ suppress_health_check=[
+ HealthCheck.too_slow,
+ HealthCheck.filter_too_much,
+ ]
+ ),
+ )
+ settings.load_profile("patience")
+
+
+_init_hypothesis()
diff --git a/src/hyperlink/test/common.py b/src/hyperlink/test/common.py
new file mode 100644
index 00000000..ad3bd04a
--- /dev/null
+++ b/src/hyperlink/test/common.py
@@ -0,0 +1,68 @@
+from typing import Any, Callable, Optional, Type
+from unittest import TestCase
+
+
+class HyperlinkTestCase(TestCase):
+ """This type mostly exists to provide a backwards-compatible
+ assertRaises method for Python 2.6 testing.
+ """
+
+ def assertRaises( # type: ignore[override]
+ self,
+ expected_exception, # type: Type[BaseException]
+ callableObj=None, # type: Optional[Callable[..., Any]]
+ *args, # type: Any
+ **kwargs # type: Any
+ ):
+ # type: (...) -> Any
+ """Fail unless an exception of class expected_exception is raised
+ by callableObj when invoked with arguments args and keyword
+ arguments kwargs. If a different type of exception is
+ raised, it will not be caught, and the test case will be
+ deemed to have suffered an error, exactly as for an
+ unexpected exception.
+
+ If called with callableObj omitted or None, will return a
+ context object used like this::
+
+ with self.assertRaises(SomeException):
+ do_something()
+
+ The context manager keeps a reference to the exception as
+ the 'exception' attribute. This allows you to inspect the
+ exception after the assertion::
+
+ with self.assertRaises(SomeException) as cm:
+ do_something()
+ the_exception = cm.exception
+ self.assertEqual(the_exception.error_code, 3)
+ """
+ context = _AssertRaisesContext(expected_exception, self)
+ if callableObj is None:
+ return context
+ with context:
+ callableObj(*args, **kwargs)
+
+
+class _AssertRaisesContext(object):
+ "A context manager used to implement HyperlinkTestCase.assertRaises."
+
+ def __init__(self, expected, test_case):
+ # type: (Type[BaseException], TestCase) -> None
+ self.expected = expected
+ self.failureException = test_case.failureException
+
+ def __enter__(self):
+ # type: () -> "_AssertRaisesContext"
+ return self
+
+ def __exit__(self, exc_type, exc_value, tb):
+ # type: (Optional[Type[BaseException]], Any, Any) -> bool
+ if exc_type is None:
+ exc_name = self.expected.__name__
+ raise self.failureException("%s not raised" % (exc_name,))
+ if not issubclass(exc_type, self.expected):
+ # let unexpected exceptions pass through
+ return False
+ self.exception = exc_value # store for later retrieval
+ return True
diff --git a/hyperlink/test/test_common.py b/src/hyperlink/test/test_common.py
similarity index 80%
rename from hyperlink/test/test_common.py
rename to src/hyperlink/test/test_common.py
index 1d61583c..dc5e5bb8 100644
--- a/hyperlink/test/test_common.py
+++ b/src/hyperlink/test/test_common.py
@@ -1,29 +1,28 @@
"""
Tests for hyperlink.test.common
"""
+from typing import Any
from unittest import TestCase
from .common import HyperlinkTestCase
class _ExpectedException(Exception):
- """An exception used to test HyperlinkTestCase.assertRaises.
-
- """
+ """An exception used to test HyperlinkTestCase.assertRaises."""
class _UnexpectedException(Exception):
- """An exception used to test HyperlinkTestCase.assertRaises.
-
- """
+ """An exception used to test HyperlinkTestCase.assertRaises."""
class TestHyperlink(TestCase):
"""Tests for HyperlinkTestCase"""
def setUp(self):
+ # type: () -> None
self.hyperlink_test = HyperlinkTestCase("run")
def test_assertRaisesWithCallable(self):
+ # type: () -> None
"""HyperlinkTestCase.assertRaises does not raise an AssertionError
when given a callable that, when called with the provided
arguments, raises the expected exception.
@@ -32,44 +31,51 @@ def test_assertRaisesWithCallable(self):
called_with = []
def raisesExpected(*args, **kwargs):
+ # type: (Any, Any) -> None
called_with.append((args, kwargs))
raise _ExpectedException
- self.hyperlink_test.assertRaises(_ExpectedException,
- raisesExpected, 1, keyword=True)
+ self.hyperlink_test.assertRaises(
+ _ExpectedException, raisesExpected, 1, keyword=True
+ )
self.assertEqual(called_with, [((1,), {"keyword": True})])
def test_assertRaisesWithCallableUnexpectedException(self):
+ # type: () -> None
"""When given a callable that raises an unexpected exception,
HyperlinkTestCase.assertRaises raises that exception.
"""
def doesNotRaiseExpected(*args, **kwargs):
+ # type: (Any, Any) -> None
raise _UnexpectedException
try:
- self.hyperlink_test.assertRaises(_ExpectedException,
- doesNotRaiseExpected)
+ self.hyperlink_test.assertRaises(
+ _ExpectedException, doesNotRaiseExpected
+ )
except _UnexpectedException:
pass
def test_assertRaisesWithCallableDoesNotRaise(self):
+ # type: () -> None
"""HyperlinkTestCase.assertRaises raises an AssertionError when given
a callable that, when called, does not raise any exception.
"""
def doesNotRaise(*args, **kwargs):
- return True
+ # type: (Any, Any) -> None
+ pass
try:
- self.hyperlink_test.assertRaises(_ExpectedException,
- doesNotRaise)
+ self.hyperlink_test.assertRaises(_ExpectedException, doesNotRaise)
except AssertionError:
pass
def test_assertRaisesContextManager(self):
+ # type: () -> None
"""HyperlinkTestCase.assertRaises does not raise an AssertionError
when used as a context manager with a suite that raises the
expected exception. The context manager stores the exception
@@ -79,9 +85,12 @@ def test_assertRaisesContextManager(self):
with self.hyperlink_test.assertRaises(_ExpectedException) as cm:
raise _ExpectedException
- self.assertTrue(isinstance(cm.exception, _ExpectedException))
+ self.assertTrue( # type: ignore[unreachable]
+ isinstance(cm.exception, _ExpectedException)
+ )
def test_assertRaisesContextManagerUnexpectedException(self):
+ # type: () -> None
"""When used as a context manager with a block that raises an
unexpected exception, HyperlinkTestCase.assertRaises raises
that unexpected exception.
@@ -94,6 +103,7 @@ def test_assertRaisesContextManagerUnexpectedException(self):
pass
def test_assertRaisesContextManagerDoesNotRaise(self):
+ # type: () -> None
"""HyperlinkTestcase.assertRaises raises an AssertionError when used
as a context manager with a block that does not raise any
exception.
diff --git a/src/hyperlink/test/test_decoded_url.py b/src/hyperlink/test/test_decoded_url.py
new file mode 100644
index 00000000..48452579
--- /dev/null
+++ b/src/hyperlink/test/test_decoded_url.py
@@ -0,0 +1,256 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import unicode_literals
+
+from typing import Dict, Union
+from .. import DecodedURL, URL
+from .._url import _percent_decode
+from .common import HyperlinkTestCase
+
+BASIC_URL = "http://example.com/#"
+TOTAL_URL = (
+ "https://%75%73%65%72:%00%00%00%00@xn--bcher-kva.ch:8080/"
+ "a/nice%20nice/./path/?zot=23%25&zut#frég"
+)
+
+
+class TestURL(HyperlinkTestCase):
+ def test_durl_basic(self):
+ # type: () -> None
+ bdurl = DecodedURL.from_text(BASIC_URL)
+ assert bdurl.scheme == "http"
+ assert bdurl.host == "example.com"
+ assert bdurl.port == 80
+ assert bdurl.path == ("",)
+ assert bdurl.fragment == ""
+
+ durl = DecodedURL.from_text(TOTAL_URL)
+
+ assert durl.scheme == "https"
+ assert durl.host == "bücher.ch"
+ assert durl.port == 8080
+ assert durl.path == ("a", "nice nice", ".", "path", "")
+ assert durl.fragment == "frég"
+ assert durl.get("zot") == ["23%"]
+
+ assert durl.user == "user"
+ assert durl.userinfo == ("user", "\0\0\0\0")
+
+ def test_roundtrip_iri_parameter_values(self):
+ # type: () -> None
+ """
+ .to_iri() should never modify the application-level data of a query
+ parameter.
+ """
+ for value in ["hello", "goodbye", "+", "/", ":", "?"]:
+ self.assertEqual(
+ DecodedURL(DecodedURL().set("test", value).to_iri()).get(
+ "test"
+ ),
+ [value],
+ )
+
+ def test_roundtrip_uri_parameter_values(self):
+ # type: () -> None
+ """
+ .to_uri() should never modify the application-level data of a query
+ parameter.
+ """
+ for value in ["hello", "goodbye", "+", "/", ":", "?"]:
+ self.assertEqual(
+ DecodedURL(DecodedURL().set("test", value).to_uri()).get(
+ "test"
+ ),
+ [value],
+ )
+
+ def test_passthroughs(self):
+ # type: () -> None
+
+ # just basic tests for the methods that more or less pass straight
+ # through to the underlying URL
+
+ durl = DecodedURL.from_text(TOTAL_URL)
+ assert durl.sibling("te%t").path[-1] == "te%t"
+ assert durl.child("../test2%").path[-1] == "../test2%"
+ assert durl.child() == durl
+ assert durl.child() is durl
+ assert durl.click("/").path[-1] == ""
+ assert durl.user == "user"
+
+ assert "." in durl.path
+ assert "." not in durl.normalize().path
+
+ assert durl.to_uri().fragment == "fr%C3%A9g"
+ assert " " in durl.to_iri().path[1]
+
+ assert durl.to_text(with_password=True) == TOTAL_URL
+
+ assert durl.absolute
+ assert durl.rooted
+
+ assert durl == durl.encoded_url.get_decoded_url()
+
+ durl2 = DecodedURL.from_text(TOTAL_URL, lazy=True)
+ assert durl2 == durl2.encoded_url.get_decoded_url(lazy=True)
+
+ assert (
+ str(DecodedURL.from_text(BASIC_URL).child(" "))
+ == "http://example.com/%20"
+ )
+
+ assert not (durl == 1)
+ assert durl != 1
+
+ def test_repr(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(TOTAL_URL)
+ assert repr(durl) == "DecodedURL(url=" + repr(durl._url) + ")"
+
+ def test_query_manipulation(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(TOTAL_URL)
+
+ assert durl.get("zot") == ["23%"]
+ durl = durl.add(" ", "space")
+ assert durl.get(" ") == ["space"]
+ durl = durl.set(" ", "spa%ed")
+ assert durl.get(" ") == ["spa%ed"]
+
+ durl = DecodedURL(url=durl.to_uri())
+ assert durl.get(" ") == ["spa%ed"]
+ durl = durl.remove(" ")
+ assert durl.get(" ") == []
+
+ durl = DecodedURL.from_text("/?%61rg=b&arg=c")
+ assert durl.get("arg") == ["b", "c"]
+
+ assert durl.set("arg", "d").get("arg") == ["d"]
+
+ durl = DecodedURL.from_text(
+ "https://example.com/a/b/?fóó=1&bar=2&fóó=3"
+ )
+ assert durl.remove("fóó") == DecodedURL.from_text(
+ "https://example.com/a/b/?bar=2"
+ )
+ assert durl.remove("fóó", value="1") == DecodedURL.from_text(
+ "https://example.com/a/b/?bar=2&fóó=3"
+ )
+ assert durl.remove("fóó", limit=1) == DecodedURL.from_text(
+ "https://example.com/a/b/?bar=2&fóó=3"
+ )
+ assert durl.remove("fóó", value="1", limit=0) == DecodedURL.from_text(
+ "https://example.com/a/b/?fóó=1&bar=2&fóó=3"
+ )
+
+ def test_equality_and_hashability(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(TOTAL_URL)
+ durl2 = DecodedURL.from_text(TOTAL_URL)
+ burl = DecodedURL.from_text(BASIC_URL)
+ durl_uri = durl.to_uri()
+
+ assert durl == durl
+ assert durl == durl2
+ assert durl != burl
+ assert durl is not None
+ assert durl != durl._url
+
+ AnyURL = Union[URL, DecodedURL]
+
+ durl_map = {} # type: Dict[AnyURL, AnyURL]
+ durl_map[durl] = durl
+ durl_map[durl2] = durl2
+
+ assert len(durl_map) == 1
+
+ durl_map[burl] = burl
+
+ assert len(durl_map) == 2
+
+ durl_map[durl_uri] = durl_uri
+
+ assert len(durl_map) == 3
+
+ def test_replace_roundtrip(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(TOTAL_URL)
+
+ durl2 = durl.replace(
+ scheme=durl.scheme,
+ host=durl.host,
+ path=durl.path,
+ query=durl.query,
+ fragment=durl.fragment,
+ port=durl.port,
+ rooted=durl.rooted,
+ userinfo=durl.userinfo,
+ uses_netloc=durl.uses_netloc,
+ )
+
+ assert durl == durl2
+
+ def test_replace_userinfo(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(TOTAL_URL)
+ with self.assertRaises(ValueError):
+ durl.replace(
+ userinfo=( # type: ignore[arg-type]
+ "user",
+ "pw",
+ "thiswillcauseafailure",
+ )
+ )
+ return
+
+ def test_twisted_compat(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(TOTAL_URL)
+
+ assert durl == DecodedURL.fromText(TOTAL_URL)
+ assert "to_text" in dir(durl)
+ assert "asText" not in dir(durl)
+ assert durl.to_text() == durl.asText()
+
+ def test_percent_decode_mixed(self):
+ # type: () -> None
+
+ # See https://github.com/python-hyper/hyperlink/pull/59 for a
+ # nice discussion of the possibilities
+ assert _percent_decode("abcdé%C3%A9éfg") == "abcdéééfg"
+
+ # still allow percent encoding in the case of an error
+ assert _percent_decode("abcdé%C3éfg") == "abcdé%C3éfg"
+
+ # ...unless explicitly told otherwise
+ with self.assertRaises(UnicodeDecodeError):
+ _percent_decode("abcdé%C3éfg", raise_subencoding_exc=True)
+
+ # when not encodable as subencoding
+ assert _percent_decode("é%25é", subencoding="ascii") == "é%25é"
+
+ def test_click_decoded_url(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(TOTAL_URL)
+ durl_dest = DecodedURL.from_text("/tëst")
+
+ clicked = durl.click(durl_dest)
+ assert clicked.host == durl.host
+ assert clicked.path == durl_dest.path
+ assert clicked.path == ("tëst",)
+
+ def test_decode_plus(self):
+ # type: () -> None
+ durl = DecodedURL.from_text("/x+y%2B?a=b+c%2B")
+ assert durl.path == ("x+y+",)
+ assert durl.get("a") == ["b c+"]
+ assert durl.query == (("a", "b c+"),)
+
+ def test_decode_nonplussed(self):
+ # type: () -> None
+ durl = DecodedURL.from_text(
+ "/x+y%2B?a=b+c%2B", query_plus_is_space=False
+ )
+ assert durl.path == ("x+y+",)
+ assert durl.get("a") == ["b+c+"]
+ assert durl.query == (("a", "b+c+"),)
diff --git a/src/hyperlink/test/test_hypothesis.py b/src/hyperlink/test/test_hypothesis.py
new file mode 100644
index 00000000..776ed7b7
--- /dev/null
+++ b/src/hyperlink/test/test_hypothesis.py
@@ -0,0 +1,214 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for hyperlink.hypothesis.
+"""
+
+try:
+ import hypothesis
+
+ del hypothesis
+except ImportError:
+ pass
+else:
+ from string import digits
+ from typing import Sequence, Text
+
+ try:
+ from unittest.mock import patch
+ except ImportError:
+ from mock import patch # type: ignore[misc]
+
+ from hypothesis import given, settings
+ from hypothesis.strategies import SearchStrategy, data
+
+ from idna import IDNAError, check_label, encode as idna_encode
+
+ from .common import HyperlinkTestCase
+ from .. import DecodedURL, EncodedURL
+ from ..hypothesis import (
+ DrawCallable,
+ composite,
+ decoded_urls,
+ encoded_urls,
+ hostname_labels,
+ hostnames,
+ idna_text,
+ paths,
+ port_numbers,
+ )
+
+ class TestHypothesisStrategies(HyperlinkTestCase):
+ """
+ Tests for hyperlink.hypothesis.
+ """
+
+ @given(idna_text())
+ def test_idna_text_valid(self, text):
+ # type: (Text) -> None
+ """
+ idna_text() generates IDNA-encodable text.
+ """
+ try:
+ idna_encode(text)
+ except IDNAError: # pragma: no cover
+ raise AssertionError("Invalid IDNA text: {!r}".format(text))
+
+ @given(data())
+ def test_idna_text_min_max(self, data):
+ # type: (SearchStrategy) -> None
+ """
+            idna_text() raises AssertionError if min_size or max_size is < 1.
+ """
+ self.assertRaises(AssertionError, data.draw, idna_text(min_size=0))
+ self.assertRaises(AssertionError, data.draw, idna_text(max_size=0))
+
+ @given(port_numbers())
+ def test_port_numbers_bounds(self, port):
+ # type: (int) -> None
+ """
+ port_numbers() generates integers between 1 and 65535, inclusive.
+ """
+ self.assertGreaterEqual(port, 1)
+ self.assertLessEqual(port, 65535)
+
+ @given(port_numbers(allow_zero=True))
+ def test_port_numbers_bounds_allow_zero(self, port):
+ # type: (int) -> None
+ """
+ port_numbers(allow_zero=True) generates integers between 0 and
+ 65535, inclusive.
+ """
+ self.assertGreaterEqual(port, 0)
+ self.assertLessEqual(port, 65535)
+
+ @given(hostname_labels())
+ def test_hostname_labels_valid_idn(self, label):
+ # type: (Text) -> None
+ """
+ hostname_labels() generates IDN host name labels.
+ """
+ try:
+ check_label(label)
+ idna_encode(label)
+ except UnicodeError: # pragma: no cover
+ raise AssertionError("Invalid IDN label: {!r}".format(label))
+
+ @given(data())
+ @settings(max_examples=10)
+ def test_hostname_labels_long_idn_punycode(self, data):
+ # type: (SearchStrategy) -> None
+ """
+ hostname_labels() handles case where idna_text() generates text
+ that encoded to punycode ends up as longer than allowed.
+ """
+
+ @composite
+ def mock_idna_text(draw, min_size, max_size):
+ # type: (DrawCallable, int, int) -> Text
+ # We want a string that does not exceed max_size, but when
+ # encoded to punycode, does exceed max_size.
+ # So use a unicode character that is larger when encoded,
+ # "á" being a great example, and use it max_size times, which
+ # will be max_size * 3 in size when encoded.
+ return u"\N{LATIN SMALL LETTER A WITH ACUTE}" * max_size
+
+ with patch("hyperlink.hypothesis.idna_text", mock_idna_text):
+ label = data.draw(hostname_labels())
+ try:
+ check_label(label)
+ idna_encode(label)
+ except UnicodeError: # pragma: no cover
+ raise AssertionError(
+ "Invalid IDN label: {!r}".format(label)
+ )
+
+ @given(hostname_labels(allow_idn=False))
+ def test_hostname_labels_valid_ascii(self, label):
+ # type: (Text) -> None
+ """
+            hostname_labels(allow_idn=False) generates ASCII host name labels.
+ """
+ try:
+ check_label(label)
+ label.encode("ascii")
+ except UnicodeError: # pragma: no cover
+ raise AssertionError("Invalid ASCII label: {!r}".format(label))
+
+ @given(hostnames())
+ def test_hostnames_idn(self, hostname):
+ # type: (Text) -> None
+ """
+            hostnames() generates IDN host names.
+ """
+ try:
+ for label in hostname.split(u"."):
+ check_label(label)
+ idna_encode(hostname)
+ except UnicodeError: # pragma: no cover
+ raise AssertionError(
+ "Invalid IDN host name: {!r}".format(hostname)
+ )
+
+ @given(hostnames(allow_leading_digit=False))
+ def test_hostnames_idn_nolead(self, hostname):
+ # type: (Text) -> None
+ """
+            hostnames(allow_leading_digit=False) generates IDN host names
+ without leading digits.
+ """
+ self.assertTrue(hostname == hostname.lstrip(digits))
+
+ @given(hostnames(allow_idn=False))
+ def test_hostnames_ascii(self, hostname):
+ # type: (Text) -> None
+ """
+            hostnames(allow_idn=False) generates ASCII host names.
+ """
+ try:
+ for label in hostname.split(u"."):
+ check_label(label)
+ hostname.encode("ascii")
+ except UnicodeError: # pragma: no cover
+ raise AssertionError(
+ "Invalid ASCII host name: {!r}".format(hostname)
+ )
+
+ @given(hostnames(allow_leading_digit=False, allow_idn=False))
+ def test_hostnames_ascii_nolead(self, hostname):
+ # type: (Text) -> None
+ """
+ hostnames(allow_leading_digit=False, allow_idn=False) generates
+ ASCII host names without leading digits.
+ """
+ self.assertTrue(hostname == hostname.lstrip(digits))
+
+ @given(paths())
+ def test_paths(self, path):
+ # type: (Sequence[Text]) -> None
+ """
+ paths() generates sequences of URL path components.
+ """
+ text = u"/".join(path)
+ try:
+ text.encode("utf-8")
+ except UnicodeError: # pragma: no cover
+ raise AssertionError("Invalid URL path: {!r}".format(path))
+
+ for segment in path:
+ self.assertNotIn("#/?", segment)
+
+ @given(encoded_urls())
+ def test_encoded_urls(self, url):
+ # type: (EncodedURL) -> None
+ """
+ encoded_urls() generates EncodedURLs.
+ """
+ self.assertIsInstance(url, EncodedURL)
+
+ @given(decoded_urls())
+ def test_decoded_urls(self, url):
+ # type: (DecodedURL) -> None
+ """
+ decoded_urls() generates DecodedURLs.
+ """
+ self.assertIsInstance(url, DecodedURL)
diff --git a/hyperlink/test/test_parse.py b/src/hyperlink/test/test_parse.py
similarity index 64%
rename from hyperlink/test/test_parse.py
rename to src/hyperlink/test/test_parse.py
index cd2e9c97..66b02709 100644
--- a/hyperlink/test/test_parse.py
+++ b/src/hyperlink/test/test_parse.py
@@ -5,24 +5,28 @@
from .common import HyperlinkTestCase
from hyperlink import parse, EncodedURL, DecodedURL
-BASIC_URL = 'http://example.com/#'
-TOTAL_URL = "https://%75%73%65%72:%00%00%00%00@xn--bcher-kva.ch:8080/a/nice%20nice/./path/?zot=23%25&zut#frég"
-UNDECODABLE_FRAG_URL = TOTAL_URL + '%C3'
+BASIC_URL = "http://example.com/#"
+TOTAL_URL = (
+ "https://%75%73%65%72:%00%00%00%00@xn--bcher-kva.ch:8080"
+ "/a/nice%20nice/./path/?zot=23%25&zut#frég"
+)
+UNDECODABLE_FRAG_URL = TOTAL_URL + "%C3"
# the %C3 above percent-decodes to an unpaired \xc3 byte which makes this
# invalid utf8
class TestURL(HyperlinkTestCase):
def test_parse(self):
+ # type: () -> None
purl = parse(TOTAL_URL)
assert isinstance(purl, DecodedURL)
- assert purl.user == 'user'
- assert purl.get('zot') == ['23%']
- assert purl.fragment == 'frég'
+ assert purl.user == "user"
+ assert purl.get("zot") == ["23%"]
+ assert purl.fragment == "frég"
purl2 = parse(TOTAL_URL, decoded=False)
assert isinstance(purl2, EncodedURL)
- assert purl2.get('zot') == ['23%25']
+ assert purl2.get("zot") == ["23%25"]
with self.assertRaises(UnicodeDecodeError):
purl3 = parse(UNDECODABLE_FRAG_URL)
@@ -31,5 +35,3 @@ def test_parse(self):
with self.assertRaises(UnicodeDecodeError):
purl3.fragment
-
- return
diff --git a/src/hyperlink/test/test_scheme_registration.py b/src/hyperlink/test/test_scheme_registration.py
new file mode 100644
index 00000000..b43c91e3
--- /dev/null
+++ b/src/hyperlink/test/test_scheme_registration.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from typing import cast
+
+
+from .. import _url
+from .common import HyperlinkTestCase
+from .._url import register_scheme, URL, DecodedURL
+
+
+class TestSchemeRegistration(HyperlinkTestCase):
+ def setUp(self):
+ # type: () -> None
+ self._orig_scheme_port_map = dict(_url.SCHEME_PORT_MAP)
+ self._orig_no_netloc_schemes = set(_url.NO_NETLOC_SCHEMES)
+
+ def tearDown(self):
+ # type: () -> None
+ _url.SCHEME_PORT_MAP = self._orig_scheme_port_map
+ _url.NO_NETLOC_SCHEMES = self._orig_no_netloc_schemes
+
+ def test_register_scheme_basic(self):
+ # type: () -> None
+ register_scheme("deltron", uses_netloc=True, default_port=3030)
+
+ u1 = URL.from_text("deltron://example.com")
+ assert u1.scheme == "deltron"
+ assert u1.port == 3030
+ assert u1.uses_netloc is True
+
+ # test netloc works even when the original gives no indication
+ u2 = URL.from_text("deltron:")
+ u2 = u2.replace(host="example.com")
+ assert u2.to_text() == "deltron://example.com"
+
+ # test default port means no emission
+ u3 = URL.from_text("deltron://example.com:3030")
+ assert u3.to_text() == "deltron://example.com"
+
+ register_scheme("nonetron", default_port=3031)
+ u4 = URL(scheme="nonetron")
+ u4 = u4.replace(host="example.com")
+ assert u4.to_text() == "nonetron://example.com"
+
+ def test_register_no_netloc_scheme(self):
+ # type: () -> None
+ register_scheme("noloctron", uses_netloc=False)
+ u4 = URL(scheme="noloctron")
+ u4 = u4.replace(path=("example", "path"))
+ assert u4.to_text() == "noloctron:example/path"
+
+ def test_register_no_netloc_with_port(self):
+ # type: () -> None
+ with self.assertRaises(ValueError):
+ register_scheme("badnetlocless", uses_netloc=False, default_port=7)
+
+ def test_invalid_uses_netloc(self):
+ # type: () -> None
+ with self.assertRaises(ValueError):
+ register_scheme("badnetloc", uses_netloc=cast(bool, None))
+ with self.assertRaises(ValueError):
+ register_scheme("badnetloc", uses_netloc=cast(bool, object()))
+
+ def test_register_invalid_uses_netloc(self):
+ # type: () -> None
+ with self.assertRaises(ValueError):
+ register_scheme("lol", uses_netloc=cast(bool, object()))
+
+ def test_register_invalid_port(self):
+ # type: () -> None
+ with self.assertRaises(ValueError):
+ register_scheme("nope", default_port=cast(bool, object()))
+
+ def test_register_no_quote_plus_scheme(self):
+ # type: () -> None
+ register_scheme("keepplus", query_plus_is_space=False)
+ plus_is_not_space = DecodedURL.from_text(
+ "keepplus://example.com/?q=a+b"
+ )
+ plus_is_space = DecodedURL.from_text("https://example.com/?q=a+b")
+ assert plus_is_not_space.get("q") == ["a+b"]
+ assert plus_is_space.get("q") == ["a b"]
diff --git a/src/hyperlink/test/test_socket.py b/src/hyperlink/test/test_socket.py
new file mode 100644
index 00000000..5f83d45b
--- /dev/null
+++ b/src/hyperlink/test/test_socket.py
@@ -0,0 +1,45 @@
+# mypy: always-true=inet_pton
+
+try:
+ from socket import inet_pton
+except ImportError:
+ inet_pton = None # type: ignore[assignment]
+
+if not inet_pton:
+ import socket
+
+ from .common import HyperlinkTestCase
+ from .._socket import inet_pton
+
+ class TestSocket(HyperlinkTestCase):
+ def test_inet_pton_ipv4_valid(self):
+ # type: () -> None
+ data = inet_pton(socket.AF_INET, "127.0.0.1")
+ assert isinstance(data, bytes)
+
+ def test_inet_pton_ipv4_bogus(self):
+ # type: () -> None
+ with self.assertRaises(socket.error):
+ inet_pton(socket.AF_INET, "blah")
+
+ def test_inet_pton_ipv6_valid(self):
+ # type: () -> None
+ data = inet_pton(socket.AF_INET6, "::1")
+ assert isinstance(data, bytes)
+
+ def test_inet_pton_ipv6_bogus(self):
+ # type: () -> None
+ with self.assertRaises(socket.error):
+ inet_pton(socket.AF_INET6, "blah")
+
+ def test_inet_pton_bogus_family(self):
+ # type: () -> None
+ # Find an integer not associated with a known address family
+ i = int(socket.AF_INET6)
+ while True:
+ if i != socket.AF_INET and i != socket.AF_INET6:
+ break
+ i += 100
+
+ with self.assertRaises(socket.error):
+ inet_pton(i, "127.0.0.1")
diff --git a/src/hyperlink/test/test_url.py b/src/hyperlink/test/test_url.py
new file mode 100644
index 00000000..37c91726
--- /dev/null
+++ b/src/hyperlink/test/test_url.py
@@ -0,0 +1,1495 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+from __future__ import unicode_literals
+
+import sys
+import socket
+from typing import Any, Iterable, Optional, Text, Tuple, cast
+
+from .common import HyperlinkTestCase
+from .. import URL, URLParseError
+from .._url import inet_pton, SCHEME_PORT_MAP
+
+
+PY2 = sys.version_info[0] == 2
+unicode = type("")
+
+
+BASIC_URL = "http://www.foo.com/a/nice/path/?zot=23&zut"
+
+# Examples from RFC 3986 section 5.4, Reference Resolution Examples
+relativeLinkBaseForRFC3986 = "http://a/b/c/d;p?q"
+relativeLinkTestsForRFC3986 = [
+ # "Normal"
+ # ('g:h', 'g:h'), # can't click on a scheme-having url without an abs path
+ ("g", "http://a/b/c/g"),
+ ("./g", "http://a/b/c/g"),
+ ("g/", "http://a/b/c/g/"),
+ ("/g", "http://a/g"),
+ ("//g", "http://g"),
+ ("?y", "http://a/b/c/d;p?y"),
+ ("g?y", "http://a/b/c/g?y"),
+ ("#s", "http://a/b/c/d;p?q#s"),
+ ("g#s", "http://a/b/c/g#s"),
+ ("g?y#s", "http://a/b/c/g?y#s"),
+ (";x", "http://a/b/c/;x"),
+ ("g;x", "http://a/b/c/g;x"),
+ ("g;x?y#s", "http://a/b/c/g;x?y#s"),
+ ("", "http://a/b/c/d;p?q"),
+ (".", "http://a/b/c/"),
+ ("./", "http://a/b/c/"),
+ ("..", "http://a/b/"),
+ ("../", "http://a/b/"),
+ ("../g", "http://a/b/g"),
+ ("../..", "http://a/"),
+ ("../../", "http://a/"),
+ ("../../g", "http://a/g"),
+ # Abnormal examples
+ # ".." cannot be used to change the authority component of a URI.
+ ("../../../g", "http://a/g"),
+ ("../../../../g", "http://a/g"),
+ # Only include "." and ".." when they are only part of a larger segment,
+ # not by themselves.
+ ("/./g", "http://a/g"),
+ ("/../g", "http://a/g"),
+ ("g.", "http://a/b/c/g."),
+ (".g", "http://a/b/c/.g"),
+ ("g..", "http://a/b/c/g.."),
+ ("..g", "http://a/b/c/..g"),
+ # Unnecessary or nonsensical forms of "." and "..".
+ ("./../g", "http://a/b/g"),
+ ("./g/.", "http://a/b/c/g/"),
+ ("g/./h", "http://a/b/c/g/h"),
+ ("g/../h", "http://a/b/c/h"),
+ ("g;x=1/./y", "http://a/b/c/g;x=1/y"),
+ ("g;x=1/../y", "http://a/b/c/y"),
+ # Separating the reference's query and fragment components from the path.
+ ("g?y/./x", "http://a/b/c/g?y/./x"),
+ ("g?y/../x", "http://a/b/c/g?y/../x"),
+ ("g#s/./x", "http://a/b/c/g#s/./x"),
+ ("g#s/../x", "http://a/b/c/g#s/../x"),
+]
+
+
+ROUNDTRIP_TESTS = (
+ "http://localhost",
+ "http://localhost/",
+ "http://127.0.0.1/",
+ "http://[::127.0.0.1]/",
+ "http://[::1]/",
+ "http://localhost/foo",
+ "http://localhost/foo/",
+ "http://localhost/foo!!bar/",
+ "http://localhost/foo%20bar/",
+ "http://localhost/foo%2Fbar/",
+ "http://localhost/foo?n",
+ "http://localhost/foo?n=v",
+ "http://localhost/foo?n=/a/b",
+ "http://example.com/foo!@$bar?b!@z=123",
+ "http://localhost/asd?a=asd%20sdf/345",
+ "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)",
+ "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)",
+ "?sslrootcert=/Users/glyph/Downloads/rds-ca-2015-root.pem&sslmode=verify",
+ # from boltons.urlutils' tests
+ "http://googlewebsite.com/e-shops.aspx",
+ "http://example.com:8080/search?q=123&business=Nothing%20Special",
+ "http://hatnote.com:9000/?arg=1&arg=2&arg=3",
+ "https://xn--bcher-kva.ch",
+ "http://xn--ggbla1c4e.xn--ngbc5azd/",
+ "http://tools.ietf.org/html/rfc3986#section-3.4",
+ # 'http://wiki:pedia@hatnote.com',
+ "ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz",
+ "http://[1080:0:0:0:8:800:200C:417A]/index.html",
+ "ssh://192.0.2.16:2222/",
+ "https://[::101.45.75.219]:80/?hi=bye",
+ "ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)",
+ "mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org",
+ "news:alt.rec.motorcycle",
+ "tel:+1-800-867-5309",
+ "urn:oasis:member:A00024:x",
+ (
+ "magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%"
+ "20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&"
+ "tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&"
+ "tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337"
+ ),
+ # percent-encoded delimiters in percent-encodable fields
+ "https://%3A@example.com/", # colon in username
+ "https://%40@example.com/", # at sign in username
+ "https://%2f@example.com/", # slash in username
+ "https://a:%3a@example.com/", # colon in password
+ "https://a:%40@example.com/", # at sign in password
+ "https://a:%2f@example.com/", # slash in password
+ "https://a:%3f@example.com/", # question mark in password
+ "https://example.com/%2F/", # slash in path
+ "https://example.com/%3F/", # question mark in path
+ "https://example.com/%23/", # hash in path
+ "https://example.com/?%23=b", # hash in query param name
+ "https://example.com/?%3D=b", # equals in query param name
+ "https://example.com/?%26=b", # ampersand in query param name
+ "https://example.com/?a=%23", # hash in query param value
+ "https://example.com/?a=%26", # ampersand in query param value
+ "https://example.com/?a=%3D", # equals in query param value
+ "https://example.com/?foo+bar=baz", # plus in query param name
+ "https://example.com/?foo=bar+baz", # plus in query param value
+ # double-encoded percent sign in all percent-encodable positions:
+ "http://(%2525):(%2525)@example.com/(%2525)/?(%2525)=(%2525)#(%2525)",
+ # colon in first part of schemeless relative url
+ "first_seg_rel_path__colon%3Anotok/second_seg__colon%3Aok",
+)
+
+
+class TestURL(HyperlinkTestCase):
+ """
+ Tests for L{URL}.
+ """
+
+ def assertUnicoded(self, u):
+ # type: (URL) -> None
+ """
+ The given L{URL}'s components should be L{unicode}.
+
+ @param u: The L{URL} to test.
+ """
+ self.assertTrue(
+ isinstance(u.scheme, unicode) or u.scheme is None, repr(u)
+ )
+ self.assertTrue(isinstance(u.host, unicode) or u.host is None, repr(u))
+ for seg in u.path:
+ self.assertEqual(type(seg), unicode, repr(u))
+ for (_k, v) in u.query:
+ self.assertEqual(type(seg), unicode, repr(u))
+ self.assertTrue(v is None or isinstance(v, unicode), repr(u))
+ self.assertEqual(type(u.fragment), unicode, repr(u))
+
+ def assertURL(
+ self,
+ u, # type: URL
+ scheme, # type: Text
+ host, # type: Text
+ path, # type: Iterable[Text]
+ query, # type: Iterable[Tuple[Text, Optional[Text]]]
+ fragment, # type: Text
+ port, # type: Optional[int]
+ userinfo="", # type: Text
+ ):
+ # type: (...) -> None
+ """
+ The given L{URL} should have the given components.
+
+ @param u: The actual L{URL} to examine.
+
+ @param scheme: The expected scheme.
+
+ @param host: The expected host.
+
+ @param path: The expected path.
+
+ @param query: The expected query.
+
+ @param fragment: The expected fragment.
+
+ @param port: The expected port.
+
+ @param userinfo: The expected userinfo.
+ """
+ actual = (
+ u.scheme,
+ u.host,
+ u.path,
+ u.query,
+ u.fragment,
+ u.port,
+ u.userinfo,
+ )
+ expected = (
+ scheme,
+ host,
+ tuple(path),
+ tuple(query),
+ fragment,
+ port,
+ u.userinfo,
+ )
+ self.assertEqual(actual, expected)
+
+ def test_initDefaults(self):
+ # type: () -> None
+ """
+ L{URL} should have appropriate default values.
+ """
+
+ def check(u):
+ # type: (URL) -> None
+ self.assertUnicoded(u)
+ self.assertURL(u, "http", "", [], [], "", 80, "")
+
+ check(URL("http", ""))
+ check(URL("http", "", [], []))
+ check(URL("http", "", [], [], ""))
+
+ def test_init(self):
+ # type: () -> None
+ """
+ L{URL} should accept L{unicode} parameters.
+ """
+ u = URL("s", "h", ["p"], [("k", "v"), ("k", None)], "f")
+ self.assertUnicoded(u)
+ self.assertURL(u, "s", "h", ["p"], [("k", "v"), ("k", None)], "f", None)
+
+ self.assertURL(
+ URL("http", "\xe0", ["\xe9"], [("\u03bb", "\u03c0")], "\u22a5"),
+ "http",
+ "\xe0",
+ ["\xe9"],
+ [("\u03bb", "\u03c0")],
+ "\u22a5",
+ 80,
+ )
+
+ def test_initPercent(self):
+ # type: () -> None
+ """
+ L{URL} should accept (and not interpret) percent characters.
+ """
+ u = URL("s", "%68", ["%70"], [("%6B", "%76"), ("%6B", None)], "%66")
+ self.assertUnicoded(u)
+ self.assertURL(
+ u, "s", "%68", ["%70"], [("%6B", "%76"), ("%6B", None)], "%66", None
+ )
+
+ def test_repr(self):
+ # type: () -> None
+ """
+ L{URL.__repr__} will display the canonical form of the URL, wrapped in
+ a L{URL.from_text} invocation, so that it is C{eval}-able but still
+ easy to read.
+ """
+ self.assertEqual(
+ repr(
+ URL(
+ scheme="http",
+ host="foo",
+ path=["bar"],
+ query=[("baz", None), ("k", "v")],
+ fragment="frob",
+ )
+ ),
+ "URL.from_text(%s)" % (repr("http://foo/bar?baz&k=v#frob"),),
+ )
+
+ def test_from_text(self):
+ # type: () -> None
+ """
+ Round-tripping L{URL.from_text} with C{str} results in an equivalent
+ URL.
+ """
+ urlpath = URL.from_text(BASIC_URL)
+ self.assertEqual(BASIC_URL, urlpath.to_text())
+
+ def test_roundtrip(self):
+ # type: () -> None
+ """
+ L{URL.to_text} should invert L{URL.from_text}.
+ """
+ for test in ROUNDTRIP_TESTS:
+ result = URL.from_text(test).to_text(with_password=True)
+ self.assertEqual(test, result)
+
+ def test_roundtrip_double_iri(self):
+ # type: () -> None
+ for test in ROUNDTRIP_TESTS:
+ url = URL.from_text(test)
+ iri = url.to_iri()
+ double_iri = iri.to_iri()
+ assert iri == double_iri
+
+ iri_text = iri.to_text(with_password=True)
+ double_iri_text = double_iri.to_text(with_password=True)
+ assert iri_text == double_iri_text
+ return
+
+ def test_equality(self):
+ # type: () -> None
+ """
+ Two URLs decoded using L{URL.from_text} will be equal (C{==}) if they
+ decoded same URL string, and unequal (C{!=}) if they decoded different
+ strings.
+ """
+ urlpath = URL.from_text(BASIC_URL)
+ self.assertEqual(urlpath, URL.from_text(BASIC_URL))
+ self.assertNotEqual(
+ urlpath,
+ URL.from_text(
+ "ftp://www.anotherinvaliddomain.com/" "foo/bar/baz/?zot=21&zut"
+ ),
+ )
+
+ def test_fragmentEquality(self):
+ # type: () -> None
+ """
+ An URL created with the empty string for a fragment compares equal
+ to an URL created with an unspecified fragment.
+ """
+ self.assertEqual(URL(fragment=""), URL())
+ self.assertEqual(
+ URL.from_text("http://localhost/#"),
+ URL.from_text("http://localhost/"),
+ )
+
+ def test_child(self):
+ # type: () -> None
+ """
+ L{URL.child} appends a new path segment, but does not affect the query
+ or fragment.
+ """
+ urlpath = URL.from_text(BASIC_URL)
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/gong?zot=23&zut",
+ urlpath.child("gong").to_text(),
+ )
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/gong%2F?zot=23&zut",
+ urlpath.child("gong/").to_text(),
+ )
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/gong%2Fdouble?zot=23&zut",
+ urlpath.child("gong/double").to_text(),
+ )
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/gong%2Fdouble%2F?zot=23&zut",
+ urlpath.child("gong/double/").to_text(),
+ )
+
+ def test_multiChild(self):
+ # type: () -> None
+ """
+ L{URL.child} receives multiple segments as C{*args} and appends each in
+ turn.
+ """
+ url = URL.from_text("http://example.com/a/b")
+ self.assertEqual(
+ url.child("c", "d", "e").to_text(), "http://example.com/a/b/c/d/e"
+ )
+
+ def test_childInitRoot(self):
+ # type: () -> None
+ """
+ L{URL.child} of a L{URL} without a path produces a L{URL} with a single
+ path segment.
+ """
+ childURL = URL(host="www.foo.com").child("c")
+ self.assertTrue(childURL.rooted)
+ self.assertEqual("http://www.foo.com/c", childURL.to_text())
+
+ def test_emptyChild(self):
+ # type: () -> None
+ """
+ L{URL.child} without any new segments returns the original L{URL}.
+ """
+ url = URL(host="www.foo.com")
+ self.assertEqual(url.child(), url)
+
+ def test_sibling(self):
+ # type: () -> None
+ """
+ L{URL.sibling} of a L{URL} replaces the last path segment, but does not
+ affect the query or fragment.
+ """
+ urlpath = URL.from_text(BASIC_URL)
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/sister?zot=23&zut",
+ urlpath.sibling("sister").to_text(),
+ )
+ # Use an url without trailing '/' to check child removal.
+ url_text = "http://www.foo.com/a/nice/path?zot=23&zut"
+ urlpath = URL.from_text(url_text)
+ self.assertEqual(
+ "http://www.foo.com/a/nice/sister?zot=23&zut",
+ urlpath.sibling("sister").to_text(),
+ )
+
+ def test_click(self):
+ # type: () -> None
+ """
+ L{URL.click} interprets the given string as a relative URI-reference
+ and returns a new L{URL} interpreting C{self} as the base absolute URI.
+ """
+ urlpath = URL.from_text(BASIC_URL)
+ # A null uri should be valid (return here).
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=23&zut",
+ urlpath.click("").to_text(),
+ )
+ # A simple relative path remove the query.
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/click",
+ urlpath.click("click").to_text(),
+ )
+ # An absolute path replace path and query.
+ self.assertEqual(
+ "http://www.foo.com/click", urlpath.click("/click").to_text()
+ )
+ # Replace just the query.
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?burp",
+ urlpath.click("?burp").to_text(),
+ )
+ # One full url to another should not generate '//' between authority.
+ # and path
+ self.assertTrue(
+ "//foobar"
+ not in urlpath.click("http://www.foo.com/foobar").to_text()
+ )
+
+ # From a url with no query clicking a url with a query, the query
+ # should be handled properly.
+ u = URL.from_text("http://www.foo.com/me/noquery")
+ self.assertEqual(
+ "http://www.foo.com/me/17?spam=158",
+ u.click("/me/17?spam=158").to_text(),
+ )
+
+ # Check that everything from the path onward is removed when the click
+ # link has no path.
+ u = URL.from_text("http://localhost/foo?abc=def")
+ self.assertEqual(
+ u.click("http://www.python.org").to_text(), "http://www.python.org"
+ )
+
+ # https://twistedmatrix.com/trac/ticket/8184
+ u = URL.from_text("http://hatnote.com/a/b/../c/./d/e/..")
+ res = "http://hatnote.com/a/c/d/"
+ self.assertEqual(u.click("").to_text(), res)
+
+ # test click default arg is same as empty string above
+ self.assertEqual(u.click().to_text(), res)
+
+ # test click on a URL instance
+ u = URL.fromText("http://localhost/foo/?abc=def")
+ u2 = URL.from_text("bar")
+ u3 = u.click(u2)
+ self.assertEqual(u3.to_text(), "http://localhost/foo/bar")
+
+ def test_clickRFC3986(self):
+ # type: () -> None
+ """
+ L{URL.click} should correctly resolve the examples in RFC 3986.
+ """
+ base = URL.from_text(relativeLinkBaseForRFC3986)
+ for (ref, expected) in relativeLinkTestsForRFC3986:
+ self.assertEqual(base.click(ref).to_text(), expected)
+
+ def test_clickSchemeRelPath(self):
+ # type: () -> None
+ """
+ L{URL.click} should not accept schemes with relative paths.
+ """
+ base = URL.from_text(relativeLinkBaseForRFC3986)
+ self.assertRaises(NotImplementedError, base.click, "g:h")
+ self.assertRaises(NotImplementedError, base.click, "http:h")
+
+ def test_cloneUnchanged(self):
+ # type: () -> None
+ """
+ Verify that L{URL.replace} doesn't change any of the arguments it
+ is passed.
+ """
+ urlpath = URL.from_text("https://x:1/y?z=1#A")
+ self.assertEqual(
+ urlpath.replace(
+ urlpath.scheme,
+ urlpath.host,
+ urlpath.path,
+ urlpath.query,
+ urlpath.fragment,
+ urlpath.port,
+ ),
+ urlpath,
+ )
+ self.assertEqual(urlpath.replace(), urlpath)
+
+ def test_clickCollapse(self):
+ # type: () -> None
+ """
+ L{URL.click} collapses C{.} and C{..} according to RFC 3986 section
+ 5.2.4.
+ """
+ tests = [
+ ["http://localhost/", ".", "http://localhost/"],
+ ["http://localhost/", "..", "http://localhost/"],
+ ["http://localhost/a/b/c", ".", "http://localhost/a/b/"],
+ ["http://localhost/a/b/c", "..", "http://localhost/a/"],
+ ["http://localhost/a/b/c", "./d/e", "http://localhost/a/b/d/e"],
+ ["http://localhost/a/b/c", "../d/e", "http://localhost/a/d/e"],
+ ["http://localhost/a/b/c", "/./d/e", "http://localhost/d/e"],
+ ["http://localhost/a/b/c", "/../d/e", "http://localhost/d/e"],
+ [
+ "http://localhost/a/b/c/",
+ "../../d/e/",
+ "http://localhost/a/d/e/",
+ ],
+ ["http://localhost/a/./c", "../d/e", "http://localhost/d/e"],
+ ["http://localhost/a/./c/", "../d/e", "http://localhost/a/d/e"],
+ [
+ "http://localhost/a/b/c/d",
+ "./e/../f/../g",
+ "http://localhost/a/b/c/g",
+ ],
+ ["http://localhost/a/b/c", "d//e", "http://localhost/a/b/d//e"],
+ ]
+ for start, click, expected in tests:
+ actual = URL.from_text(start).click(click).to_text()
+ self.assertEqual(
+ actual,
+ expected,
+ "{start}.click({click}) => {actual} not {expected}".format(
+ start=start,
+ click=repr(click),
+ actual=actual,
+ expected=expected,
+ ),
+ )
+
+ def test_queryAdd(self):
+ # type: () -> None
+ """
+ L{URL.add} adds query parameters.
+ """
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?foo=bar",
+ URL.from_text("http://www.foo.com/a/nice/path/")
+ .add("foo", "bar")
+ .to_text(),
+ )
+ self.assertEqual(
+ "http://www.foo.com/?foo=bar",
+ URL(host="www.foo.com").add("foo", "bar").to_text(),
+ )
+ urlpath = URL.from_text(BASIC_URL)
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=23&zut&burp",
+ urlpath.add("burp").to_text(),
+ )
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx",
+ urlpath.add("burp", "xxx").to_text(),
+ )
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zing",
+ urlpath.add("burp", "xxx").add("zing").to_text(),
+ )
+ # Note the inversion!
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=23&zut&zing&burp=xxx",
+ urlpath.add("zing").add("burp", "xxx").to_text(),
+ )
+ # Note the two values for the same name.
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zot=32",
+ urlpath.add("burp", "xxx").add("zot", "32").to_text(),
+ )
+
+ def test_querySet(self):
+ # type: () -> None
+ """
+ L{URL.set} replaces query parameters by name.
+ """
+ urlpath = URL.from_text(BASIC_URL)
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=32&zut",
+ urlpath.set("zot", "32").to_text(),
+ )
+ # Replace name without value with name/value and vice-versa.
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot&zut=itworked",
+ urlpath.set("zot").set("zut", "itworked").to_text(),
+ )
+ # Q: what happens when the query has two values and we replace?
+ # A: we replace both values with a single one
+ self.assertEqual(
+ "http://www.foo.com/a/nice/path/?zot=32&zut",
+ urlpath.add("zot", "xxx").set("zot", "32").to_text(),
+ )
+
+ def test_queryRemove(self):
+ # type: () -> None
+ """
+ L{URL.remove} removes instances of a query parameter.
+ """
+ url = URL.from_text("https://example.com/a/b/?foo=1&bar=2&foo=3")
+ self.assertEqual(
+ url.remove("foo"), URL.from_text("https://example.com/a/b/?bar=2")
+ )
+
+ self.assertEqual(
+ url.remove(name="foo", value="1"),
+ URL.from_text("https://example.com/a/b/?bar=2&foo=3"),
+ )
+
+ self.assertEqual(
+ url.remove(name="foo", limit=1),
+ URL.from_text("https://example.com/a/b/?bar=2&foo=3"),
+ )
+
+ self.assertEqual(
+ url.remove(name="foo", value="1", limit=0),
+ URL.from_text("https://example.com/a/b/?foo=1&bar=2&foo=3"),
+ )
+
+ def test_parseEqualSignInParamValue(self):
+ # type: () -> None
+ """
+ Every C{=}-sign after the first in a query parameter is simply included
+ in the value of the parameter.
+ """
+ u = URL.from_text("http://localhost/?=x=x=x")
+ self.assertEqual(u.get(""), ["x=x=x"])
+ self.assertEqual(u.to_text(), "http://localhost/?=x=x=x")
+ u = URL.from_text("http://localhost/?foo=x=x=x&bar=y")
+ self.assertEqual(u.query, (("foo", "x=x=x"), ("bar", "y")))
+ self.assertEqual(u.to_text(), "http://localhost/?foo=x=x=x&bar=y")
+
+ u = URL.from_text(
+ "https://example.com/?argument=3&argument=4&operator=%3D"
+ )
+ iri = u.to_iri()
+ self.assertEqual(iri.get("operator"), ["="])
+ # assert that the equals is not unnecessarily escaped
+ self.assertEqual(iri.to_uri().get("operator"), ["="])
+
+ def test_empty(self):
+ # type: () -> None
+ """
+ An empty L{URL} should serialize as the empty string.
+ """
+ self.assertEqual(URL().to_text(), "")
+
+ def test_justQueryText(self):
+ # type: () -> None
+ """
+ An L{URL} with query text should serialize as just query text.
+ """
+ u = URL(query=[("hello", "world")])
+ self.assertEqual(u.to_text(), "?hello=world")
+
+ def test_identicalEqual(self):
+ # type: () -> None
+ """
+ L{URL} compares equal to itself.
+ """
+ u = URL.from_text("http://localhost/")
+ self.assertEqual(u, u)
+
+ def test_similarEqual(self):
+ # type: () -> None
+ """
+ URLs with equivalent components should compare equal.
+ """
+ u1 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f")
+ u2 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f")
+ self.assertEqual(u1, u2)
+
+ def test_differentNotEqual(self):
+ # type: () -> None
+ """
+ L{URL}s that refer to different resources are both unequal (C{!=}) and
+ also not equal (not C{==}).
+ """
+ u1 = URL.from_text("http://localhost/a")
+ u2 = URL.from_text("http://localhost/b")
+ self.assertFalse(u1 == u2, "%r != %r" % (u1, u2))
+ self.assertNotEqual(u1, u2)
+
+ def test_otherTypesNotEqual(self):
+ # type: () -> None
+ """
+ L{URL} is not equal (C{==}) to other types.
+ """
+ u = URL.from_text("http://localhost/")
+ self.assertFalse(u == 42, "URL must not equal a number.")
+ self.assertFalse(u == object(), "URL must not equal an object.")
+ self.assertNotEqual(u, 42)
+ self.assertNotEqual(u, object())
+
+ def test_identicalNotUnequal(self):
+ # type: () -> None
+ """
+ Identical L{URL}s are not unequal (C{!=}) to each other.
+ """
+ u = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f")
+ self.assertFalse(u != u, "%r == itself" % u)
+
+ def test_similarNotUnequal(self):
+ # type: () -> None
+ """
+ Structurally similar L{URL}s are not unequal (C{!=}) to each other.
+ """
+ u1 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f")
+ u2 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f")
+ self.assertFalse(u1 != u2, "%r == %r" % (u1, u2))
+
+ def test_differentUnequal(self):
+ # type: () -> None
+ """
+ Structurally different L{URL}s are unequal (C{!=}) to each other.
+ """
+ u1 = URL.from_text("http://localhost/a")
+ u2 = URL.from_text("http://localhost/b")
+ self.assertTrue(u1 != u2, "%r == %r" % (u1, u2))
+
+ def test_otherTypesUnequal(self):
+ # type: () -> None
+ """
+ L{URL} is unequal (C{!=}) to other types.
+ """
+ u = URL.from_text("http://localhost/")
+ self.assertTrue(u != 42, "URL must differ from a number.")
+        self.assertTrue(u != object(), "URL must differ from an object.")
+
+ def test_asURI(self):
+ # type: () -> None
+ """
+ L{URL.asURI} produces an URI which converts any URI unicode encoding
+ into pure US-ASCII and returns a new L{URL}.
+ """
+ unicodey = (
+ "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/"
+ "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}"
+ "?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}="
+ "\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}"
+ "#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}"
+ )
+ iri = URL.from_text(unicodey)
+ uri = iri.asURI()
+ self.assertEqual(iri.host, "\N{LATIN SMALL LETTER E WITH ACUTE}.com")
+ self.assertEqual(
+ iri.path[0], "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}"
+ )
+ self.assertEqual(iri.to_text(), unicodey)
+ expectedURI = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA"
+ actualURI = uri.to_text()
+ self.assertEqual(
+ actualURI, expectedURI, "%r != %r" % (actualURI, expectedURI)
+ )
+
+ def test_asIRI(self):
+ # type: () -> None
+ """
+ L{URL.asIRI} decodes any percent-encoded text in the URI, making it
+ more suitable for reading by humans, and returns a new L{URL}.
+ """
+ asciiish = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA"
+ uri = URL.from_text(asciiish)
+ iri = uri.asIRI()
+ self.assertEqual(uri.host, "xn--9ca.com")
+ self.assertEqual(uri.path[0], "%C3%A9")
+ self.assertEqual(uri.to_text(), asciiish)
+ expectedIRI = (
+ "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/"
+ "\N{LATIN SMALL LETTER E WITH ACUTE}"
+ "?\N{LATIN SMALL LETTER A WITH ACUTE}="
+ "\N{LATIN SMALL LETTER I WITH ACUTE}"
+ "#\N{LATIN SMALL LETTER U WITH ACUTE}"
+ )
+ actualIRI = iri.to_text()
+ self.assertEqual(
+ actualIRI, expectedIRI, "%r != %r" % (actualIRI, expectedIRI)
+ )
+
+ def test_badUTF8AsIRI(self):
+ # type: () -> None
+ """
+ Bad UTF-8 in a path segment, query parameter, or fragment results in
+ that portion of the URI remaining percent-encoded in the IRI.
+ """
+ urlWithBinary = "http://xn--9ca.com/%00%FF/%C3%A9"
+ uri = URL.from_text(urlWithBinary)
+ iri = uri.asIRI()
+ expectedIRI = (
+ "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/"
+ "%00%FF/"
+ "\N{LATIN SMALL LETTER E WITH ACUTE}"
+ )
+ actualIRI = iri.to_text()
+ self.assertEqual(
+ actualIRI, expectedIRI, "%r != %r" % (actualIRI, expectedIRI)
+ )
+
+ def test_alreadyIRIAsIRI(self):
+ # type: () -> None
+ """
+ A L{URL} composed of non-ASCII text will result in non-ASCII text.
+ """
+ unicodey = (
+ "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/"
+ "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}"
+ "?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}="
+ "\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}"
+ "#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}"
+ )
+ iri = URL.from_text(unicodey)
+ alsoIRI = iri.asIRI()
+ self.assertEqual(alsoIRI.to_text(), unicodey)
+
+ def test_alreadyURIAsURI(self):
+ # type: () -> None
+ """
+ A L{URL} composed of encoded text will remain encoded.
+ """
+ expectedURI = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA"
+ uri = URL.from_text(expectedURI)
+ actualURI = uri.asURI().to_text()
+ self.assertEqual(actualURI, expectedURI)
+
+ def test_userinfo(self):
+ # type: () -> None
+ """
+ L{URL.from_text} will parse the C{userinfo} portion of the URI
+ separately from the host and port.
+ """
+ url = URL.from_text(
+ "http://someuser:somepassword@example.com/some-segment@ignore"
+ )
+ self.assertEqual(
+ url.authority(True), "someuser:somepassword@example.com"
+ )
+ self.assertEqual(url.authority(False), "someuser:@example.com")
+ self.assertEqual(url.userinfo, "someuser:somepassword")
+ self.assertEqual(url.user, "someuser")
+ self.assertEqual(
+ url.to_text(), "http://someuser:@example.com/some-segment@ignore"
+ )
+ self.assertEqual(
+ url.replace(userinfo="someuser").to_text(),
+ "http://someuser@example.com/some-segment@ignore",
+ )
+
+ def test_portText(self):
+ # type: () -> None
+ """
+ L{URL.from_text} parses custom port numbers as integers.
+ """
+ portURL = URL.from_text("http://www.example.com:8080/")
+ self.assertEqual(portURL.port, 8080)
+ self.assertEqual(portURL.to_text(), "http://www.example.com:8080/")
+
+ def test_mailto(self):
+ # type: () -> None
+ """
+ Although L{URL} instances are mainly for dealing with HTTP, other
+ schemes (such as C{mailto:}) should work as well. For example,
+ L{URL.from_text}/L{URL.to_text} round-trips cleanly for a C{mailto:}
+ URL representing an email address.
+ """
+ self.assertEqual(
+ URL.from_text("mailto:user@example.com").to_text(),
+ "mailto:user@example.com",
+ )
+
+ def test_httpWithoutHost(self):
+ # type: () -> None
+ """
+ An HTTP URL without a hostname, but with a path, should also round-trip
+ cleanly.
+ """
+ without_host = URL.from_text("http:relative-path")
+ self.assertEqual(without_host.host, "")
+ self.assertEqual(without_host.path, ("relative-path",))
+ self.assertEqual(without_host.uses_netloc, False)
+ self.assertEqual(without_host.to_text(), "http:relative-path")
+
+ def test_queryIterable(self):
+ # type: () -> None
+ """
+ When a L{URL} is created with a C{query} argument, the C{query}
+ argument is converted into an N-tuple of 2-tuples, sensibly
+ handling dictionaries.
+ """
+ expected = (("alpha", "beta"),)
+ url = URL(query=[("alpha", "beta")])
+ self.assertEqual(url.query, expected)
+ url = URL(query={"alpha": "beta"})
+ self.assertEqual(url.query, expected)
+
+ def test_pathIterable(self):
+ # type: () -> None
+ """
+ When a L{URL} is created with a C{path} argument, the C{path} is
+ converted into a tuple.
+ """
+ url = URL(path=["hello", "world"])
+ self.assertEqual(url.path, ("hello", "world"))
+
+ def test_invalidArguments(self):
+ # type: () -> None
+ """
+ Passing an argument of the wrong type to any of the constructor
+ arguments of L{URL} will raise a descriptive L{TypeError}.
+
+        L{URL} typechecks very aggressively to ensure that its constituent
+ parts are all properly immutable and to prevent confusing errors when
+ bad data crops up in a method call long after the code that called the
+ constructor is off the stack.
+ """
+
+ class Unexpected(object):
+ def __str__(self):
+ # type: () -> str
+ return "wrong"
+
+ def __repr__(self):
+ # type: () -> str
+ return ""
+
+ defaultExpectation = "unicode" if bytes is str else "str"
+
+ def assertRaised(raised, expectation, name):
+ # type: (Any, Text, Text) -> None
+ self.assertEqual(
+ str(raised.exception),
+ "expected {0} for {1}, got {2}".format(
+ expectation, name, ""
+ ),
+ )
+
+ def check(param, expectation=defaultExpectation):
+ # type: (Any, str) -> None
+ with self.assertRaises(TypeError) as raised:
+ URL(**{param: Unexpected()}) # type: ignore[arg-type]
+
+ assertRaised(raised, expectation, param)
+
+ check("scheme")
+ check("host")
+ check("fragment")
+ check("rooted", "bool")
+ check("userinfo")
+ check("port", "int or NoneType")
+
+ with self.assertRaises(TypeError) as raised:
+ URL(path=[cast(Text, Unexpected())])
+
+ assertRaised(raised, defaultExpectation, "path segment")
+
+ with self.assertRaises(TypeError) as raised:
+ URL(query=[("name", cast(Text, Unexpected()))])
+
+ assertRaised(
+ raised, defaultExpectation + " or NoneType", "query parameter value"
+ )
+
+ with self.assertRaises(TypeError) as raised:
+ URL(query=[(cast(Text, Unexpected()), "value")])
+
+ assertRaised(raised, defaultExpectation, "query parameter name")
+ # No custom error message for this one, just want to make sure
+ # non-2-tuples don't get through.
+
+ with self.assertRaises(TypeError):
+ URL(query=[cast(Tuple[Text, Text], Unexpected())])
+
+ with self.assertRaises(ValueError):
+ URL(query=[cast(Tuple[Text, Text], ("k", "v", "vv"))])
+
+ with self.assertRaises(ValueError):
+ URL(query=[cast(Tuple[Text, Text], ("k",))])
+
+ url = URL.from_text("https://valid.example.com/")
+ with self.assertRaises(TypeError) as raised:
+ url.child(cast(Text, Unexpected()))
+ assertRaised(raised, defaultExpectation, "path segment")
+ with self.assertRaises(TypeError) as raised:
+ url.sibling(cast(Text, Unexpected()))
+ assertRaised(raised, defaultExpectation, "path segment")
+ with self.assertRaises(TypeError) as raised:
+ url.click(cast(Text, Unexpected()))
+ assertRaised(raised, defaultExpectation, "relative URL")
+
+ def test_technicallyTextIsIterableBut(self):
+ # type: () -> None
+ """
+ Technically, L{str} (or L{unicode}, as appropriate) is iterable, but
+ C{URL(path="foo")} resulting in C{URL.from_text("f/o/o")} is never what
+ you want.
+ """
+ with self.assertRaises(TypeError) as raised:
+ URL(path="foo")
+ self.assertEqual(
+ str(raised.exception),
+ "expected iterable of text for path, not: {0}".format(repr("foo")),
+ )
+
+ def test_netloc(self):
+ # type: () -> None
+ url = URL(scheme="https")
+ self.assertEqual(url.uses_netloc, True)
+ self.assertEqual(url.to_text(), "https://")
+ # scheme, no host, no path, no netloc hack
+ self.assertEqual(URL.from_text("https:").uses_netloc, False)
+ # scheme, no host, absolute path, no netloc hack
+ self.assertEqual(URL.from_text("https:/").uses_netloc, False)
+ # scheme, no host, no path, netloc hack to indicate :// syntax
+ self.assertEqual(URL.from_text("https://").uses_netloc, True)
+
+ url = URL(scheme="https", uses_netloc=False)
+ self.assertEqual(url.uses_netloc, False)
+ self.assertEqual(url.to_text(), "https:")
+
+ url = URL(scheme="git+https")
+ self.assertEqual(url.uses_netloc, True)
+ self.assertEqual(url.to_text(), "git+https://")
+
+ url = URL(scheme="mailto")
+ self.assertEqual(url.uses_netloc, False)
+ self.assertEqual(url.to_text(), "mailto:")
+
+ url = URL(scheme="ztp")
+ self.assertEqual(url.uses_netloc, None)
+ self.assertEqual(url.to_text(), "ztp:")
+
+ url = URL.from_text("ztp://test.com")
+ self.assertEqual(url.uses_netloc, True)
+
+ url = URL.from_text("ztp:test:com")
+ self.assertEqual(url.uses_netloc, False)
+
+ def test_ipv6_with_port(self):
+ # type: () -> None
+ t = "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:80/"
+ url = URL.from_text(t)
+ assert url.host == "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
+ assert url.port == 80
+ assert SCHEME_PORT_MAP[url.scheme] != url.port
+
+ def test_basic(self):
+ # type: () -> None
+ text = "https://user:pass@example.com/path/to/here?k=v#nice"
+ url = URL.from_text(text)
+ assert url.scheme == "https"
+ assert url.userinfo == "user:pass"
+ assert url.host == "example.com"
+ assert url.path == ("path", "to", "here")
+ assert url.fragment == "nice"
+
+ text = "https://user:pass@127.0.0.1/path/to/here?k=v#nice"
+ url = URL.from_text(text)
+ assert url.scheme == "https"
+ assert url.userinfo == "user:pass"
+ assert url.host == "127.0.0.1"
+ assert url.path == ("path", "to", "here")
+
+ text = "https://user:pass@[::1]/path/to/here?k=v#nice"
+ url = URL.from_text(text)
+ assert url.scheme == "https"
+ assert url.userinfo == "user:pass"
+ assert url.host == "::1"
+ assert url.path == ("path", "to", "here")
+
+ def test_invalid_url(self):
+ # type: () -> None
+ self.assertRaises(URLParseError, URL.from_text, "#\n\n")
+
+ def test_invalid_authority_url(self):
+ # type: () -> None
+ self.assertRaises(URLParseError, URL.from_text, "http://abc:\n\n/#")
+
+ def test_invalid_ipv6(self):
+ # type: () -> None
+ invalid_ipv6_ips = [
+ "2001::0234:C1ab::A0:aabc:003F",
+ "2001::1::3F",
+ ":",
+ "::::",
+ "::256.0.0.1",
+ ]
+ for ip in invalid_ipv6_ips:
+ url_text = "http://[" + ip + "]"
+ self.assertRaises(socket.error, inet_pton, socket.AF_INET6, ip)
+ self.assertRaises(URLParseError, URL.from_text, url_text)
+
+ def test_invalid_port(self):
+ # type: () -> None
+ self.assertRaises(URLParseError, URL.from_text, "ftp://portmouth:smash")
+ self.assertRaises(
+ ValueError,
+ URL.from_text,
+ "http://reader.googlewebsite.com:neverforget",
+ )
+
+ def test_idna(self):
+ # type: () -> None
+ u1 = URL.from_text("http://bücher.ch")
+ self.assertEqual(u1.host, "bücher.ch")
+ self.assertEqual(u1.to_text(), "http://bücher.ch")
+ self.assertEqual(u1.to_uri().to_text(), "http://xn--bcher-kva.ch")
+
+ u2 = URL.from_text("https://xn--bcher-kva.ch")
+ self.assertEqual(u2.host, "xn--bcher-kva.ch")
+ self.assertEqual(u2.to_text(), "https://xn--bcher-kva.ch")
+ self.assertEqual(u2.to_iri().to_text(), "https://bücher.ch")
+
+ def test_netloc_slashes(self):
+ # type: () -> None
+
+ # basic sanity checks
+ url = URL.from_text("mailto:mahmoud@hatnote.com")
+ self.assertEqual(url.scheme, "mailto")
+ self.assertEqual(url.to_text(), "mailto:mahmoud@hatnote.com")
+
+ url = URL.from_text("http://hatnote.com")
+ self.assertEqual(url.scheme, "http")
+ self.assertEqual(url.to_text(), "http://hatnote.com")
+
+ # test that unrecognized schemes stay consistent with '//'
+ url = URL.from_text("newscheme:a:b:c")
+ self.assertEqual(url.scheme, "newscheme")
+ self.assertEqual(url.to_text(), "newscheme:a:b:c")
+
+ url = URL.from_text("newerscheme://a/b/c")
+ self.assertEqual(url.scheme, "newerscheme")
+ self.assertEqual(url.to_text(), "newerscheme://a/b/c")
+
+ # test that reasonable guesses are made
+ url = URL.from_text("git+ftp://gitstub.biz/glyph/lefkowitz")
+ self.assertEqual(url.scheme, "git+ftp")
+ self.assertEqual(url.to_text(), "git+ftp://gitstub.biz/glyph/lefkowitz")
+
+ url = URL.from_text("what+mailto:freerealestate@enotuniq.org")
+ self.assertEqual(url.scheme, "what+mailto")
+ self.assertEqual(
+ url.to_text(), "what+mailto:freerealestate@enotuniq.org"
+ )
+
+ url = URL(scheme="ztp", path=("x", "y", "z"), rooted=True)
+ self.assertEqual(url.to_text(), "ztp:/x/y/z")
+
+ # also works when the input doesn't include '//'
+ url = URL(
+ scheme="git+ftp",
+ path=("x", "y", "z", ""),
+ rooted=True,
+ uses_netloc=True,
+ )
+ # broken bc urlunsplit
+ self.assertEqual(url.to_text(), "git+ftp:///x/y/z/")
+
+ # really why would this ever come up but ok
+ url = URL.from_text("file:///path/to/heck")
+ url2 = url.replace(scheme="mailto")
+ self.assertEqual(url2.to_text(), "mailto:/path/to/heck")
+
+ url_text = "unregisteredscheme:///a/b/c"
+ url = URL.from_text(url_text)
+ no_netloc_url = url.replace(uses_netloc=False)
+ self.assertEqual(no_netloc_url.to_text(), "unregisteredscheme:/a/b/c")
+ netloc_url = url.replace(uses_netloc=True)
+ self.assertEqual(netloc_url.to_text(), url_text)
+
+ return
+
+ def test_rooted_to_relative(self):
+ # type: () -> None
+ """
+ On host-relative URLs, the C{rooted} flag can be updated to indicate
+ that the path should no longer be treated as absolute.
+ """
+ a = URL(path=["hello"])
+ self.assertEqual(a.to_text(), "hello")
+ b = a.replace(rooted=True)
+ self.assertEqual(b.to_text(), "/hello")
+ self.assertNotEqual(a, b)
+
+ def test_autorooted(self):
+ # type: () -> None
+ """
+ The C{rooted} flag can be updated in some cases, but it cannot be made
+ to conflict with other facts surrounding the URL; for example, all URLs
+ involving an authority (host) are inherently rooted because it is not
+ syntactically possible to express otherwise; also, once an unrooted URL
+ gains a path that starts with an empty string, that empty string is
+ elided and it becomes rooted, because these cases are syntactically
+        indistinguishable in real URL text.
+ """
+ relative_path_rooted = URL(path=["", "foo"], rooted=False)
+ self.assertEqual(relative_path_rooted.rooted, True)
+ relative_flag_rooted = URL(path=["foo"], rooted=True)
+ self.assertEqual(relative_flag_rooted.rooted, True)
+ self.assertEqual(relative_path_rooted, relative_flag_rooted)
+
+ attempt_unrooted_absolute = URL(host="foo", path=["bar"], rooted=False)
+ normal_absolute = URL(host="foo", path=["bar"])
+ self.assertEqual(attempt_unrooted_absolute, normal_absolute)
+ self.assertEqual(normal_absolute.rooted, True)
+ self.assertEqual(attempt_unrooted_absolute.rooted, True)
+
+ def test_rooted_with_port_but_no_host(self):
+ # type: () -> None
+ """
+ URLs which include a ``://`` netloc-separator for any reason are
+ inherently rooted, regardless of the value or presence of the
+ ``rooted`` constructor argument.
+
+ They may include a netloc-separator because their constructor was
+ directly invoked with an explicit host or port, or because they were
+ parsed from a string which included the literal ``://`` separator.
+ """
+ directly_constructed = URL(scheme="udp", port=4900, rooted=False)
+ directly_constructed_implict = URL(scheme="udp", port=4900)
+ directly_constructed_rooted = URL(scheme="udp", port=4900, rooted=True)
+ self.assertEqual(directly_constructed.rooted, True)
+ self.assertEqual(directly_constructed_implict.rooted, True)
+ self.assertEqual(directly_constructed_rooted.rooted, True)
+ parsed = URL.from_text("udp://:4900")
+ self.assertEqual(str(directly_constructed), str(parsed))
+ self.assertEqual(str(directly_constructed_implict), str(parsed))
+ self.assertEqual(directly_constructed.asText(), parsed.asText())
+ self.assertEqual(directly_constructed, parsed)
+ self.assertEqual(directly_constructed, directly_constructed_implict)
+ self.assertEqual(directly_constructed, directly_constructed_rooted)
+ self.assertEqual(directly_constructed_implict, parsed)
+ self.assertEqual(directly_constructed_rooted, parsed)
+
+ def test_wrong_constructor(self):
+ # type: () -> None
+ with self.assertRaises(ValueError):
+ # whole URL not allowed
+ URL(BASIC_URL)
+ with self.assertRaises(ValueError):
+ # explicitly bad scheme not allowed
+ URL("HTTP_____more_like_imHoTTeP")
+
+ def test_encoded_userinfo(self):
+ # type: () -> None
+ url = URL.from_text("http://user:pass@example.com")
+ assert url.userinfo == "user:pass"
+ url = url.replace(userinfo="us%20her:pass")
+ iri = url.to_iri()
+ assert (
+ iri.to_text(with_password=True) == "http://us her:pass@example.com"
+ )
+ assert iri.to_text(with_password=False) == "http://us her:@example.com"
+ assert (
+ iri.to_uri().to_text(with_password=True)
+ == "http://us%20her:pass@example.com"
+ )
+
+ def test_hash(self):
+ # type: () -> None
+ url_map = {}
+ url1 = URL.from_text("http://blog.hatnote.com/ask?utm_source=geocity")
+ assert hash(url1) == hash(url1) # sanity
+
+ url_map[url1] = 1
+
+ url2 = URL.from_text("http://blog.hatnote.com/ask")
+ url2 = url2.set("utm_source", "geocity")
+
+ url_map[url2] = 2
+
+ assert len(url_map) == 1
+ assert list(url_map.values()) == [2]
+
+ assert hash(URL()) == hash(URL()) # slightly more sanity
+
+ def test_dir(self):
+ # type: () -> None
+ url = URL()
+ res = dir(url)
+
+ assert len(res) > 15
+ # twisted compat
+ assert "fromText" not in res
+ assert "asText" not in res
+ assert "asURI" not in res
+ assert "asIRI" not in res
+
+ def test_twisted_compat(self):
+ # type: () -> None
+ url = URL.fromText("http://example.com/a%20té%C3%A9st")
+ assert url.asText() == "http://example.com/a%20té%C3%A9st"
+ assert url.asURI().asText() == "http://example.com/a%20t%C3%A9%C3%A9st"
+ # TODO: assert url.asIRI().asText() == u'http://example.com/a%20téést'
+
+ def test_set_ordering(self):
+ # type: () -> None
+
+ # TODO
+ url = URL.from_text("http://example.com/?a=b&c")
+ url = url.set("x", "x")
+ url = url.add("x", "y")
+ assert url.to_text() == "http://example.com/?a=b&x=x&c&x=y"
+ # Would expect:
+ # assert url.to_text() == u'http://example.com/?a=b&c&x=x&x=y'
+
+ def test_schemeless_path(self):
+ # type: () -> None
+ "See issue #4"
+ u1 = URL.from_text("urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob")
+ u2 = URL.from_text(u1.to_text())
+ assert u1 == u2 # sanity testing roundtripping
+
+ u3 = URL.from_text(u1.to_iri().to_text())
+ assert u1 == u3
+ assert u2 == u3
+
+ # test that colons are ok past the first segment
+ u4 = URL.from_text("first-segment/urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob")
+ u5 = u4.to_iri()
+ assert u5.to_text() == "first-segment/urn:ietf:wg:oauth:2.0:oob"
+
+ u6 = URL.from_text(u5.to_text()).to_uri()
+ assert u5 == u6 # colons stay decoded bc they're not in the first seg
+
+ def test_emoji_domain(self):
+ # type: () -> None
+ "See issue #7, affecting only narrow builds (2.6-3.3)"
+ url = URL.from_text("https://xn--vi8hiv.ws")
+ iri = url.to_iri()
+ iri.to_text()
+ # as long as we don't get ValueErrors, we're good
+
+ def test_delim_in_param(self):
+ # type: () -> None
+ "Per issue #6 and #8"
+ self.assertRaises(ValueError, URL, scheme="http", host="a/c")
+ self.assertRaises(ValueError, URL, path=("?",))
+ self.assertRaises(ValueError, URL, path=("#",))
+ self.assertRaises(ValueError, URL, query=(("&", "test")))
+
+ def test_empty_paths_eq(self):
+ # type: () -> None
+ u1 = URL.from_text("http://example.com/")
+ u2 = URL.from_text("http://example.com")
+
+ assert u1 == u2
+
+ u1 = URL.from_text("http://example.com")
+ u2 = URL.from_text("http://example.com")
+
+ assert u1 == u2
+
+ u1 = URL.from_text("http://example.com")
+ u2 = URL.from_text("http://example.com/")
+
+ assert u1 == u2
+
+ u1 = URL.from_text("http://example.com/")
+ u2 = URL.from_text("http://example.com/")
+
+ assert u1 == u2
+
+ def test_from_text_type(self):
+ # type: () -> None
+ assert URL.from_text("#ok").fragment == "ok" # sanity
+ self.assertRaises(TypeError, URL.from_text, b"bytes://x.y.z")
+ self.assertRaises(TypeError, URL.from_text, object())
+
+ def test_from_text_bad_authority(self):
+ # type: () -> None
+
+ # bad ipv6 brackets
+ self.assertRaises(URLParseError, URL.from_text, "http://[::1/")
+ self.assertRaises(URLParseError, URL.from_text, "http://::1]/")
+ self.assertRaises(URLParseError, URL.from_text, "http://[[::1]/")
+ self.assertRaises(URLParseError, URL.from_text, "http://[::1]]/")
+
+ # empty port
+ self.assertRaises(URLParseError, URL.from_text, "http://127.0.0.1:")
+ # non-integer port
+ self.assertRaises(URLParseError, URL.from_text, "http://127.0.0.1:hi")
+ # extra port colon (makes for an invalid host)
+ self.assertRaises(URLParseError, URL.from_text, "http://127.0.0.1::80")
+
+ def test_normalize(self):
+ # type: () -> None
+ url = URL.from_text("HTTP://Example.com/A%61/./../A%61?B%62=C%63#D%64")
+ assert url.get("Bb") == []
+ assert url.get("B%62") == ["C%63"]
+ assert len(url.path) == 4
+
+ # test that most expected normalizations happen
+ norm_url = url.normalize()
+
+ assert norm_url.scheme == "http"
+ assert norm_url.host == "example.com"
+ assert norm_url.path == ("Aa",)
+ assert norm_url.get("Bb") == ["Cc"]
+ assert norm_url.fragment == "Dd"
+ assert norm_url.to_text() == "http://example.com/Aa?Bb=Cc#Dd"
+
+ # test that flags work
+ noop_norm_url = url.normalize(
+ scheme=False, host=False, path=False, query=False, fragment=False
+ )
+ assert noop_norm_url == url
+
+ # test that empty paths get at least one slash
+ slashless_url = URL.from_text("http://example.io")
+ slashful_url = slashless_url.normalize()
+ assert slashful_url.to_text() == "http://example.io/"
+
+ # test case normalization for percent encoding
+ delimited_url = URL.from_text("/a%2fb/cd%3f?k%3d=v%23#test")
+ norm_delimited_url = delimited_url.normalize()
+ assert norm_delimited_url.to_text() == "/a%2Fb/cd%3F?k%3D=v%23#test"
+
+ # test invalid percent encoding during normalize
+ assert (
+ URL(path=("", "%te%sts")).normalize(percents=False).to_text()
+ == "/%te%sts"
+ )
+ assert URL(path=("", "%te%sts")).normalize().to_text() == "/%25te%25sts"
+
+ percenty_url = URL(
+ scheme="ftp",
+ path=["%%%", "%a%b"],
+ query=[("%", "%%")],
+ fragment="%",
+ userinfo="%:%",
+ )
+
+ assert (
+ percenty_url.to_text(with_password=True)
+ == "ftp://%:%@/%%%/%a%b?%=%%#%"
+ )
+ assert (
+ percenty_url.normalize().to_text(with_password=True)
+ == "ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25"
+ )
+
+ def test_str(self):
+ # type: () -> None
+
+ # see also issue #49
+ text = "http://example.com/á/y%20a%20y/?b=%25"
+ url = URL.from_text(text)
+ assert unicode(url) == text
+ assert bytes(url) == b"http://example.com/%C3%A1/y%20a%20y/?b=%25"
+
+ if PY2:
+ assert isinstance(str(url), bytes)
+ assert isinstance(unicode(url), unicode)
+ else:
+ assert isinstance(str(url), unicode)
+ assert isinstance(bytes(url), bytes)
+
+ def test_idna_corners(self):
+ # type: () -> None
+ url = URL.from_text("http://abé.com/")
+ assert url.to_iri().host == "abé.com"
+ assert url.to_uri().host == "xn--ab-cja.com"
+
+ url = URL.from_text("http://ドメイン.テスト.co.jp#test")
+ assert url.to_iri().host == "ドメイン.テスト.co.jp"
+ assert url.to_uri().host == "xn--eckwd4c7c.xn--zckzah.co.jp"
+
+ assert url.to_uri().get_decoded_url().host == "ドメイン.テスト.co.jp"
+
+ text = "http://Example.com"
+ assert (
+ URL.from_text(text).to_uri().get_decoded_url().host == "example.com"
+ )
diff --git a/tox.ini b/tox.ini
index ef2ec9c5..2165a835 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,26 +1,395 @@
[tox]
-envlist = py26,py27,py34,py35,py36,pypy,coverage-report,packaging
+
+envlist =
+ flake8, black, mypy
+ test-py{26,27,34,35,36,37,38,39,py2,py3}
+ coverage_report
+ docs
+ packaging
+
+skip_missing_interpreters = {tty:True:False}
+
+
+[default]
+
+basepython = python3.9
+
+deps =
+ idna==2.9 # rq.filter: <3
+
+setenv =
+ PY_MODULE=hyperlink
+
+ PYTHONPYCACHEPREFIX={envtmpdir}/pycache
+
+
+##
+# Default environment: unit tests
+##
[testenv]
-changedir = .tox
-deps = -rrequirements-test.txt
-commands = coverage run --parallel --rcfile {toxinidir}/.tox-coveragerc -m pytest --doctest-modules {envsitepackagesdir}/hyperlink {posargs}
-# Uses default basepython otherwise reporting doesn't work on Travis where
-# Python 3.6 is only available in 3.6 jobs.
-[testenv:coverage-report]
-changedir = .tox
-deps = coverage
-commands = coverage combine --rcfile {toxinidir}/.tox-coveragerc
- coverage report --rcfile {toxinidir}/.tox-coveragerc
- coverage html --rcfile {toxinidir}/.tox-coveragerc -d {toxinidir}/htmlcov
+description = run tests
+
+basepython =
+ py: python
+
+ py26: python2.6
+ py27: python2.7
+ py34: python3.4
+ py35: python3.5
+ py36: python3.6
+ py37: python3.7
+ py38: python3.8
+ py39: python3.9
+ py310: python3.10
+ py311: python3.11
+ py312: python3.12
+ py313: python3.13
+
+ pypy2: pypy
+ pypy3: pypy3
+
+deps =
+ {[default]deps}
+
+ # In Python 2, we need to pull in typing, mock
+ py{26,27,py2}: typing==3.10.0.0
+ py{26,27,py2}: mock==3.0.5 # rq.filter: <4
+
+ # For pytest
+ py{26,27,34,py2}: pytest==4.6.11 # rq.filter: <5
+ py{35,36,37,38,39,310,311,312,313,py3}: pytest==5.2.4
+
+ # For code coverage
+ {[testenv:coverage_report]deps}
+ py{26,27,34,py2}: pytest-cov==2.8.1 # rq.filter: <2.9
+ py{35,36,37,38,39,310,311,312,313,py3}: pytest-cov==2.10.1
+
+ # For hypothesis. Note Python 3.4 isn't supported by hypothesis.
+ py{26,27,py2}: hypothesis==4.43.9 # rq.filter: <4.44
+ py{35,36,37,38,39,310,311,312,313,py3}: hypothesis==5.8.6
+
+setenv =
+ {[default]setenv}
+
+ COVERAGE_FILE={toxworkdir}/coverage.{envname}
+ HYPOTHESIS_STORAGE_DIRECTORY={toxworkdir}/hypothesis
+
+passenv = CI
+
+commands =
+ pytest --cov={env:PY_MODULE} --cov-report=term-missing:skip-covered --doctest-modules {posargs:src/{env:PY_MODULE}}
+ coverage_xml: coverage xml
+
+
+##
+# Black code formatting
+##
+
+[testenv:black]
+
+description = run Black (linter)
+
+basepython = {[default]basepython}
+
+skip_install = True
+
+deps =
+ black==21.7b0
+
+setenv =
+ BLACK_LINT_ARGS=--check
+
+commands =
+ black {env:BLACK_LINT_ARGS:} {posargs:setup.py src}
+
+
+[testenv:black-reformat]
+
+description = {[testenv:black]description} and reformat
+basepython = {[testenv:black]basepython}
+skip_install = {[testenv:black]skip_install}
+deps = {[testenv:black]deps}
+commands = {[testenv:black]commands}
+
+
+##
+# Flake8 linting
+##
+
+[testenv:flake8]
+
+description = run Flake8 (linter)
+
+basepython = {[default]basepython}
+
+skip_install = True
+
+deps =
+ flake8-bugbear==21.4.3
+ flake8==3.9.2
+ mccabe==0.6.1
+ pep8-naming==0.12.1
+ pycodestyle==2.7.0
+ pydocstyle==6.1.1
+ pyflakes==2.3.1
+
+commands =
+ flake8 {posargs:setup.py src/{env:PY_MODULE}}
+
+
+[flake8]
+
+# !!! BRING THE PAIN !!!
+select = A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z
+
+show-source = True
+doctests = True
+
+max-line-length = 80
+
+# Codes: http://flake8.pycqa.org/en/latest/user/error-codes.html
+ignore =
+ # syntax error in type comment
+ F723,
+
+ # function name should be lowercase
+ N802,
+
+ # argument name should be lowercase
+ N803,
+
+ # variable in function should be lowercase
+ N806,
+
+ # variable in class scope should not be mixedCase
+ N815,
+
+ # variable in global scope should not be mixedCase
+ N816,
+
+ # line break before binary operator
+ W503,
+
+ # End of list (allows last item to end with trailing ',')
+ EOL
+
+# flake8-import-order: local module name space
+application-import-names = deploy
+
+
+##
+# Mypy static type checking
+##
+
+[testenv:mypy]
+
+description = run Mypy (static type checker)
+
+basepython = {[default]basepython}
+
+deps =
+ mypy==0.910
+ types-mock==0.1.5
+
+ {[default]deps}
+commands =
+ mypy \
+ --config-file="{toxinidir}/tox.ini" \
+ --cache-dir="{toxworkdir}/mypy_cache" \
+ {tty:--pretty:} \
+ {posargs:src}
+
+
+[mypy]
+
+# Global settings
+
+check_untyped_defs = True
+disallow_any_generics = True
+disallow_incomplete_defs = True
+disallow_untyped_defs = True
+no_implicit_optional = True
+show_column_numbers = True
+show_error_codes = True
+strict_optional = True
+warn_no_return = True
+warn_redundant_casts = True
+warn_return_any = True
+warn_unreachable = True
+warn_unused_ignores = True
+
+# DrawCallable is generic
+
+[mypy-hyperlink.hypothesis]
+disallow_any_generics = False
+[mypy-hyperlink.test.test_hypothesis]
+disallow_any_generics = False
+
+# Don't complain about dependencies known to lack type hints
+
+[mypy-hypothesis]
+ignore_missing_imports = True
+[mypy-hypothesis.*]
+ignore_missing_imports = True
+
+[mypy-idna]
+ignore_missing_imports = True
+
+
+##
+# Coverage report
+##
+
+[testenv:coverage_report]
+
+description = generate coverage report
+
+depends = test-py{26,27,34,35,36,37,38,39,310,311,312,313,py2,py3}
+
+basepython = {[default]basepython}
+
+skip_install = True
+
+deps =
+ # coverage 5.0 drops Python 3.4 support
+ coverage==4.5.4 # rq.filter: <5
+
+setenv =
+ {[default]setenv}
+
+ COVERAGE_FILE={toxworkdir}/coverage
+
+commands =
+ coverage combine
+ - coverage report
+ - coverage html
+
+
+##
+# Codecov
+##
+
+[testenv:codecov]
+
+description = upload coverage to Codecov
+
+depends = {[testenv:coverage_report]depends}
+
+basepython = python
+
+skip_install = True
+
+deps =
+ {[testenv:coverage_report]deps}
+ codecov==2.1.12
+
+passenv =
+ # See https://github.com/codecov/codecov-python/blob/master/README.md#using-tox
+ # And CI-specific docs:
+ # https://help.github.com/en/articles/virtual-environments-for-github-actions#default-environment-variables
+ # https://docs.travis-ci.com/user/environment-variables#default-environment-variables
+ # https://www.appveyor.com/docs/environment-variables/
+ TOXENV CODECOV_* CI
+ GITHUB_*
+ TRAVIS TRAVIS_*
+ APPVEYOR APPVEYOR_*
+
+setenv =
+ {[testenv:coverage_report]setenv}
+
+ COVERAGE_XML={envlogdir}/coverage.xml
+
+commands =
+ # Note documentation for CI variables in passenv above
+ coverage combine
+ coverage xml -o "{env:COVERAGE_XML}"
+ codecov --file="{env:COVERAGE_XML}" --env \
+ GITHUB_REF GITHUB_COMMIT GITHUB_USER GITHUB_WORKFLOW \
+ TRAVIS_BRANCH TRAVIS_BUILD_WEB_URL \
+ TRAVIS_COMMIT TRAVIS_COMMIT_MESSAGE \
+ APPVEYOR_REPO_BRANCH APPVEYOR_REPO_COMMIT \
+ APPVEYOR_REPO_COMMIT_AUTHOR_EMAIL \
+ APPVEYOR_REPO_COMMIT_MESSAGE_EXTENDED
+
+
+##
+# Documentation
+##
+
+[testenv:docs]
+
+description = build documentation
+
+basepython = {[default]basepython}
+
+deps =
+ Sphinx==4.1.2
+ sphinx-rtd-theme==0.5.2
+
+commands =
+ sphinx-build \
+ -b html -d "{envtmpdir}/doctrees" \
+ "{toxinidir}/docs" \
+ "{toxinidir}/htmldocs"
+
+
+[testenv:docs-auto]
+
+description = build documentation and rebuild automatically
+
+basepython = {[default]basepython}
+
+deps =
+ {[testenv:docs]deps}
+ sphinx-autobuild==2021.3.14
+
+commands =
+ sphinx-autobuild \
+ -b html -d "{envtmpdir}/doctrees" \
+ --host=localhost \
+ "{toxinidir}/docs" \
+ "{toxinidir}/htmldocs"
+
+
+##
+# Packaging
+##
[testenv:packaging]
-changedir = {toxinidir}
+
+description = check for potential packaging problems
+
+basepython = {[default]basepython}
+
+skip_install = True
+
deps =
- check-manifest==0.35
- readme_renderer==17.2
+ check-manifest==0.46
+ readme-renderer==29.0
+ twine==3.4.2
+
commands =
check-manifest
- python setup.py check --metadata --restructuredtext --strict
+ pip wheel --wheel-dir "{envtmpdir}/dist" --no-deps {toxinidir}
+ twine check "{envtmpdir}/dist/"*
+
+
+##
+# Print dependencies
+##
+
+[testenv:dependencies]
+
+description = print dependencies
+
+basepython = {[default]basepython}
+
+recreate = true
+
+deps =
+
+commands =
+ pip freeze --exclude={env:PY_MODULE}