From 294ac41fd0a2863af0809fff0fdd2a3fe2f0ad1e Mon Sep 17 00:00:00 2001 From: Fantix King Date: Sun, 25 Apr 2021 11:26:00 -0400 Subject: [PATCH] WIP: use the http::Uri crate --- .gitignore | 6 +- MANIFEST.in | 2 + httptools/parser/url_cparser.pxd | 31 ----- httptools/parser/url_parser.pyx | 108 ----------------- httptools/parser/url_parser/Cargo.lock | 158 +++++++++++++++++++++++++ httptools/parser/url_parser/Cargo.toml | 19 +++ httptools/parser/url_parser/src/lib.rs | 73 ++++++++++++ pyproject.toml | 2 + setup.py | 40 +++---- 9 files changed, 272 insertions(+), 167 deletions(-) delete mode 100644 httptools/parser/url_cparser.pxd delete mode 100644 httptools/parser/url_parser.pyx create mode 100644 httptools/parser/url_parser/Cargo.lock create mode 100644 httptools/parser/url_parser/Cargo.toml create mode 100644 httptools/parser/url_parser/src/lib.rs create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index 284ec93..45f6882 100644 --- a/.gitignore +++ b/.gitignore @@ -29,5 +29,7 @@ __pycache__/ /.pytest_cache /.mypy_cache /.vscode -.eggs -.venv +/.eggs +/.venv +/.python-version +/httptools/parser/url_parser/target/ diff --git a/MANIFEST.in b/MANIFEST.in index 6b2e857..fc03be7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,4 @@ recursive-include vendor *.c *.h LICENSE* README* include MANIFEST.in LICENSE +include httptools/parser/url_parser/Cargo.toml +recursive-include httptools/parser/url_parser/src * diff --git a/httptools/parser/url_cparser.pxd b/httptools/parser/url_cparser.pxd deleted file mode 100644 index ab9265a..0000000 --- a/httptools/parser/url_cparser.pxd +++ /dev/null @@ -1,31 +0,0 @@ -from libc.stdint cimport uint16_t - - -cdef extern from "http_parser.h": - # URL Parser - - enum http_parser_url_fields: - UF_SCHEMA = 0, - UF_HOST = 1, - UF_PORT = 2, - UF_PATH = 3, - UF_QUERY = 4, - UF_FRAGMENT = 5, - UF_USERINFO = 6, - UF_MAX = 7 - - struct http_parser_url_field_data: - uint16_t off - uint16_t len - - struct http_parser_url: - 
uint16_t field_set - uint16_t port - http_parser_url_field_data[UF_MAX] field_data - - void http_parser_url_init(http_parser_url *u) - - int http_parser_parse_url(const char *buf, - size_t buflen, - int is_connect, - http_parser_url *u) diff --git a/httptools/parser/url_parser.pyx b/httptools/parser/url_parser.pyx deleted file mode 100644 index 49908f3..0000000 --- a/httptools/parser/url_parser.pyx +++ /dev/null @@ -1,108 +0,0 @@ -#cython: language_level=3 - -from __future__ import print_function -from cpython.mem cimport PyMem_Malloc, PyMem_Free -from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \ - Py_buffer - -from .errors import HttpParserInvalidURLError - -cimport cython -from . cimport url_cparser as uparser - -__all__ = ('parse_url',) - -@cython.freelist(250) -cdef class URL: - cdef readonly bytes schema - cdef readonly bytes host - cdef readonly object port - cdef readonly bytes path - cdef readonly bytes query - cdef readonly bytes fragment - cdef readonly bytes userinfo - - def __cinit__(self, bytes schema, bytes host, object port, bytes path, - bytes query, bytes fragment, bytes userinfo): - - self.schema = schema - self.host = host - self.port = port - self.path = path - self.query = query - self.fragment = fragment - self.userinfo = userinfo - - def __repr__(self): - return ('' - .format(self.schema, self.host, self.port, self.path, - self.query, self.fragment, self.userinfo)) - - -def parse_url(url): - cdef: - Py_buffer py_buf - char* buf_data - uparser.http_parser_url* parsed - int res - bytes schema = None - bytes host = None - object port = None - bytes path = None - bytes query = None - bytes fragment = None - bytes userinfo = None - object result = None - int off - int ln - - parsed = \ - PyMem_Malloc(sizeof(uparser.http_parser_url)) - uparser.http_parser_url_init(parsed) - - PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE) - try: - buf_data = py_buf.buf - res = uparser.http_parser_parse_url(buf_data, py_buf.len, 0, parsed) - 
- if res == 0: - if parsed.field_set & (1 << uparser.UF_SCHEMA): - off = parsed.field_data[uparser.UF_SCHEMA].off - ln = parsed.field_data[uparser.UF_SCHEMA].len - schema = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_HOST): - off = parsed.field_data[uparser.UF_HOST].off - ln = parsed.field_data[uparser.UF_HOST].len - host = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_PORT): - port = parsed.port - - if parsed.field_set & (1 << uparser.UF_PATH): - off = parsed.field_data[uparser.UF_PATH].off - ln = parsed.field_data[uparser.UF_PATH].len - path = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_QUERY): - off = parsed.field_data[uparser.UF_QUERY].off - ln = parsed.field_data[uparser.UF_QUERY].len - query = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_FRAGMENT): - off = parsed.field_data[uparser.UF_FRAGMENT].off - ln = parsed.field_data[uparser.UF_FRAGMENT].len - fragment = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_USERINFO): - off = parsed.field_data[uparser.UF_USERINFO].off - ln = parsed.field_data[uparser.UF_USERINFO].len - userinfo = buf_data[off:off+ln] - - return URL(schema, host, port, path, query, fragment, userinfo) - else: - raise HttpParserInvalidURLError("invalid url {!r}".format(url)) - finally: - PyBuffer_Release(&py_buf) - PyMem_Free(parsed) diff --git a/httptools/parser/url_parser/Cargo.lock b/httptools/parser/url_parser/Cargo.lock new file mode 100644 index 0000000..538f1c9 --- /dev/null +++ b/httptools/parser/url_parser/Cargo.lock @@ -0,0 +1,158 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" +dependencies = [ + "byteorder", + "iovec", +] + +[[package]] +name = "cpython" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f11357af68648b6a227e7e2384d439cec8595de65970f45e3f7f4b2600be472" +dependencies = [ + "libc", + "num-traits", + "paste", + "python3-sys", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "http" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6ccf5ede3a895d8856620237b2f02972c1bbc78d2965ad7fe8838d4a0ed41f0" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "iovec" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" +dependencies = [ + "libc", +] + +[[package]] +name = "itoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + +[[package]] +name = "libc" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714" + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "paste" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" +dependencies = [ + "paste-impl", + "proc-macro-hack", +] + +[[package]] +name = "paste-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" +dependencies = [ + "proc-macro-hack", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "python3-sys" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b29b99c6868eb02beb3bf6ed025c8bcdf02efc149b8e80347d3e5d059a806db" +dependencies = [ + "libc", + "regex", +] + +[[package]] +name = "regex" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.23" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" + +[[package]] +name = "url_parser" +version = "0.1.0" +dependencies = [ + "bytes", + "cpython", + "http", +] diff --git a/httptools/parser/url_parser/Cargo.toml b/httptools/parser/url_parser/Cargo.toml new file mode 100644 index 0000000..8997eb3 --- /dev/null +++ b/httptools/parser/url_parser/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "url_parser" +license = "MIT" +version = "0.1.0" +authors = ["MagicStack Inc. "] +edition = "2018" + +[dependencies] +http = "0.1" +bytes = "0.4" + +[dependencies.cpython] +version = "0.5.2" +features = ["extension-module"] + +[lib] +crate-type = ["lib", "cdylib"] +name = "url_parser" +path = "src/lib.rs" diff --git a/httptools/parser/url_parser/src/lib.rs b/httptools/parser/url_parser/src/lib.rs new file mode 100644 index 0000000..1838276 --- /dev/null +++ b/httptools/parser/url_parser/src/lib.rs @@ -0,0 +1,73 @@ +use cpython::{py_class, py_fn, py_module_initializer, PythonObject}; +use cpython::{PyErr, PyResult, Python, ToPyObject, PyBytes, PyObject, PyString}; +use cpython::exc::NotImplementedError; +use http::Uri; +use bytes::Bytes; + +py_module_initializer!(url_parser, |py, m| { + m.add(py, "__doc__", "This module is implemented in Rust.")?; + m.add(py, "__all__", ("parse_url",).to_py_object(py))?; + m.add(py, "URL", py.get_type::<URL>())?; + m.add(py, "parse_url", py_fn!(py, parse_url(url: &[u8])))?; + Ok(()) +}); + +py_class!(pub class URL |py| { + data uri: Uri; + + @property def schema(&self) -> PyResult<PyObject> { + match self.uri(py).scheme_str() { + Some(scheme) => Ok(PyBytes::new(py, scheme.as_bytes()).into_object()), + None => Ok(py.None()), + } + } + + @property def host(&self) -> PyResult<PyObject> { + match self.uri(py).host() { + Some(host) => Ok(PyBytes::new(py, host.as_bytes()).into_object()), + None => Ok(py.None()), + } + } + + @property def port(&self) -> PyResult<PyObject> { + match self.uri(py).port_u16()
{ + Some(port) => Ok(port.to_py_object(py).into_object()), + None => Ok(py.None()), + } + } + + @property def path(&self) -> PyResult<PyString> { + Ok(self.uri(py).path().to_py_object(py)) + } + + @property def query(&self) -> PyResult<PyObject> { + match self.uri(py).query() { + Some(query) => Ok(PyBytes::new(py, query.as_bytes()).into_object()), + None => Ok(py.None()), + } + } + + @property def fragment(&self) -> PyResult<PyObject> { + Err(PyErr::new::<NotImplementedError, _>(py, "fragment is not implemented")) + } + + @property def userinfo(&self) -> PyResult<PyObject> { + Err(PyErr::new::<NotImplementedError, _>(py, "userinfo is not implemented")) + } +}); + +fn get_invalid_url_error(py: Python, message: String) -> PyResult<PyErr> { + let errors = py.import("httptools.parser.errors")?; + let url_error = errors.get(py, "HttpParserInvalidURLError")?.extract(py)?; + Ok(PyErr::new_lazy_init( + url_error, + Some(message.to_py_object(py).into_object()) + )) +} + +fn parse_url(py: Python, url: &[u8]) -> PyResult<URL> { + match Uri::from_shared(Bytes::from(url)) { + Ok(uri) => URL::create_instance(py, uri), + Err(e) => Err(get_invalid_url_error(py, e.to_string())?)
+ } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..923975f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "setuptools-rust", "wheel"] diff --git a/setup.py b/setup.py index 9752a54..34ee707 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ ROOT = pathlib.Path(__file__).parent CYTHON_DEPENDENCY = 'Cython==0.29.22' +RUST_DEPENDENCY = 'setuptools-rust~=0.12' class httptools_build_ext(build_ext): @@ -28,15 +29,12 @@ class httptools_build_ext(build_ext): 'Cythion compiler directives'), ('use-system-llhttp', None, 'Use the system provided llhttp, instead of the bundled one'), - ('use-system-http-parser', None, - 'Use the system provided http-parser, instead of the bundled one'), ] boolean_options = build_ext.boolean_options + [ 'cython-always', 'cython-annotate', 'use-system-llhttp', - 'use-system-http-parser', ] def initialize_options(self): @@ -48,7 +46,6 @@ def initialize_options(self): super().initialize_options() self.use_system_llhttp = False - self.use_system_http_parser = False self.cython_always = False self.cython_annotate = None self.cython_directives = None @@ -112,7 +109,7 @@ def finalize_options(self): self._initialized = True def build_extensions(self): - mod_parser, mod_url_parser = self.distribution.ext_modules + mod_parser = self.distribution.ext_modules[0] if self.use_system_llhttp: mod_parser.libraries.append('llhttp') @@ -129,19 +126,6 @@ def build_extensions(self): mod_parser.sources.append('vendor/llhttp/src/http.c') mod_parser.sources.append('vendor/llhttp/src/llhttp.c') - if self.use_system_http_parser: - mod_url_parser.libraries.append('http_parser') - - if sys.platform == 'darwin' and \ - os.path.exists('/opt/local/include'): - # Support macports on Mac OS X. 
- mod_url_parser.include_dirs.append('/opt/local/include') - else: - mod_url_parser.include_dirs.append( - str(ROOT / 'vendor' / 'http-parser')) - mod_url_parser.sources.append( - 'vendor/http-parser/http_parser.c') - super().build_extensions() @@ -160,7 +144,7 @@ def build_extensions(self): 'unable to read the version from httptools/_version.py') -setup_requires = [] +setup_requires = [RUST_DEPENDENCY] if (not (ROOT / 'httptools' / 'parser' / 'parser.c').exists() or '--cython-always' in sys.argv): @@ -168,6 +152,16 @@ def build_extensions(self): setup_requires.append(CYTHON_DEPENDENCY) +def get_rust_extensions(): + import setuptools_rust + + yield setuptools_rust.RustExtension( + "httptools.parser.url_parser", + path="httptools/parser/url_parser/Cargo.toml", + binding=setuptools_rust.Binding.RustCPython, + ) + + setup( name='httptools', version=VERSION, @@ -201,14 +195,8 @@ def build_extensions(self): ], extra_compile_args=CFLAGS, ), - Extension( - "httptools.parser.url_parser", - sources=[ - "httptools/parser/url_parser.pyx", - ], - extra_compile_args=CFLAGS, - ), ], + rust_extensions=get_rust_extensions(), include_package_data=True, test_suite='tests.suite', setup_requires=setup_requires,