Skip to content

Refactor punycode #134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ target_sources(skyr-url
v1/domain/idna.cpp
v1/domain/idna_code_point_map_iterator.hpp
v1/domain/punycode.hpp
v1/domain/punycode.cpp
v1/url/url.cpp
v1/url/url_search_parameters.cpp

Expand Down
39 changes: 24 additions & 15 deletions src/v1/domain/domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <skyr/v1/containers/static_vector.hpp>
#include <skyr/v1/domain/domain.hpp>
#include <skyr/v1/domain/errors.hpp>
#include <skyr/v1/unicode/ranges/transforms/u8_transform.hpp>
#include <skyr/v1/unicode/ranges/transforms/u32_transform.hpp>
#include <skyr/v1/unicode/ranges/views/u8_view.hpp>
#include "idna.hpp"
Expand All @@ -29,14 +30,14 @@
namespace skyr {
inline namespace v1 {
namespace {
template <class U32Range>
template <class DomainName>
auto map_code_points(
U32Range &&domain_name,
DomainName &&domain_name,
bool use_std3_ascii_rules,
bool transitional_processing,
std::u32string *result)
-> tl::expected<void, domain_errc> {
auto range = views::map_code_points(domain_name, use_std3_ascii_rules, transitional_processing);
auto range = idna::views::map_code_points(domain_name, use_std3_ascii_rules, transitional_processing);
auto first = std::cbegin(range);
auto last = std::cend(range);
for (auto it = first; it != last; ++it) {
Expand Down Expand Up @@ -123,13 +124,14 @@ auto domain_to_ascii(
}

if ((label.size() >= 4) && (label.substr(0, 4) == U"xn--")) {
auto decoded = punycode_decode(label.substr(4));
if (!decoded) {
return tl::make_unexpected(decoded.error());
auto decoded = std::u32string{};
auto result = punycode_decode(label.substr(4), &decoded);
if (!result) {
return tl::make_unexpected(result.error());
}

auto validated =
validate_label(decoded.value(), use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, false);
validate_label(decoded, use_std3_ascii_rules, check_hyphens, check_bidi, check_joiners, false);
if (!validated) {
return tl::make_unexpected(validated.error());
}
Expand All @@ -150,12 +152,13 @@ auto domain_to_ascii(

labels.emplace_back();
if (!is_ascii(label)) {
auto encoded = punycode_encode(label);
if (!encoded) {
return tl::make_unexpected(encoded.error());
auto encoded = std::string{};
auto result = punycode_encode(label, &encoded);
if (!result) {
return tl::make_unexpected(result.error());
}
ranges::copy(U"xn--"sv, ranges::back_inserter(labels.back()));
ranges::copy(encoded.value(), ranges::back_inserter(labels.back()));
ranges::copy(encoded, ranges::back_inserter(labels.back()));
}
else {
ranges::copy(label, ranges::back_inserter(labels.back()));
Expand Down Expand Up @@ -217,11 +220,17 @@ auto domain_to_u8(std::string_view domain_name, [[maybe_unused]] bool *validatio
labels.emplace_back();
if (label.substr(0, 4) == "xn--") {
label.remove_prefix(4);
auto decoded = punycode_decode(label);
if (!decoded) {
return tl::make_unexpected(decoded.error());
auto decoded = std::u32string{};
auto result = punycode_decode(label, &decoded);
if (!result) {
return tl::make_unexpected(result.error());
}
auto u8 = decoded | unicode::transforms::to_u8;
auto first = std::cbegin(u8);
auto last = std::cend(u8);
for (auto it = first; it != last; ++it) {
labels.back().push_back((*it).value());
}
ranges::copy(decoded.value(), ranges::back_inserter(labels.back()));
}
else {
ranges::copy(label, ranges::back_inserter(labels.back()));
Expand Down
74 changes: 58 additions & 16 deletions src/v1/domain/idna_code_point_map_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

namespace skyr {
inline namespace v1 {
namespace idna {
/// An iterator adapter that converts a domain name.
/// \tparam Iterator
/// \tparam Sentinel
template <
class Iterator,
class Sentinel=unicode::sentinel
Expand All @@ -30,6 +34,11 @@ class idna_code_point_map_iterator {
using difference_type = typename Iterator::difference_type;
using size_type = typename Iterator::size_type;

/// Constructor
/// \param first
/// \param last
/// \param use_std3_ascii_rules
/// \param transitional_processing
explicit constexpr idna_code_point_map_iterator(
Iterator first,
Sentinel last,
Expand All @@ -40,17 +49,23 @@ class idna_code_point_map_iterator {
, use_std3_ascii_rules_(use_std3_ascii_rules)
, transitional_processing_(transitional_processing) {}

/// Pre-increment: advances this iterator by calling `increment()`.
/// \return A reference to this iterator, after it has been advanced
constexpr auto operator ++ () noexcept -> idna_code_point_map_iterator & {
increment();
return *this;
}

/// Post-increment: advances this iterator by calling `increment()`.
/// \return A copy of this iterator taken before it was advanced
constexpr auto operator ++ (int) noexcept -> idna_code_point_map_iterator {
auto result = *this;
increment();
return result;
}

///
/// \return
constexpr auto operator * () const noexcept -> const_reference {
constexpr auto map_code_point = [] (char32_t code_point, bool use_std3_ascii_rules, bool transitional_processing)
-> tl::expected<char32_t, domain_errc> {
Expand Down Expand Up @@ -95,11 +110,17 @@ class idna_code_point_map_iterator {
}
}

[[nodiscard]] constexpr bool operator == ([[maybe_unused]] unicode::sentinel sentinel) const noexcept {
/// Compares the stored position `it_` against the stored `last_`.
/// \param sentinel Unused by the comparison; only its type selects this overload
/// \return The result of `it_ == last_`
[[nodiscard]] constexpr auto operator == ([[maybe_unused]] unicode::sentinel sentinel) const noexcept {
return it_ == last_;
}

[[nodiscard]] constexpr bool operator != (unicode::sentinel sentinel) const noexcept {
/// Negation of `operator ==` for the same sentinel.
/// \param sentinel The sentinel passed through to `operator ==`
/// \return `!(*this == sentinel)`
[[nodiscard]] constexpr auto operator != (unicode::sentinel sentinel) const noexcept {
return !(*this == sentinel);
}

Expand All @@ -124,50 +145,71 @@ class idna_code_point_map_iterator {

};

template <class Range>
/// A range adapter that maps code points in a domain name using IDNA
/// \tparam DomainName
template <class DomainName>
class idna_code_point_map_range {
public:

///
/// \param domain_name
/// \param use_std3_ascii_rules
/// \param transitional_processing
constexpr idna_code_point_map_range(
Range &&range,
bool use_std3_ascii_rules,
bool transitional_processing)
: range_(range)
DomainName &&domain_name,
bool use_std3_ascii_rules,
bool transitional_processing)
: domain_name_(domain_name)
, use_std3_ascii_rules_(use_std3_ascii_rules)
, transitional_processing_(transitional_processing) {}

///
/// \return
[[nodiscard]] constexpr auto cbegin() const noexcept {
return idna_code_point_map_iterator<unicode::traits::range_iterator_t<Range>>(
std::cbegin(range_), std::cend(range_), use_std3_ascii_rules_, transitional_processing_);
return idna_code_point_map_iterator<unicode::traits::range_iterator_t<DomainName>>(
std::cbegin(domain_name_), std::cend(domain_name_), use_std3_ascii_rules_, transitional_processing_);
}

/// Returns the end marker for this range.
/// \return A value-initialized `unicode::sentinel`
[[nodiscard]] constexpr auto cend() const noexcept {
return unicode::sentinel{};
}

/// Forwards to `cbegin()`, so the range can be used where `begin()` is expected.
/// \return Whatever `cbegin()` returns
[[nodiscard]] constexpr auto begin() const noexcept {
return cbegin();
}

/// Forwards to `cend()`, so the range can be used where `end()` is expected.
/// \return Whatever `cend()` returns
[[nodiscard]] constexpr auto end() const noexcept {
return cend();
}

private:

Range range_;
DomainName domain_name_;
bool use_std3_ascii_rules_;
bool transitional_processing_;

};

namespace views {
template <class Range>
constexpr inline auto map_code_points(Range &&range, bool use_std3_ascii_rules, bool transitional_processing)
-> idna_code_point_map_range<Range> {
return {range, use_std3_ascii_rules, transitional_processing};
///
/// \tparam DomainName
/// \param domain_name
/// \param use_std3_ascii_rules
/// \param transitional_processing
/// \return A range adapter
template <class DomainName>
constexpr inline auto map_code_points(
DomainName &&domain_name, bool use_std3_ascii_rules, bool transitional_processing)
-> idna_code_point_map_range<DomainName> {
return {domain_name, use_std3_ascii_rules, transitional_processing};
}
} //
} // namespace views
} // namespace idna
} // namespace v1
} // namespace skyr

Expand Down
Loading