From cbba91558a090490dcac137d44bf049ebe6b5c47 Mon Sep 17 00:00:00 2001 From: Ben McIlwain Date: Mon, 22 Dec 2025 19:57:57 -0500 Subject: [PATCH] Allow double hyphens in 3rd&4th position in all domain operations (#2909) This is a follow-up to PR #2908, which relaxed this restriction on bare TLDs only, but now we also allow it systemwide on domains and hostnames as well. The rules against hyphens in these positions are still enforced on all parts of the domain name except the last one. Correct handling of multi-part TLDs in this regard is out of scope in this PR; a multi-part TLD that looked something like ".zz--foobar.foobar" would still fail validation. (But of course you cannot a priori know just from looking at a 3-part string whether it might be a hostname on a normal TLD, or a domain name on a 2-part TLD.) This also has some annoying interactions with a trailing dot (indicating the root), which need to be preserved, but otherwise don't affect how TLD validation is handled. BUG= http://b/471013082 --- .../google/registry/util/DomainNameUtils.java | 32 ++++++++++++++++--- .../main/java/google/registry/util/Idn.java | 26 ++++++++++++++- .../registry/util/DomainNameUtilsTest.java | 26 ++++++++++++++- 3 files changed, 77 insertions(+), 7 deletions(-) diff --git a/util/src/main/java/google/registry/util/DomainNameUtils.java b/util/src/main/java/google/registry/util/DomainNameUtils.java index aa7beb8dd..16311facc 100644 --- a/util/src/main/java/google/registry/util/DomainNameUtils.java +++ b/util/src/main/java/google/registry/util/DomainNameUtils.java @@ -19,9 +19,11 @@ import static google.registry.util.PreconditionsUtils.checkArgumentNotNull; import com.google.common.base.Ascii; import com.google.common.base.Joiner; +import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.net.InternetDomainName; +import java.util.List; /** Utility methods related to domain names. 
*/ public final class DomainNameUtils { @@ -39,14 +41,34 @@ public final class DomainNameUtils { .equals(potentialParent.parts()); } - /** Canonicalizes a hostname/domain name by lowercasing and converting unicode to punycode. */ + /** + * Canonicalizes a hostname/domain name by lowercasing and converting Unicode to punycode. + * + * <p>This applies slightly stricter rules to all labels other than the TLD part (all other labels + * are not allowed to have hyphens in the third and fourth position except when using + * ACE-formatted Punycode). This restriction is not enforced on the last label (so multi-part TLDs + * still cannot have said characters except on the last part). + */ public static String canonicalizeHostname(String label) { String labelLowercased = Ascii.toLowerCase(label); - try { - return Idn.toASCII(labelLowercased); - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException(String.format("Error ASCIIfying label '%s'", label), e); + String finalChar = ""; + if (labelLowercased.endsWith(".")) { + labelLowercased = labelLowercased.substring(0, labelLowercased.length() - 1); + finalChar = "."; } + List<String> parts = Splitter.on('.').splitToList(labelLowercased); + // If the hostname only has one part, just canonicalize that. + if (parts.size() == 1) { + return Idn.toASCII(parts.getFirst()) + finalChar; + } + // If the hostname has multiple parts, apply stricter validation to all labels but the last + // one (which relaxes the hyphens in third and fourth positions rule). 
+ StringBuilder sb = new StringBuilder(); + for (int i = 0; i < parts.size() - 1; i++) { + sb.append(Idn.toASCII(parts.get(i))).append('.'); + } + sb.append(Idn.tldToASCII(parts.getLast())).append(finalChar); + return sb.toString(); } /** diff --git a/util/src/main/java/google/registry/util/Idn.java b/util/src/main/java/google/registry/util/Idn.java index 3b417453c..eeb97f898 100644 --- a/util/src/main/java/google/registry/util/Idn.java +++ b/util/src/main/java/google/registry/util/Idn.java @@ -14,9 +14,15 @@ package google.registry.util; +import static com.google.common.base.Preconditions.checkArgument; + import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; import com.ibm.icu.text.IDNA; +import com.ibm.icu.text.IDNA.Error; import com.ibm.icu.text.IDNA.Info; +import java.util.Set; /** * A partial API-compatible replacement for {@link java.net.IDN} that replaces Unlike {@link #toASCII}, this method does NOT enforce the restriction that hyphens may only + * be present on the third and fourth characters for "xn--" ACE-formatted domains. + */ + public static String tldToASCII(String name) { + Info info = new Info(); + StringBuilder result = new StringBuilder(); + UTS46_INSTANCE.nameToASCII(name, result, info); + Set<Error> errors = Sets.difference(info.getErrors(), ImmutableSet.of(Error.HYPHEN_3_4)); + checkArgument( + errors.isEmpty(), "Errors ASCIIfying label %s: %s", name, Joiner.on(',').join(errors)); + return result.toString(); + } + /** * Translates a string from ASCII Compatible Encoding (ACE) to Unicode, as defined by the * ToUnicode operation of RFC 3490. 
diff --git a/util/src/test/java/google/registry/util/DomainNameUtilsTest.java b/util/src/test/java/google/registry/util/DomainNameUtilsTest.java index 36e4bc252..75b73e1f6 100644 --- a/util/src/test/java/google/registry/util/DomainNameUtilsTest.java +++ b/util/src/test/java/google/registry/util/DomainNameUtilsTest.java @@ -45,13 +45,37 @@ class DomainNameUtilsTest { .isEqualTo("119.63.227.45-ns1.jhz-tt.uk"); } + @Test + void testCanonicalizeHostname_retainsTrailingDot() { /* Inputs ending in '.' are expected to keep that dot in the canonical output. */ + assertThat(canonicalizeHostname("みんな.みんな.")).isEqualTo("xn--q9jyb4c.xn--q9jyb4c."); + assertThat(canonicalizeHostname("BAR.foo.みんな.")).isEqualTo("bar.foo.xn--q9jyb4c."); + assertThat(canonicalizeHostname("cat.lol.")).isEqualTo("cat.lol."); + } + @Test void testCanonicalizeHostname_throwsOn34HyphenRule() { IllegalArgumentException thrown = assertThrows( IllegalArgumentException.class, () -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk")); - assertThat(thrown).hasCauseThat().hasMessageThat().contains("HYPHEN_3_4"); + assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4"); /* HYPHEN_3_4 is now expected in the thrown exception's own message rather than in its cause. */ + } + + @Test + void testCanonicalizeHostname_throwsOn34HyphenRule_withTrailingDot() { /* The same rejection is expected when the name ends with a trailing dot. */ + IllegalArgumentException thrown = + assertThrows( + IllegalArgumentException.class, + () -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk.")); + assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4"); + } + + @Test + void testCanonicalizeHostname_allows34HyphenOnTld() { /* Hyphens in the third and fourth positions of the last label are expected to be accepted, with or without a trailing dot. */ + assertThat(canonicalizeHostname("foobar.zz--main-2262")).isEqualTo("foobar.zz--main-2262"); + assertThat(canonicalizeHostname("foobar.zz--main-2262.")).isEqualTo("foobar.zz--main-2262."); + assertThat(canonicalizeHostname("みんな.45--foo")).isEqualTo("xn--q9jyb4c.45--foo"); + assertThat(canonicalizeHostname("みんな.45--foo.")).isEqualTo("xn--q9jyb4c.45--foo."); } @Test