diff --git a/util/src/main/java/google/registry/util/DomainNameUtils.java b/util/src/main/java/google/registry/util/DomainNameUtils.java
index aa7beb8dd..16311facc 100644
--- a/util/src/main/java/google/registry/util/DomainNameUtils.java
+++ b/util/src/main/java/google/registry/util/DomainNameUtils.java
@@ -19,9 +19,11 @@ import static google.registry.util.PreconditionsUtils.checkArgumentNotNull;
 
 import com.google.common.base.Ascii;
 import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableList;
 import com.google.common.net.InternetDomainName;
+import java.util.List;
 
 /** Utility methods related to domain names. */
 public final class DomainNameUtils {
@@ -39,14 +41,42 @@ public final class DomainNameUtils {
         .equals(potentialParent.parts());
   }
 
-  /** Canonicalizes a hostname/domain name by lowercasing and converting unicode to punycode. */
+  /**
+   * Canonicalizes a hostname/domain name by lowercasing and converting Unicode to punycode.
+   *
+   * <p>The name is split on dots and every label is converted on its own. All labels except the
+   * last go through {@link Idn#toASCII}; the last label (the TLD part) goes through {@link
+   * Idn#tldToASCII}, which does not reject hyphens in the third and fourth positions. For a
+   * multi-part TLD only the final label gets that exemption.
+   *
+   * <p>A name made of a single label is converted with {@link Idn#toASCII}, so it does NOT get
+   * the TLD exemption. One trailing dot, if present, is stripped before splitting and appended
+   * again to the result.
+   *
+   * @param label the hostname or domain name to canonicalize; may end with a dot
+   * @return the canonical ASCII form of {@code label}
+   * @throws IllegalArgumentException if a label cannot be converted
+   */
   public static String canonicalizeHostname(String label) {
     String labelLowercased = Ascii.toLowerCase(label);
-    try {
-      return Idn.toASCII(labelLowercased);
-    } catch (IllegalArgumentException e) {
-      throw new IllegalArgumentException(String.format("Error ASCIIfying label '%s'", label), e);
+    String finalChar = "";
+    if (labelLowercased.endsWith(".")) {
+      labelLowercased = labelLowercased.substring(0, labelLowercased.length() - 1);
+      finalChar = ".";
     }
+    List<String> parts = Splitter.on('.').splitToList(labelLowercased);
+    // If the hostname only has one part, just canonicalize that.
+    if (parts.size() == 1) {
+      return Idn.toASCII(parts.getFirst()) + finalChar;
+    }
+    // If the hostname has multiple parts, apply stricter validation to all labels but the last
+    // one (which relaxes the hyphens in third and fourth positions rule).
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < parts.size() - 1; i++) {
+      sb.append(Idn.toASCII(parts.get(i))).append('.');
+    }
+    sb.append(Idn.tldToASCII(parts.getLast())).append(finalChar);
+    return sb.toString();
   }
 
   /**
diff --git a/util/src/main/java/google/registry/util/Idn.java b/util/src/main/java/google/registry/util/Idn.java
index 3b417453c..eeb97f898 100644
--- a/util/src/main/java/google/registry/util/Idn.java
+++ b/util/src/main/java/google/registry/util/Idn.java
@@ -14,9 +14,15 @@
 
 package google.registry.util;
 
+import static com.google.common.base.Preconditions.checkArgument;
+
 import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
 import com.ibm.icu.text.IDNA;
+import com.ibm.icu.text.IDNA.Error;
 import com.ibm.icu.text.IDNA.Info;
+import java.util.Set;
 
 /**
  * A partial API-compatible replacement for {@link java.net.IDN} that replaces
+   * Unlike {@link #toASCII}, this method does NOT enforce the restriction that hyphens may only
+   * be present on the third and fourth characters for "xn--" ACE-formatted domains.
+   */
+  public static String tldToASCII(String name) {
+    Info info = new Info();
+    StringBuilder result = new StringBuilder();
+    UTS46_INSTANCE.nameToASCII(name, result, info);
+    Set<Error> errors = Sets.difference(info.getErrors(), ImmutableSet.of(Error.HYPHEN_3_4));
+    checkArgument(
+        errors.isEmpty(), "Errors ASCIIfying label %s: %s", name, Joiner.on(',').join(errors));
+    return result.toString();
+  }
+
   /**
    * Translates a string from ASCII Compatible Encoding (ACE) to Unicode, as defined by the
    * ToUnicode operation of RFC 3490.
diff --git a/util/src/test/java/google/registry/util/DomainNameUtilsTest.java b/util/src/test/java/google/registry/util/DomainNameUtilsTest.java
index 36e4bc252..75b73e1f6 100644
--- a/util/src/test/java/google/registry/util/DomainNameUtilsTest.java
+++ b/util/src/test/java/google/registry/util/DomainNameUtilsTest.java
@@ -45,13 +45,40 @@ class DomainNameUtilsTest {
         .isEqualTo("119.63.227.45-ns1.jhz-tt.uk");
   }
 
+  /** A trailing dot on the input is carried over to the canonical form. */
+  @Test
+  void testCanonicalizeHostname_retainsTrailingDot() {
+    assertThat(canonicalizeHostname("みんな.みんな.")).isEqualTo("xn--q9jyb4c.xn--q9jyb4c.");
+    assertThat(canonicalizeHostname("BAR.foo.みんな.")).isEqualTo("bar.foo.xn--q9jyb4c.");
+    assertThat(canonicalizeHostname("cat.lol.")).isEqualTo("cat.lol.");
+  }
+
   @Test
   void testCanonicalizeHostname_throwsOn34HyphenRule() {
     IllegalArgumentException thrown =
         assertThrows(
             IllegalArgumentException.class,
             () -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk"));
-    assertThat(thrown).hasCauseThat().hasMessageThat().contains("HYPHEN_3_4");
+    assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4");
+  }
+
+  /** The 3-4 hyphen check on non-final labels still fires when the name ends with a dot. */
+  @Test
+  void testCanonicalizeHostname_throwsOn34HyphenRule_withTrailingDot() {
+    IllegalArgumentException thrown =
+        assertThrows(
+            IllegalArgumentException.class,
+            () -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk."));
+    assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4");
+  }
+
+  /** The final label may carry hyphens in positions three and four, with or without a dot. */
+  @Test
+  void testCanonicalizeHostname_allows34HyphenOnTld() {
+    assertThat(canonicalizeHostname("foobar.zz--main-2262")).isEqualTo("foobar.zz--main-2262");
+    assertThat(canonicalizeHostname("foobar.zz--main-2262.")).isEqualTo("foobar.zz--main-2262.");
+    assertThat(canonicalizeHostname("みんな.45--foo")).isEqualTo("xn--q9jyb4c.45--foo");
+    assertThat(canonicalizeHostname("みんな.45--foo.")).isEqualTo("xn--q9jyb4c.45--foo.");
   }
 
   @Test