diff --git a/util/src/main/java/google/registry/util/DomainNameUtils.java b/util/src/main/java/google/registry/util/DomainNameUtils.java
index aa7beb8dd..16311facc 100644
--- a/util/src/main/java/google/registry/util/DomainNameUtils.java
+++ b/util/src/main/java/google/registry/util/DomainNameUtils.java
@@ -19,9 +19,11 @@ import static google.registry.util.PreconditionsUtils.checkArgumentNotNull;
import com.google.common.base.Ascii;
import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.net.InternetDomainName;
+import java.util.List;
/** Utility methods related to domain names. */
public final class DomainNameUtils {
@@ -39,14 +41,34 @@ public final class DomainNameUtils {
.equals(potentialParent.parts());
}
- /** Canonicalizes a hostname/domain name by lowercasing and converting unicode to punycode. */
+ /**
+ * Canonicalizes a hostname/domain name by lowercasing and converting Unicode to punycode.
+ *
+ * <p>This applies slightly stricter rules to all labels other than the TLD part (all other labels
+ * are not allowed to have hyphens in the third and fourth position except when using
+ * ACE-formatted Punycode). This restriction is not enforced on the last label (so multi-part TLDs
+ * still cannot have said characters except on the last part).
+ */
public static String canonicalizeHostname(String label) {
String labelLowercased = Ascii.toLowerCase(label);
- try {
- return Idn.toASCII(labelLowercased);
- } catch (IllegalArgumentException e) {
- throw new IllegalArgumentException(String.format("Error ASCIIfying label '%s'", label), e);
+ // Set aside a single trailing dot before splitting so it does not produce an empty final
+ // label; it is re-appended unchanged to the canonicalized result below.
+ String finalChar = "";
+ if (labelLowercased.endsWith(".")) {
+ labelLowercased = labelLowercased.substring(0, labelLowercased.length() - 1);
+ finalChar = ".";
}
+ List<String> parts = Splitter.on('.').splitToList(labelLowercased);
+ // If the hostname only has one part, just canonicalize that.
+ if (parts.size() == 1) {
+ return Idn.toASCII(parts.getFirst()) + finalChar;
+ }
+ // If the hostname has multiple parts, apply stricter validation to all labels but the last
+ // one (which relaxes the hyphens in third and fourth positions rule).
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < parts.size() - 1; i++) {
+ sb.append(Idn.toASCII(parts.get(i))).append('.');
+ }
+ sb.append(Idn.tldToASCII(parts.getLast())).append(finalChar);
+ return sb.toString();
}
/**
diff --git a/util/src/main/java/google/registry/util/Idn.java b/util/src/main/java/google/registry/util/Idn.java
index 3b417453c..eeb97f898 100644
--- a/util/src/main/java/google/registry/util/Idn.java
+++ b/util/src/main/java/google/registry/util/Idn.java
@@ -14,9 +14,15 @@
package google.registry.util;
+import static com.google.common.base.Preconditions.checkArgument;
+
import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
import com.ibm.icu.text.IDNA;
+import com.ibm.icu.text.IDNA.Error;
import com.ibm.icu.text.IDNA.Info;
+import java.util.Set;
/**
* A partial API-compatible replacement for {@link java.net.IDN} that replaces [... the rest of
* this class comment and the declarations that follow it are missing from this excerpt ...]
*/
+ /**
+ * Unlike {@link #toASCII}, this method does NOT enforce the restriction that hyphens may only
+ * be present on the third and fourth characters for "xn--" ACE-formatted domains.
+ */
+ public static String tldToASCII(String name) {
+ Info info = new Info();
+ StringBuilder result = new StringBuilder();
+ UTS46_INSTANCE.nameToASCII(name, result, info);
+ // Only HYPHEN_3_4 is tolerated here; any other error reported by the conversion still causes
+ // an IllegalArgumentException listing the remaining errors.
+ Set<Error> errors = Sets.difference(info.getErrors(), ImmutableSet.of(Error.HYPHEN_3_4));
+ checkArgument(
+ errors.isEmpty(), "Errors ASCIIfying label %s: %s", name, Joiner.on(',').join(errors));
+ return result.toString();
+ }
+
/**
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode, as defined by the
* ToUnicode operation of RFC 3490.
diff --git a/util/src/test/java/google/registry/util/DomainNameUtilsTest.java b/util/src/test/java/google/registry/util/DomainNameUtilsTest.java
index 36e4bc252..75b73e1f6 100644
--- a/util/src/test/java/google/registry/util/DomainNameUtilsTest.java
+++ b/util/src/test/java/google/registry/util/DomainNameUtilsTest.java
@@ -45,13 +45,37 @@ class DomainNameUtilsTest {
.isEqualTo("119.63.227.45-ns1.jhz-tt.uk");
}
+ // A trailing dot on the input must be carried through to the canonicalized output, both for
+ // names containing Unicode labels (converted to punycode) and for plain ASCII names.
+ @Test
+ void testCanonicalizeHostname_retainsTrailingDot() {
+ assertThat(canonicalizeHostname("みんな.みんな.")).isEqualTo("xn--q9jyb4c.xn--q9jyb4c.");
+ assertThat(canonicalizeHostname("BAR.foo.みんな.")).isEqualTo("bar.foo.xn--q9jyb4c.");
+ assertThat(canonicalizeHostname("cat.lol.")).isEqualTo("cat.lol.");
+ }
+
+ // The non-final label "45--ns1" has hyphens in its third and fourth positions and must be
+ // rejected. HYPHEN_3_4 is now expected in the thrown exception's own message, not its cause.
@Test
void testCanonicalizeHostname_throwsOn34HyphenRule() {
IllegalArgumentException thrown =
assertThrows(
IllegalArgumentException.class,
() -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk"));
- assertThat(thrown).hasCauseThat().hasMessageThat().contains("HYPHEN_3_4");
+ assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4");
+ }
+
+ // Same hostname as the HYPHEN_3_4 rejection case, but with a trailing dot: the offending
+ // non-final label "45--ns1" must still be rejected with HYPHEN_3_4 in the message.
+ @Test
+ void testCanonicalizeHostname_throwsOn34HyphenRule_withTrailingDot() {
+ IllegalArgumentException thrown =
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk."));
+ assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4");
+ }
+
+ // The last label may carry hyphens in its third and fourth positions ("zz--main-2262",
+ // "45--foo"): such names canonicalize without error, with or without a trailing dot.
+ @Test
+ void testCanonicalizeHostname_allows34HyphenOnTld() {
+ assertThat(canonicalizeHostname("foobar.zz--main-2262")).isEqualTo("foobar.zz--main-2262");
+ assertThat(canonicalizeHostname("foobar.zz--main-2262.")).isEqualTo("foobar.zz--main-2262.");
+ assertThat(canonicalizeHostname("みんな.45--foo")).isEqualTo("xn--q9jyb4c.45--foo");
+ assertThat(canonicalizeHostname("みんな.45--foo.")).isEqualTo("xn--q9jyb4c.45--foo.");
}
@Test