1
0
mirror of https://github.com/google/nomulus synced 2025-12-23 06:15:42 +00:00

Allow double hyphens in 3rd&4th position in all domain operations (#2909)

This is a follow-up to PR #2908, which relaxed this restriction on bare TLDs
only; we now allow it systemwide on domains and hostnames as well.  The
rules against hyphens in these positions are still enforced on all parts of the
domain name except the last one. Correct handling of multi-part TLDs in this
regard is out of scope for this PR; a multi-part TLD that looked something like
".zz--foobar.foobar" would still fail validation. (But of course you cannot know
a priori, just from looking at a 3-part string, whether it is a hostname
on a normal TLD or a domain name on a 2-part TLD.)

This also has some annoying interactions with a trailing dot (indicating the
root), which needs to be preserved but otherwise doesn't affect how TLD
validation is handled.

BUG= http://b/471013082
This commit is contained in:
Ben McIlwain
2025-12-22 19:57:57 -05:00
committed by GitHub
parent c24f09febc
commit cbba91558a
3 changed files with 77 additions and 7 deletions

View File

@@ -19,9 +19,11 @@ import static google.registry.util.PreconditionsUtils.checkArgumentNotNull;
import com.google.common.base.Ascii;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.net.InternetDomainName;
import java.util.List;
/** Utility methods related to domain names. */
public final class DomainNameUtils {
@@ -39,14 +41,34 @@ public final class DomainNameUtils {
.equals(potentialParent.parts());
}
/** Canonicalizes a hostname/domain name by lowercasing and converting unicode to punycode. */
/**
* Canonicalizes a hostname/domain name by lowercasing and converting Unicode to punycode.
*
* <p>This applies slightly stricter rules to all labels other than the TLD part (all other labels
* are not allowed to have hyphens in the third and fourth position except when using
* ACE-formatted Punycode). This restriction is not enforced on the last label (so multi-part TLDs
* still cannot have said characters except on the last part).
*
* <p>A single trailing dot (denoting the root) is stripped before conversion and re-appended to
* the result. A name consisting of only one label is converted with {@link Idn#toASCII} rather
* than with the relaxed {@link Idn#tldToASCII}.
*
* @param label the hostname or domain name to canonicalize, optionally ending in a dot
* @return the lowercased, ASCII-compatible form of {@code label}, with any trailing dot retained
* @throws IllegalArgumentException if {@link Idn} reports conversion errors for any label
*/
public static String canonicalizeHostname(String label) {
String labelLowercased = Ascii.toLowerCase(label);
try {
return Idn.toASCII(labelLowercased);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(String.format("Error ASCIIfying label '%s'", label), e);
// Holds the trailing dot, if there is one, so it can be re-appended after conversion.
String finalChar = "";
if (labelLowercased.endsWith(".")) {
// Drop the trailing dot so that splitting on '.' does not yield an empty last part.
labelLowercased = labelLowercased.substring(0, labelLowercased.length() - 1);
finalChar = ".";
}
List<String> parts = Splitter.on('.').splitToList(labelLowercased);
// If the hostname only has one part, just canonicalize that.
if (parts.size() == 1) {
return Idn.toASCII(parts.getFirst()) + finalChar;
}
// If the hostname has multiple parts, apply stricter validation to all labels but the last
// one (which relaxes the hyphens in third and fourth positions rule).
StringBuilder sb = new StringBuilder();
// Every label except the last goes through the strict conversion, followed by a separator.
for (int i = 0; i < parts.size() - 1; i++) {
sb.append(Idn.toASCII(parts.get(i))).append('.');
}
// The last label uses the relaxed TLD conversion; finalChar restores a stripped trailing dot.
sb.append(Idn.tldToASCII(parts.getLast())).append(finalChar);
return sb.toString();
}
/**

View File

@@ -14,9 +14,15 @@
package google.registry.util;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.ibm.icu.text.IDNA;
import com.ibm.icu.text.IDNA.Error;
import com.ibm.icu.text.IDNA.Info;
import java.util.Set;
/**
* A partial API-compatible replacement for {@link java.net.IDN} that replaces <a
@@ -51,11 +57,29 @@ public final class Idn {
StringBuilder result = new StringBuilder();
UTS46_INSTANCE.nameToASCII(name, result, info);
if (info.hasErrors()) {
throw new IllegalArgumentException("Errors: " + Joiner.on(',').join(info.getErrors()));
throw new IllegalArgumentException(
String.format(
"Errors ASCIIfying label %s: %s", name, Joiner.on(',').join(info.getErrors())));
}
return result.toString();
}
/**
 * Converts a TLD string from Unicode to its Punycoded ASCII form.
 *
 * <p>In contrast to {@link #toASCII}, an {@link Error#HYPHEN_3_4} reported by the converter is
 * ignored here, so hyphens in the third and fourth positions are accepted even when the string
 * is not an "xn--" ACE-formatted label. Every other reported error is still fatal.
 *
 * @param name the TLD to convert
 * @return the ASCII form produced by the converter
 * @throws IllegalArgumentException if the converter reports any error other than {@link
 *     Error#HYPHEN_3_4}
 */
public static String tldToASCII(String name) {
  StringBuilder ascii = new StringBuilder();
  Info conversionInfo = new Info();
  UTS46_INSTANCE.nameToASCII(name, ascii, conversionInfo);
  // Filter out the one error that is tolerated for TLDs; whatever remains must be rejected.
  Set<Error> toleratedErrors = ImmutableSet.of(Error.HYPHEN_3_4);
  Set<Error> fatalErrors = Sets.difference(conversionInfo.getErrors(), toleratedErrors);
  String joinedFatalErrors = Joiner.on(',').join(fatalErrors);
  checkArgument(
      fatalErrors.isEmpty(), "Errors ASCIIfying label %s: %s", name, joinedFatalErrors);
  return ascii.toString();
}
/**
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode, as defined by the
* ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.

View File

@@ -45,13 +45,37 @@ class DomainNameUtilsTest {
.isEqualTo("119.63.227.45-ns1.jhz-tt.uk");
}
@Test
void testCanonicalizeHostname_retainsTrailingDot() {
  // The trailing root dot must survive for all-IDN, mixed ASCII/IDN, and all-ASCII inputs.
  String allIdn = canonicalizeHostname("みんな.みんな.");
  assertThat(allIdn).isEqualTo("xn--q9jyb4c.xn--q9jyb4c.");

  String mixedCaseWithIdnTld = canonicalizeHostname("BAR.foo.みんな.");
  assertThat(mixedCaseWithIdnTld).isEqualTo("bar.foo.xn--q9jyb4c.");

  String plainAscii = canonicalizeHostname("cat.lol.");
  assertThat(plainAscii).isEqualTo("cat.lol.");
}
@Test
void testCanonicalizeHostname_throwsOn34HyphenRule() {
// The label "45--ns1" has hyphens in its third and fourth positions and is not the last label
// of the name, so canonicalization is expected to reject it with a HYPHEN_3_4 error.
IllegalArgumentException thrown =
assertThrows(
IllegalArgumentException.class,
() -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk"));
assertThat(thrown).hasCauseThat().hasMessageThat().contains("HYPHEN_3_4");
assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4");
}
@Test
void testCanonicalizeHostname_throwsOn34HyphenRule_withTrailingDot() {
  // The non-final label "45--ns1" has hyphens in positions three and four; the trailing root
  // dot must not cause that label to be treated as the last one.
  String hostnameWithRootDot = "119.63.227.45--ns1.jhz-tt.uk.";
  IllegalArgumentException e =
      assertThrows(
          IllegalArgumentException.class, () -> canonicalizeHostname(hostnameWithRootDot));
  assertThat(e).hasMessageThat().contains("HYPHEN_3_4");
}
@Test
void testCanonicalizeHostname_allows34HyphenOnTld() {
  // ASCII second-level label under a TLD with hyphens in positions three and four,
  // without and then with the trailing root dot.
  String asciiName = canonicalizeHostname("foobar.zz--main-2262");
  assertThat(asciiName).isEqualTo("foobar.zz--main-2262");
  String asciiNameWithDot = canonicalizeHostname("foobar.zz--main-2262.");
  assertThat(asciiNameWithDot).isEqualTo("foobar.zz--main-2262.");

  // Unicode second-level label: it is punycoded while the hyphenated TLD passes through.
  String idnName = canonicalizeHostname("みんな.45--foo");
  assertThat(idnName).isEqualTo("xn--q9jyb4c.45--foo");
  String idnNameWithDot = canonicalizeHostname("みんな.45--foo.");
  assertThat(idnNameWithDot).isEqualTo("xn--q9jyb4c.45--foo.");
}
@Test