// certctl/internal/validation/unicode_test.go

package validation

import (
	"strings"
	"testing"
)

// Bundle-9 / Audit L-012 / CWE-1007 + CWE-176 regression suite.
//
// Note: invisible / formatting characters in test inputs are written as
// \uXXXX escape sequences (NOT literal codepoints) so the source file
// stays parseable and readable. A literal BOM inside a Go string literal
// trips the parser ("illegal byte order mark"), and literal bidi-override
// or zero-width characters are invisible or misleading in editors and diffs.

// TestValidateUnicodeSafe_AcceptsCleanASCII feeds plain ASCII hostnames, an
// IPv4 literal, and the empty string to ValidateUnicodeSafe and expects a nil
// error for each. Every input runs as its own subtest, named after the input.
func TestValidateUnicodeSafe_AcceptsCleanASCII(t *testing.T) {
	inputs := [...]string{
		"example.com",
		"api.example.com",
		"sub-domain.example.co.uk",
		"a.b.c.d.example.org",
		"localhost",
		"192.168.1.1",
		"",
	}
	for i := range inputs {
		in := inputs[i]
		t.Run(in, func(t *testing.T) {
			err := ValidateUnicodeSafe(in)
			if err == nil {
				return
			}
			t.Errorf("clean ASCII %q rejected: %v", in, err)
		})
	}
}

// TestValidateUnicodeSafe_RejectsRTLOverride embeds each bidirectional
// formatting codepoint (U+202A..U+202E and U+2066..U+2069) between "good" and
// "com" and expects ValidateUnicodeSafe to return an error whose message
// contains "bidirectional override".
func TestValidateUnicodeSafe_RejectsRTLOverride(t *testing.T) {
	type bidiCase struct {
		label string
		input string
	}
	table := []bidiCase{
		{label: "LRE", input: "good\u202Acom"},
		{label: "RLE", input: "good\u202Bcom"},
		{label: "PDF", input: "good\u202Ccom"},
		{label: "LRO", input: "good\u202Dcom"},
		{label: "RLO", input: "good\u202Ecom"},
		{label: "LRI", input: "good\u2066com"},
		{label: "RLI", input: "good\u2067com"},
		{label: "FSI", input: "good\u2068com"},
		{label: "PDI", input: "good\u2069com"},
	}
	for _, tc := range table {
		t.Run(tc.label, func(t *testing.T) {
			switch err := ValidateUnicodeSafe(tc.input); {
			case err == nil:
				// Acceptance is a hard failure: there is no message to inspect.
				t.Fatal("expected rejection")
			case !strings.Contains(err.Error(), "bidirectional override"):
				t.Errorf("error should cite bidirectional override; got: %v", err)
			}
		})
	}
}

// TestValidateUnicodeSafe_RejectsZeroWidth embeds U+200B, U+200C, U+200D,
// U+2060 and U+FEFF in turn between "good" and "com" and expects
// ValidateUnicodeSafe to return an error whose message contains "zero-width".
func TestValidateUnicodeSafe_RejectsZeroWidth(t *testing.T) {
	const want = "zero-width"
	table := []struct{ label, payload string }{
		{"ZWSP", "good\u200Bcom"},
		{"ZWNJ", "good\u200Ccom"},
		{"ZWJ", "good\u200Dcom"},
		{"WJ", "good\u2060com"},
		{"BOM", "good\uFEFFcom"},
	}
	for _, row := range table {
		t.Run(row.label, func(t *testing.T) {
			got := ValidateUnicodeSafe(row.payload)
			if got == nil {
				t.Fatal("expected rejection")
			}
			if strings.Contains(got.Error(), want) {
				return
			}
			t.Errorf("error should cite zero-width; got: %v", got)
		})
	}
}

// TestValidateUnicodeSafe_RejectsControlChars embeds NUL, TAB, LF, CR and DEL
// in turn between "good" and "com" and expects ValidateUnicodeSafe to return
// an error whose message contains "control character".
func TestValidateUnicodeSafe_RejectsControlChars(t *testing.T) {
	table := []struct {
		label string
		input string
	}{
		{label: "NUL", input: "good\x00com"},
		{label: "TAB", input: "good\tcom"},
		{label: "LF", input: "good\ncom"},
		{label: "CR", input: "good\rcom"},
		{label: "DEL", input: "good\x7Fcom"},
	}
	for i := range table {
		entry := table[i]
		t.Run(entry.label, func(t *testing.T) {
			err := ValidateUnicodeSafe(entry.input)
			if err == nil {
				t.Fatal("expected rejection")
			}
			if msg := err.Error(); !strings.Contains(msg, "control character") {
				t.Errorf("error should cite control character; got: %v", err)
			}
		})
	}
}

// TestValidateUnicodeSafe_RejectsIDNHomograph expects ValidateUnicodeSafe to
// return an error whose message contains "IDN homograph" when a label mixes
// ASCII letters with a single non-Latin letter.
func TestValidateUnicodeSafe_RejectsIDNHomograph(t *testing.T) {
	// The first input starts with Cyrillic 'а' (U+0430), which renders like
	// Latin 'a' but is a different codepoint: the classic homograph. The
	// other two inputs swap one letter for a Greek and a Cherokee character
	// respectively (per the case names). These literals are intentionally
	// non-ASCII; do not "fix" them to plain Latin letters.
	type homographCase struct {
		label string
		input string
	}
	table := []homographCase{
		{label: "cyrillic_a_in_apple", input: "аpple.com"},
		{label: "greek_omicron_in_google", input: "gοogle.com"},
		{label: "cherokee_letter", input: "gᏇogle.com"},
	}
	for _, tc := range table {
		t.Run(tc.label, func(t *testing.T) {
			switch err := ValidateUnicodeSafe(tc.input); {
			case err == nil:
				t.Fatal("expected rejection")
			case !strings.Contains(err.Error(), "IDN homograph"):
				t.Errorf("error should cite IDN homograph; got: %v", err)
			}
		})
	}
}

// TestValidateUnicodeSafe_AcceptsPureNonASCII expects ValidateUnicodeSafe to
// accept a name written entirely in Cyrillic.
func TestValidateUnicodeSafe_AcceptsPureNonASCII(t *testing.T) {
	// An all-Cyrillic label is a legitimate IDN and must not be rejected;
	// the homograph attack being defended against is the MIX with ASCII.
	const cyrillicDomain = "пример.рф"
	err := ValidateUnicodeSafe(cyrillicDomain)
	if err == nil {
		return
	}
	t.Errorf("pure-Cyrillic label rejected: %v", err)
}

// TestValidateUnicodeSafe_ErrorMentionsByteOffset expects the error returned
// for an input containing U+202E to include the phrase "byte offset". Only
// the phrase is checked, not the offset value.
func TestValidateUnicodeSafe_ErrorMentionsByteOffset(t *testing.T) {
	const input = "good\u202Eevil.com"
	err := ValidateUnicodeSafe(input)
	if err == nil {
		t.Fatal("expected rejection")
	}
	if msg := err.Error(); !strings.Contains(msg, "byte offset") {
		t.Errorf("error should cite byte offset; got: %v", err)
	}
}

// TestValidateUnicodeSafe_EmptyStringPasses expects ValidateUnicodeSafe to
// return nil for the empty string; per the failure message, enforcing
// "required" is left to a different validator.
func TestValidateUnicodeSafe_EmptyStringPasses(t *testing.T) {
	err := ValidateUnicodeSafe("")
	if err == nil {
		return
	}
	t.Errorf("empty string should pass through (different validator handles required); got: %v", err)
}