--- /dev/null
+commit 9d26727a4e85
+Author: Zibi Braniecki <zbraniecki@mozilla.com>
+Date: Fri Feb 21 00:39:18 2020 +0000
+
+ Bug 1607052 - Unify language identifier canonicalization in LocaleService and OSPreferences to use unic_langid, and extend GTK's RegionalPrefs reading to fallback on SystemLocales. r=jfkthame
+
+ Differential Revision: https://phabricator.services.mozilla.com/D63393
+
+ --HG--
+ extra : moz-landing-system : lando
+---
+ intl/locale/LocaleService.cpp | 53 ++-------------------------
+ intl/locale/LocaleService.h | 18 +++++++++
+ intl/locale/OSPreferences.cpp | 18 ++-------
+ intl/locale/rust/unic-langid-ffi/src/lib.rs | 33 ++++++++++++++---
+ intl/locale/tests/gtest/TestLocaleService.cpp | 23 ++++++++++++
+ intl/locale/tests/unit/test_localeService.js | 3 +-
+ 6 files changed, 77 insertions(+), 71 deletions(-)
+
+diff --git intl/locale/LocaleService.cpp intl/locale/LocaleService.cpp
+index f7e13e36ccbc7..6708c4af4b5c3 100644
+--- old/intl/locale/LocaleService.cpp
++++ new/intl/locale/LocaleService.cpp
+@@ -39,51 +39,6 @@ NS_IMPL_ISUPPORTS(LocaleService, mozILocaleService, nsIObserver,
+
+ mozilla::StaticRefPtr<LocaleService> LocaleService::sInstance;
+
+-/**
+- * This function transforms a canonical Mozilla Language Tag, into it's
+- * BCP47 compilant form.
+- *
+- * Example: "ja-JP-mac" -> "ja-JP-macos"
+- *
+- * The BCP47 form should be used for all calls to ICU/Intl APIs.
+- * The canonical form is used for all internal operations.
+- */
+-static bool SanitizeForBCP47(nsACString& aLocale, bool strict) {
+- // Currently, the only locale code we use that's not BCP47-conformant is
+- // "ja-JP-mac" on OS X, and ICU canonicalizes it into a mouthfull
+- // "ja-JP-x-lvariant-mac", so instead we're hardcoding a conversion
+- // of it to "ja-JP-macos".
+- if (aLocale.LowerCaseEqualsASCII("ja-jp-mac")) {
+- aLocale.AssignLiteral("ja-JP-macos");
+- return true;
+- }
+-
+- nsAutoCString locale(aLocale);
+- locale.Trim(" ");
+-
+- // POSIX may bring us locales such as "en-US.UTF8", which
+- // ICU converts to `en-US-u-va-posix`. Let's cut out
+- // the `.UTF8`, since it doesn't matter for us.
+- int32_t pos = locale.FindChar('.');
+- if (pos != -1) {
+- locale.Cut(pos, locale.Length() - pos);
+- }
+-
+- // The rest of this function will use ICU canonicalization for any other
+- // tag that may come this way.
+- const int32_t LANG_TAG_CAPACITY = 128;
+- char langTag[LANG_TAG_CAPACITY];
+- UErrorCode err = U_ZERO_ERROR;
+- // This is a fail-safe method that will set langTag to "und" if it cannot
+- // match any part of the input locale code.
+- int32_t len = uloc_toLanguageTag(locale.get(), langTag, LANG_TAG_CAPACITY,
+- strict, &err);
+- if (U_SUCCESS(err) && len > 0) {
+- aLocale.Assign(langTag, len);
+- }
+- return U_SUCCESS(err);
+-}
+-
+ /**
+ * This function splits an input string by `,` delimiter, sanitizes the result
+ * language tags and returns them to the caller.
+@@ -93,7 +48,7 @@ static void SplitLocaleListStringIntoArray(nsACString& str,
+ if (str.Length() > 0) {
+ for (const nsACString& part : str.Split(',')) {
+ nsAutoCString locale(part);
+- if (SanitizeForBCP47(locale, true)) {
++ if (LocaleService::CanonicalizeLanguageId(locale)) {
+ if (!aRetVal.Contains(locale)) {
+ aRetVal.AppendElement(locale);
+ }
+@@ -421,7 +376,7 @@ LocaleService::GetDefaultLocale(nsACString& aRetVal) {
+ locale.Trim(" \t\n\r");
+ // This should never be empty.
+ MOZ_ASSERT(!locale.IsEmpty());
+- if (SanitizeForBCP47(locale, true)) {
++ if (CanonicalizeLanguageId(locale)) {
+ mDefaultLocale.Assign(locale);
+ }
+
+@@ -617,7 +572,7 @@ LocaleService::SetRequestedLocales(const nsTArray<nsCString>& aRequested) {
+
+ for (auto& req : aRequested) {
+ nsAutoCString locale(req);
+- if (!SanitizeForBCP47(locale, true)) {
++ if (!CanonicalizeLanguageId(locale)) {
+ NS_ERROR("Invalid language tag provided to SetRequestedLocales!");
+ return NS_ERROR_INVALID_ARG;
+ }
+@@ -667,7 +622,7 @@ LocaleService::SetAvailableLocales(const nsTArray<nsCString>& aAvailable) {
+
+ for (auto& avail : aAvailable) {
+ nsAutoCString locale(avail);
+- if (!SanitizeForBCP47(locale, true)) {
++ if (!CanonicalizeLanguageId(locale)) {
+ NS_ERROR("Invalid language tag provided to SetAvailableLocales!");
+ return NS_ERROR_INVALID_ARG;
+ }
+diff --git intl/locale/LocaleService.h intl/locale/LocaleService.h
+index 2ff428da3e3fd..5f5152c1fbb7a 100644
+--- old/intl/locale/LocaleService.h
++++ new/intl/locale/LocaleService.h
+@@ -10,6 +10,7 @@
+ #include "nsString.h"
+ #include "nsTArray.h"
+ #include "nsWeakReference.h"
++#include "MozLocaleBindings.h"
+
+ #include "mozILocaleService.h"
+
+@@ -104,6 +105,23 @@ class LocaleService final : public mozILocaleService,
+ return RefPtr<LocaleService>(GetInstance()).forget();
+ }
+
++ /**
++ * Canonicalize a Unicode Language Identifier string.
++ *
++ * The operation is:
++ * * Normalizing casing (`eN-Us-Windows` -> `en-US-windows`)
++ * * Switching `_` to `-` (`en_US` -> `en-US`)
++ * * Rejecting invalid identifiers (`e21-X` sets aLocale to `und` and
++ * returns false)
++ * * Normalizing Mozilla's `ja-JP-mac` to `ja-JP-macos`
++ * * Cutting off POSIX dot postfix (`en-US.utf8` -> `en-US`)
++ *
++ * This operation should be used on any external input before
++ * it gets used in internal operations.
++ */
++ static bool CanonicalizeLanguageId(nsACString& aLocale) {
++ return ffi::unic_langid_canonicalize(&aLocale);
++ }
+ /**
+ * This method should only be called in the client mode.
+ *
+diff --git intl/locale/OSPreferences.cpp intl/locale/OSPreferences.cpp
+index 283db96e0d718..f816ca57abfea 100644
+--- old/intl/locale/OSPreferences.cpp
++++ new/intl/locale/OSPreferences.cpp
+@@ -54,19 +54,7 @@ void OSPreferences::Refresh() {
+ * It returns true if the canonicalization was successful.
+ */
+ bool OSPreferences::CanonicalizeLanguageTag(nsCString& aLoc) {
+- char langTag[512];
+-
+- UErrorCode status = U_ZERO_ERROR;
+-
+- int32_t langTagLen = uloc_toLanguageTag(aLoc.get(), langTag,
+- sizeof(langTag) - 1, false, &status);
+-
+- if (U_FAILURE(status)) {
+- return false;
+- }
+-
+- aLoc.Assign(langTag, langTagLen);
+- return true;
++ return LocaleService::CanonicalizeLanguageId(aLoc);
+ }
+
+ /**
+@@ -291,7 +279,9 @@ OSPreferences::GetRegionalPrefsLocales(nsTArray<nsCString>& aRetVal) {
+ return NS_OK;
+ }
+
+- return NS_ERROR_FAILURE;
++ // If we failed to read regional prefs locales,
++ // use system locales as last fallback.
++ return GetSystemLocales(aRetVal);
+ }
+
+ static OSPreferences::DateTimeFormatStyle ToDateTimeFormatStyle(
+diff --git intl/locale/rust/unic-langid-ffi/src/lib.rs intl/locale/rust/unic-langid-ffi/src/lib.rs
+index 701d3679c4cec..517e08c1a64c1 100644
+--- old/intl/locale/rust/unic-langid-ffi/src/lib.rs
++++ new/intl/locale/rust/unic-langid-ffi/src/lib.rs
+@@ -5,18 +5,39 @@
+ use nsstring::nsACString;
+ use nsstring::nsCString;
+ use thin_vec::ThinVec;
+-pub use unic_langid::LanguageIdentifier;
++pub use unic_langid::{LanguageIdentifier, LanguageIdentifierError};
++
++fn new_langid_for_mozilla(name: &nsACString) -> Result<LanguageIdentifier, LanguageIdentifierError> {
++ if name.eq_ignore_ascii_case(b"ja-jp-mac") {
++ "ja-JP-macos".parse()
++ } else {
++ // Cut out any `.FOO` like `en-US.POSIX`.
++ let mut name: &[u8] = name.as_ref();
++ if let Some(ptr) = name.iter().position(|b| b == &b'.') {
++ name = &name[..ptr];
++ }
++ LanguageIdentifier::from_bytes(name)
++ }
++}
++
++#[no_mangle]
++pub unsafe extern "C" fn unic_langid_canonicalize(name: &mut nsACString) -> bool {
++ let langid = new_langid_for_mozilla(name);
++
++ let result = langid.is_ok();
++
++ name.assign(&langid.unwrap_or_default().to_string());
++
++ result
++}
++
+
+ #[no_mangle]
+ pub unsafe extern "C" fn unic_langid_new(
+ name: &nsACString,
+ ret_val: &mut bool,
+ ) -> *mut LanguageIdentifier {
+- let langid = if name.eq_ignore_ascii_case(b"ja-jp-mac") {
+- "ja-JP-macos".parse()
+- } else {
+- LanguageIdentifier::from_bytes(name)
+- };
++ let langid = new_langid_for_mozilla(name);
+
+ *ret_val = langid.is_ok();
+ Box::into_raw(Box::new(langid.unwrap_or_default()))
+diff --git intl/locale/tests/gtest/TestLocaleService.cpp intl/locale/tests/gtest/TestLocaleService.cpp
+index b13ecfd237dfa..aaac8615b09fd 100644
+--- old/intl/locale/tests/gtest/TestLocaleService.cpp
++++ new/intl/locale/tests/gtest/TestLocaleService.cpp
+@@ -10,6 +10,29 @@
+
+ using namespace mozilla::intl;
+
++TEST(Intl_Locale_LocaleService, CanonicalizeLanguageId)
++{
++ nsCString locale("en-US.POSIX");
++ ASSERT_TRUE(LocaleService::CanonicalizeLanguageId(locale));
++ ASSERT_TRUE(locale.EqualsLiteral("en-US"));
++
++ locale.AssignLiteral("en-US_POSIX");
++ ASSERT_TRUE(LocaleService::CanonicalizeLanguageId(locale));
++ ASSERT_TRUE(locale.EqualsLiteral("en-US-posix"));
++
++ locale.AssignLiteral("en-US-POSIX");
++ ASSERT_TRUE(LocaleService::CanonicalizeLanguageId(locale));
++ ASSERT_TRUE(locale.EqualsLiteral("en-US-posix"));
++
++ locale.AssignLiteral("C");
++ ASSERT_FALSE(LocaleService::CanonicalizeLanguageId(locale));
++ ASSERT_TRUE(locale.EqualsLiteral("und"));
++
++ locale.AssignLiteral("");
++ ASSERT_FALSE(LocaleService::CanonicalizeLanguageId(locale));
++ ASSERT_TRUE(locale.EqualsLiteral("und"));
++}
++
+ TEST(Intl_Locale_LocaleService, GetAppLocalesAsBCP47)
+ {
+ nsTArray<nsCString> appLocales;
+diff --git intl/locale/tests/unit/test_localeService.js intl/locale/tests/unit/test_localeService.js
+index 1970ae94b2607..202c2674972a8 100644
+--- old/intl/locale/tests/unit/test_localeService.js
++++ new/intl/locale/tests/unit/test_localeService.js
+@@ -204,8 +204,7 @@ add_test(function test_requestedLocales_sanitize() {
+ Assert.equal(locales[0], "de");
+ Assert.equal(locales[1], "pl");
+ Assert.equal(locales[2], "de-AT");
+- Assert.equal(locales[3], "und");
+- Assert.equal(locales.length, 4);
++ Assert.equal(locales.length, 3);
+
+ Services.prefs.clearUserPref(PREF_REQUESTED_LOCALES);
+