Add Compact Language Detection (CLD) library to Chrome. This currently works on Windows only.

BUG=none TEST=none Review URL: http://codereview.chromium.org/122007 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@18445 0039d316-1c4b-4281-b951-d872f2087c98

Add Compact Language Detection (CLD) library to Chrome. This currently works on Windows only.
BUG=none TEST=none Review URL: http://codereview.chromium.org/122007 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@18445 0039d316-1c4b-4281-b951-d872f2087c98
030bdf76 · sidchat@google.com · 30b0252c · 030bdf76 · 030bdf76 · 030bdf76
Commit 030bdf76 authored 15 years ago by sidchat@google.com
--- a/build/all.gyp
+++ b/build/all.gyp
@@ -63,6 +63,7 @@
            '../sandbox/sandbox.gyp:*',
            '../third_party/bsdiff/bsdiff.gyp:*',
            '../third_party/bspatch/bspatch.gyp:*',
+            '../third_party/cld/cld.gyp:*',
            '../third_party/tcmalloc/tcmalloc.gyp:*',
            '../tools/memory_watcher/memory_watcher.gyp:*',
            '../webkit/activex_shim/activex_shim.gyp:*',

--- a/chrome/chrome.gyp
+++ b/chrome/chrome.gyp
@@ -1756,6 +1756,7 @@
            '../google_update/google_update.gyp:google_update',
            'installer/installer.gyp:installer_util',
            '../printing/printing.gyp:printing',
+            '../third_party/cld/cld.gyp:cld',
            '../views/views.gyp:views',
            '../gears/gears.gyp:gears',
          ],
@@ -3959,6 +3960,7 @@
            '../third_party/bsdiff/bsdiff.gyp:*',
            '../third_party/bspatch/bspatch.gyp:*',
            '../third_party/bzip2/bzip2.gyp:*',
+            '../third_party/cld/cld.gyp:cld',
            '../third_party/codesighs/codesighs.gyp:*',
            '../third_party/ffmpeg/ffmpeg.gyp:*',
            '../third_party/icu38/icu38.gyp:*',
@@ -4008,6 +4010,7 @@
            #'theme_dll',
            'worker',
            '../net/net.gyp:net_resources',
+            '../third_party/cld/cld.gyp:cld',
            '../third_party/tcmalloc/tcmalloc.gyp:tcmalloc',
            '../views/views.gyp:views',
            '../webkit/webkit.gyp:webkit_resources',

--- a/third_party/cld/README.google
+++ b/third_party/cld/README.google
+CLD - list of changes (sidchat, May 2009)
+- cld_interface.cc - commented out the following includes:
+//#include "cld/bar/common/component.h"
+//#include "cld/bar/common/execute/execute_utils.h"
+and commented out the requirements in function ::GetVerifiedDllFileName
+
+-------------
+Commented out the following in commandlineflags.h
+class FlagSaver {
+ public:
+  FlagSaver();
+  ~FlagSaver();
+
+ private:
+  class FlagSaverImpl* impl_;   // we use pimpl here to keep API steady
+
+  FlagSaver(const FlagSaver&);  // no copying!
+  void operator=(const FlagSaver&);
+}
+#ifndef SWIG   // swig seems to have trouble with this for some reason
+ATTRIBUTE_UNUSED
+#endif
+;
+----------------
\ No newline at end of file
--- a/third_party/cld/bar/common/scopedlibrary.h
+++ b/third_party/cld/bar/common/scopedlibrary.h
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BAR_COMMON_SCOPEDLIBRARY_H_
+#define BAR_COMMON_SCOPEDLIBRARY_H_
+
+
+// Scoped owner of a DLL that is loaded by file name. The library is loaded
+// with ::LoadLibrary when the object is constructed and, if that succeeded,
+// released with ::FreeLibrary when the object is destroyed. The raw HMODULE
+// is available through handle().
+//
+// Example:
+//     ScopedLibrary library(LIBRARY_NAME);
+//     ... = ::GetProcAddress(library.handle(), FUNCTION_NAME);
+class ScopedLibrary {
+ public:
+  // Attempts the load immediately; use IsValid() to find out whether it
+  // succeeded.
+  // [in] file_name - library's file name.
+  explicit ScopedLibrary(const TCHAR *file_name)
+      : library_(::LoadLibrary(file_name)) {
+  }
+
+  // Frees the library, but only when a handle was actually obtained.
+  ~ScopedLibrary() {
+    if (IsValid())
+      ::FreeLibrary(library_);
+  }
+
+  // Returns true if library was loaded successfully (handle is non-NULL).
+  bool IsValid() const { return library_ != NULL; }
+
+  // Handle of the loaded library; NULL when the load failed.
+  HMODULE handle() const { return library_; }
+
+ private:
+  // Handle to loaded library; never changes after construction.
+  const HMODULE library_;
+  DISALLOW_COPY_AND_ASSIGN(ScopedLibrary);
+};
+
+
+// Typed holder for a pointer to a function retrieved from a DLL.
+// FunctionPrototype is a regular C-style pointer-to-function type, for
+// example the type of the WinAPI IsValidSid function:
+//     BOOL (WINAPI*)(PSID)
+//
+// Example:
+//     FunctionFromDll<BOOL (WINAPI*)(PSID)> is_valid_sid;
+//     ... = is_valid_sid.function()(...);
+template<typename FunctionPrototype>
+class FunctionFromDll {
+ public:
+  // Starts out unbound: function() yields NULL until Bind() is called.
+  FunctionFromDll() : function_(NULL) {
+  }
+
+  // Looks |name| up in |library| via ::GetProcAddress and stores whatever
+  // that call returns, cast to FunctionPrototype.
+  // [in] library - handle to a library containing a function.
+  //     Must not be NULL.
+  // [in] name - name of the function.
+  void Bind(HMODULE library, const char *name) {
+    function_ = reinterpret_cast<FunctionPrototype>(
+        ::GetProcAddress(library, name));
+  }
+
+  // Returns true if function was bound successfully (pointer is non-NULL).
+  bool IsValid() const { return function_ != NULL; }
+
+  // The stored function pointer; NULL while unbound.
+  FunctionPrototype function() const { return function_; }
+
+ private:
+  // Pointer to the function.
+  FunctionPrototype function_;
+  DISALLOW_COPY_AND_ASSIGN(FunctionFromDll);
+};
+
+
+#endif  // BAR_COMMON_SCOPEDLIBRARY_H_
--- a/third_party/cld/bar/common/scopedptr.h
+++ b/third_party/cld/bar/common/scopedptr.h
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BAR_COMMON_SCOPEDPTR_H_
+#define BAR_COMMON_SCOPEDPTR_H_
+
+// Scope-bound wrapper around a plain value of type T. The destructor is only
+// declared here: a definition has to be supplied for every T that is used,
+// and that definition is where the cleanup code for the value belongs.
+template < class T >
+class ScopedObject {
+ public:
+  // Stores a copy of |v|.
+  explicit ScopedObject(const T& v) : v_(v) { }
+  // Declared only; must be defined per type.
+  ~ScopedObject();
+
+  // Returns a copy of the wrapped value.
+  T get() const { return v_; }
+  // Implicit conversion to T; yields the same value as get().
+  operator T() const { return v_; }
+
+ private:
+  // The wrapped value.
+  T v_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedObject);
+};
+
+// Scoped owner for the various HANDLE- and LPVOID-based types. destroy() is
+// only declared here and must be provided for each <T, DIFFERENTIATOR>
+// combination that is used; DIFFERENTIATOR makes every instantiation a
+// distinct type so that each one can have its own destroy().
+// Added by Breen Hagan of Google.
+template < class T, int DIFFERENTIATOR >
+class ScopedHandle {
+ public:
+  // Takes ownership of |v|.
+  explicit ScopedHandle(const T& v) : v_(v) {}
+
+  // Runs destroy() on whatever value is held at that point.
+  ~ScopedHandle() { destroy(); }
+
+  // Accessors: both return a copy of the held value without giving up
+  // ownership.
+  T get() const { return v_; }
+  operator T() const { return v_; }
+
+  // Gives up ownership: hands back the held value and stores 0 in its place.
+  // destroy() is not invoked on the returned value.
+  T release() {
+    T previous(v_);
+    v_ = 0;
+    return previous;
+  }
+
+  // Replaces the held value with |v|. The old value is destroyed first,
+  // unless |v| compares equal to it, in which case nothing happens at all.
+  void reset(const T& v) {
+    if (v_ != v) {
+      destroy();
+      v_ = v;
+    }
+  }
+
+  // Swap two scoped handlers. No destroy() call is made on either value.
+  void swap(ScopedHandle& h2) {
+    T mine = v_;
+    v_ = h2.v_;
+    h2.v_ = mine;
+  }
+
+ private:
+  // Type-specific cleanup; specialized outside the class.
+  void destroy();
+
+  // The owned value.
+  T v_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedHandle);
+};
+
+// Free functions.
+// Non-member swap for two ScopedHandle objects of the same instantiation;
+// exchanges the values they hold by forwarding to the member swap().
+template <class T, int DIFFERENTIATOR>
+inline void swap(ScopedHandle<T, DIFFERENTIATOR>& h1,
+                 ScopedHandle<T, DIFFERENTIATOR>& h2) {
+  h2.swap(h1);
+}
+
+
+// SAFE_HANDLE
+//     ScopedHandle instantiation whose destroy() passes a non-zero HANDLE to
+//     ::CloseHandle().
+typedef ScopedHandle< HANDLE, 1 > SAFE_HANDLE;
+
+template <>
+inline void ScopedHandle< HANDLE, 1 >::destroy() {
+  // Nothing to close when no handle is held.
+  if (!v_)
+    return;
+  ::CloseHandle(v_);
+}
+
+// SAFE_HCRYPTPROV
+//     ScopedHandle instantiation whose destroy() passes a non-zero
+//     HCRYPTPROV to ::CryptReleaseContext() with flags 0.
+typedef ScopedHandle< HCRYPTPROV, 2 > SAFE_HCRYPTPROV;
+
+template <>
+inline void ScopedHandle< HCRYPTPROV, 2 >::destroy() {
+  // Nothing to release when no provider is held.
+  if (!v_)
+    return;
+  ::CryptReleaseContext(v_, 0);
+}
+
+// SAFE_HCRYPTKEY
+//     ScopedHandle instantiation whose destroy() passes a non-zero HCRYPTKEY
+//     to ::CryptDestroyKey().
+typedef ScopedHandle< HCRYPTKEY, 3 > SAFE_HCRYPTKEY;
+
+template <>
+inline void ScopedHandle< HCRYPTKEY, 3 >::destroy() {
+  // Nothing to destroy when no key is held.
+  if (!v_)
+    return;
+  ::CryptDestroyKey(v_);
+}
+
+// SAFE_HCRYPTHASH
+//     ScopedHandle instantiation whose destroy() passes a non-zero
+//     HCRYPTHASH to ::CryptDestroyHash().
+typedef ScopedHandle< HCRYPTHASH, 4 > SAFE_HCRYPTHASH;
+
+template <>
+inline void ScopedHandle< HCRYPTHASH, 4 >::destroy() {
+  // Nothing to destroy when no hash is held.
+  if (!v_)
+    return;
+  ::CryptDestroyHash(v_);
+}
+
+// SAFE_MAPPEDVIEW
+//     ScopedHandle instantiation whose destroy() passes a non-NULL LPVOID to
+//     ::UnmapViewOfFile().
+typedef ScopedHandle< LPVOID, 5 > SAFE_MAPPEDVIEW;
+
+template <>
+inline void ScopedHandle< LPVOID, 5 >::destroy() {
+  // Nothing to unmap when no view is held.
+  if (!v_)
+    return;
+  ::UnmapViewOfFile(v_);
+}
+
+// SAFE_HINTERNET
+//     ScopedHandle instantiation whose destroy() passes a non-zero HINTERNET
+//     to ::InternetCloseHandle().
+typedef ScopedHandle< HINTERNET, 6 > SAFE_HINTERNET;
+
+template <>
+inline void ScopedHandle< HINTERNET, 6 >::destroy() {
+  // Nothing to close when no handle is held.
+  if (!v_)
+    return;
+  ::InternetCloseHandle(v_);
+}
+
+// SAFE_HMODULE
+//     ScopedHandle instantiation whose destroy() passes a non-zero HMODULE
+//     to ::FreeLibrary().
+typedef ScopedHandle< HMODULE, 7 > SAFE_HMODULE;
+
+template <>
+inline void ScopedHandle< HMODULE, 7 >::destroy() {
+  // Nothing to free when no module is held.
+  if (!v_)
+    return;
+  ::FreeLibrary(v_);
+}
+
+// SAFE_RESOURCE
+//     ScopedHandle instantiation whose destroy() passes a non-zero HGLOBAL
+//     to ::FreeResource().
+//     The type is HGLOBAL for backward compatibility, see MSDN, LoadResource()
+//     function for details.
+typedef ScopedHandle< HGLOBAL, 8 > SAFE_RESOURCE;
+
+template <>
+inline void ScopedHandle< HGLOBAL, 8 >::destroy() {
+  // Nothing to free when no resource is held.
+  if (!v_)
+    return;
+  ::FreeResource(v_);
+}
+
+
+// ScopedIntCounter increments the given integer on construction and
+// decrements it again when the object is destroyed.
+//
+// NOTE(review): the constructor is not explicit and the class is copyable; a
+// copy would decrement the same counter a second time when it is destroyed.
+class ScopedIntCounter {
+ public:
+  // [in] counter - integer to track. It is dereferenced unconditionally here
+  //     and in the destructor, so it must not be NULL and must stay valid
+  //     for the lifetime of this object.
+  ScopedIntCounter(int *counter)
+      : counter_(counter) {
+    ++(*counter_);
+  }
+
+  ~ScopedIntCounter() {
+    --(*counter_);
+  }
+
+  // Returns the current value of the tracked integer. Const, so it can be
+  // called through const references and pointers.
+  int count() const {
+    return *counter_;
+  }
+
+ private:
+  // The tracked integer; not owned.
+  int* counter_;
+};
+
+#endif // BAR_COMMON_SCOPEDPTR_H_
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.cc
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.h
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.h
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg.h
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg.h
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_CLDUTIL_DBG_H_
+#define I18N_ENCODINGS_COMPACT_LANG_DET_CLDUTIL_DBG_H_
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.h"
+#include <string>
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_commandlineflags.h"
+
+// Debug flags used by the routines declared below. The matching DEFINE_bool
+// definitions live in cldutil_dbg_empty.cc.
+DECLARE_bool(dbgscore);
+DECLARE_bool(dbglookup);
+DECLARE_bool(dbgreli);
+
+
+namespace cld {
+
+
+//------------------------------------------------------------------------------
+// Debugging. Not thread safe
+//
+// Declarations of the debug-output hooks for the language detector.
+// cldutil_dbg_empty.cc provides a build of these in which every routine
+// returns immediately.
+//------------------------------------------------------------------------------
+
+  // NOTE(review): presumably starts a debug-scoring session for the |len|
+  // bytes at |src| -- confirm against a non-empty implementation.
+  void DbgScoreInit(const char* src, int len);
+
+  // Return a 3-byte + NUL code for language
+  // The result is written to |temp|, which therefore needs room for at least
+  // 4 chars.
+  void DbgLangName3(Language lang, char* temp);
+
+  // Show all per-language totals
+  void DbgScoreState();
+
+  // NOTE(review): presumably reports the top-scoring entries of |chunk_tote|
+  // for the |srclen| bytes at |src| -- confirm.
+  void DbgScoreTop(const char* src, int srclen, Tote* chunk_tote);
+
+  // NOTE(review): presumably flushes any pending debug-scoring output --
+  // confirm.
+  void DbgScoreFlush();
+
+  // Allow additional scoring debug output
+  void DbgScoreRecord(const char* src, uint32 probs, int len);
+
+  // NOTE(review): looks like the counterpart of DbgScoreRecord that takes a
+  // property value instead of packed probabilities -- confirm.
+  void DbgScoreRecordUni(const char* src, int propval, int len);
+
+  // Debug print language name(s)
+  // Output goes to the stream |f|.
+  void PrintLang(FILE* f, const Tote* chunk_tote,
+                 const Language cur_lang, const bool cur_unreliable,
+                 Language prior_lang, bool prior_unreliable);
+
+  // Debug print language name(s)
+  // Output goes to the stream |f|.
+  void PrintLang2(FILE* f,
+                  const Language lang1, const Language lang2, bool diff_prior);
+
+  // Debug print text span
+  void PrintText(FILE* f, Language cur_lang, const string& str);
+
+  // Debug print text span with speculative language
+  void PrintTextSpeculative(FILE* f, Language cur_lang, const string& str);
+
+  // Debug print ignored text span
+  void PrintSkippedText(FILE* f, const string& str);
+
+  // The *ToStderr routines below take no stream argument; going by their
+  // names they write to stderr.
+  void DbgProbsToStderr(uint32 probs);
+  void DbgUniTermToStderr(int propval, const uint8* usrc, int len);
+  // No pre/post space
+  void DbgBiTermToStderr(uint32 bihash, uint32 probs,
+                          const char* src, int len);
+  void DbgQuadTermToStderr(uint32 quadhash, uint32 probs,
+                          const char* src, int len);
+  void DbgWordTermToStderr(uint64 wordhash, uint32 probs,
+                          const char* src, int len);
+
+}       // End namespace cld
+
+
+#endif  // I18N_ENCODINGS_COMPACT_LANG_DET_CLDUTIL_DBG_H_
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg_empty.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg_empty.cc
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_commandlineflags.h"
+
+// Definitions of the debug flags declared with DECLARE_bool in cldutil_dbg.h.
+// NOTE(review): the second argument is presumably the default value and the
+// third the help text -- confirm against cld_commandlineflags.h.
+DEFINE_bool(dbgscore, false, "Print picture of score calculation");
+DEFINE_bool(dbglookup, false, "Print every quad/uni lookup in score calc");
+DEFINE_bool(dbgreli, false, "Print reliability in score calc");
+
+namespace cld {
+
+
+//------------------------------------------------------------------------------
+// Debugging. Not thread safe
+// This is the empty version -- routines return immediately
+//
+// Every function below is an intentional no-op stub matching a declaration
+// in cldutil_dbg.h. The bodies are not followed by a semicolon: a ';' after a
+// function definition is a stray empty declaration at namespace scope.
+//------------------------------------------------------------------------------
+
+  // No-op: debug-scoring session setup.
+  void DbgScoreInit(const char* src, int len) {}
+
+  // Return a 3-byte + NUL code for language
+  // No-op in this build: |temp| is left untouched.
+  void DbgLangName3(Language lang, char* temp) {}
+
+  // Show all per-language totals
+  void DbgScoreState() {}
+
+  // No-op: |chunk_tote| is neither read nor modified.
+  void DbgScoreTop(const char* src, int srclen, Tote* chunk_tote) {}
+
+  // No-op.
+  void DbgScoreFlush() {}
+
+  // Allow additional scoring debug output
+  void DbgScoreRecord(const char* src, uint32 probs, int len) {}
+
+  // No-op.
+  void DbgScoreRecordUni(const char* src, int propval, int len) {}
+
+  // Debug print language name(s)
+  // No-op in this build: nothing is written to |f|.
+  void PrintLang(FILE* f, const Tote* chunk_tote,
+                 const Language cur_lang, const bool cur_unreliable,
+                 Language prior_lang, bool prior_unreliable) {}
+
+  // Debug print language name(s)
+  // No-op in this build: nothing is written to |f|.
+  void PrintLang2(FILE* f,
+                  const Language lang1, const Language lang2,
+                  bool diff_prior) {}
+
+  // Debug print text span
+  void PrintText(FILE* f, Language cur_lang, const string& str) {}
+
+  // Debug print text span with speculative language
+  void PrintTextSpeculative(FILE* f, Language cur_lang, const string& str) {}
+
+  // Debug print ignored text span
+  void PrintSkippedText(FILE* f, const string& str) {}
+
+  // No-op stubs for the per-term debug dumps.
+  void DbgProbsToStderr(uint32 probs) {}
+  void DbgUniTermToStderr(int propval, const uint8* usrc, int len) {}
+  // No pre/post space
+  void DbgBiTermToStderr(uint32 bihash, uint32 probs,
+                          const char* src, int len) {}
+  void DbgQuadTermToStderr(uint32 quadhash, uint32 probs,
+                          const char* src, int len) {}
+  void DbgWordTermToStderr(uint64 wordhash, uint32 probs,
+                          const char* src, int len) {}
+
+
+}       // End namespace cld
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h"
+
+// Version string handed out by CompactLangDet::DetectLanguageVersion().
+// String is "code_version - data_scrape_date"
+// Declared as a const char array rather than a non-const pointer so the
+// global itself cannot be re-pointed at run time.
+static const char kDetectLanguageVersion[] = "V1.6 - 20081121";
+
+// Large-table version for all ~160 languages (all Tiers)
+
+// Scan interchange-valid UTF-8 bytes and detect most likely language.
+// Runs the detector with no TLD, encoding, or language hint and with
+// extended languages disabled. The top-3 summary the implementation fills in
+// is discarded; only the returned language and |is_reliable| reach the
+// caller. A result of UNKNOWN_LANGUAGE is returned as ENGLISH.
+Language CompactLangDet::DetectLanguage(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          bool* is_reliable) {
+  // No hints.
+  const char* tld_hint = "";
+  int encoding_hint = UNKNOWN_ENCODING;
+  Language language_hint = UNKNOWN_LANGUAGE;
+
+  // Fixed options.
+  bool allow_extended_lang = false;
+  int flags = 0;
+  Language plus_one = UNKNOWN_LANGUAGE;
+
+  // Scratch outputs required by the implementation but not exposed here.
+  Language language3[3];
+  int percent3[3];
+  double normalized_score3[3];
+  int text_bytes;
+
+  Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+      buffer, buffer_length, is_plain_text,
+      tld_hint, encoding_hint, language_hint,
+      allow_extended_lang, flags, plus_one,
+      language3, percent3, normalized_score3,
+      &text_bytes, is_reliable);
+
+  // Default to English.
+  return (lang == UNKNOWN_LANGUAGE) ? ENGLISH : lang;
+}
+
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
+// Runs the detector with no hints and with extended languages disabled.
+// |language3|, |percent3|, |text_bytes| and |is_reliable| are passed straight
+// through to the implementation; the normalized scores it computes are
+// discarded. A return value of UNKNOWN_LANGUAGE is replaced by ENGLISH.
+Language CompactLangDet::DetectLanguageSummary(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          Language* language3,
+                          int* percent3,
+                          int* text_bytes,
+                          bool* is_reliable) {
+  // No hints.
+  const char* tld_hint = "";
+  int encoding_hint = UNKNOWN_ENCODING;
+  Language language_hint = UNKNOWN_LANGUAGE;
+
+  // Fixed options.
+  bool allow_extended_lang = false;
+  int flags = 0;
+  Language plus_one = UNKNOWN_LANGUAGE;
+
+  // Scratch output, not exposed to the caller.
+  double normalized_score3[3];
+
+  Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+      buffer, buffer_length, is_plain_text,
+      tld_hint, encoding_hint, language_hint,
+      allow_extended_lang, flags, plus_one,
+      language3, percent3, normalized_score3,
+      text_bytes, is_reliable);
+
+  // Default to English
+  return (lang == UNKNOWN_LANGUAGE) ? ENGLISH : lang;
+}
+
+// Same as above, with hints supplied
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
+// The caller's hints are forwarded unchanged; extended languages stay
+// disabled and the normalized scores are discarded. A return value of
+// UNKNOWN_LANGUAGE is replaced by ENGLISH.
+Language CompactLangDet::DetectLanguageSummary(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          const char* tld_hint,       // "id" boosts Indonesian
+                          int encoding_hint,          // SJS boosts Japanese
+                          Language language_hint,     // ITALIAN boosts it
+                          Language* language3,
+                          int* percent3,
+                          int* text_bytes,
+                          bool* is_reliable) {
+  // Fixed options.
+  bool allow_extended_lang = false;
+  int flags = 0;
+  Language plus_one = UNKNOWN_LANGUAGE;
+
+  // Scratch output, not exposed to the caller.
+  double normalized_score3[3];
+
+  Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+      buffer, buffer_length, is_plain_text,
+      tld_hint, encoding_hint, language_hint,
+      allow_extended_lang, flags, plus_one,
+      language3, percent3, normalized_score3,
+      text_bytes, is_reliable);
+
+  // Default to English
+  return (lang == UNKNOWN_LANGUAGE) ? ENGLISH : lang;
+}
+
+
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
+// languages.
+// Extended languages are additional Google interface languages and Unicode
+// single-language scripts, from ext_lang_enc.h
+// Runs the detector with no hints and with extended languages enabled; the
+// normalized scores are discarded. The implementation's result is returned
+// as is, so UNKNOWN_LANGUAGE can be returned.
+Language CompactLangDet::ExtDetectLanguageSummary(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          Language* language3,
+                          int* percent3,
+                          int* text_bytes,
+                          bool* is_reliable) {
+  // No hints.
+  const char* tld_hint = "";
+  int encoding_hint = UNKNOWN_ENCODING;
+  Language language_hint = UNKNOWN_LANGUAGE;
+
+  // Fixed options.
+  bool allow_extended_lang = true;
+  int flags = 0;
+  Language plus_one = UNKNOWN_LANGUAGE;
+
+  // Scratch output, not exposed to the caller.
+  double normalized_score3[3];
+
+  // Do not default to English
+  return CompactLangDetImpl::DetectLanguageSummaryV25(
+      buffer, buffer_length, is_plain_text,
+      tld_hint, encoding_hint, language_hint,
+      allow_extended_lang, flags, plus_one,
+      language3, percent3, normalized_score3,
+      text_bytes, is_reliable);
+}
+
+// Same as above, with hints supplied
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
+// languages.
+// Extended languages are additional Google interface languages and Unicode
+// single-language scripts, from ext_lang_enc.h
+// The caller's hints are forwarded unchanged and the normalized scores are
+// discarded. The implementation's result is returned as is, so
+// UNKNOWN_LANGUAGE can be returned.
+Language CompactLangDet::ExtDetectLanguageSummary(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          const char* tld_hint,       // "id" boosts Indonesian
+                          int encoding_hint,          // SJS boosts Japanese
+                          Language language_hint,     // ITALIAN boosts it
+                          Language* language3,
+                          int* percent3,
+                          int* text_bytes,
+                          bool* is_reliable) {
+  // Fixed options.
+  bool allow_extended_lang = true;
+  int flags = 0;
+  Language plus_one = UNKNOWN_LANGUAGE;
+
+  // Scratch output, not exposed to the caller.
+  double normalized_score3[3];
+
+  // Do not default to English
+  return CompactLangDetImpl::DetectLanguageSummaryV25(
+      buffer, buffer_length, is_plain_text,
+      tld_hint, encoding_hint, language_hint,
+      allow_extended_lang, flags, plus_one,
+      language3, percent3, normalized_score3,
+      text_bytes, is_reliable);
+}
+
+// Same as above, and also returns internal language scores as a ratio to
+// normal score for real text in that language. Scores close to 1.0 indicate
+// normal text, while scores far away from 1.0 indicate badly-skewed text or
+// gibberish
+//
+// Every output pointer, including |normalized_score3|, is forwarded to the
+// implementation; extended languages are enabled and the result is returned
+// as is, so UNKNOWN_LANGUAGE can be returned.
+Language CompactLangDet::ExtDetectLanguageSummary(
+                        const char* buffer,
+                        int buffer_length,
+                        bool is_plain_text,
+                        const char* tld_hint,       // "id" boosts Indonesian
+                        int encoding_hint,          // SJS boosts Japanese
+                        Language language_hint,     // ITALIAN boosts it
+                        Language* language3,
+                        int* percent3,
+                        double* normalized_score3,
+                        int* text_bytes,
+                        bool* is_reliable) {
+  // Fixed options.
+  bool allow_extended_lang = true;
+  int flags = 0;
+  Language plus_one = UNKNOWN_LANGUAGE;
+
+  // Do not default to English
+  return CompactLangDetImpl::DetectLanguageSummaryV25(
+      buffer, buffer_length, is_plain_text,
+      tld_hint, encoding_hint, language_hint,
+      allow_extended_lang, flags, plus_one,
+      language3, percent3, normalized_score3,
+      text_bytes, is_reliable);
+}
+
+
+
+// Return version text string
+// String is "code_version - data_scrape_date"
+// The returned pointer refers to the file-level kDetectLanguageVersion
+// constant, so the caller must not free or modify it.
+const char* CompactLangDet::DetectLanguageVersion() {
+  return kDetectLanguageVersion;
+}
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.h
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.h
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// NOTE:
+// This code has not yet been evaluated against LangId, which is the official
+// production language identification system. However, it seems to be of
+// similar precision overall, and it covers all the Google languages in
+//   i18n/languages/proto/languages.proto
+// except the four Creoles_and_Pigins.
+
+// Baybayin (ancient script of the Philippines) is detected as TAGALOG.
+// Chu Nom (Vietnamese ancient Han characters) is detected as VIETNAMESE.
+// HAITIAN_CREOLE is detected as such.
+// NORWEGIAN and NORWEGIAN_N are detected separately (but not robustly)
+// PORTUGUESE, PORTUGUESE_P, and PORTUGUESE_B are all detected as PORTUGUESE.
+// ROMANIAN-Latin is detected as ROMANIAN; ROMANIAN-Cyrillic as MOLDAVIAN.
+// SERBO_CROATIAN, BOSNIAN, CROATIAN, SERBIAN, MONTENEGRIN in the Latin script
+//  are all detected as CROATIAN; in the Cyrillic script as SERBIAN.
+// Zhuang is detected in the Latin script only.
+//
+// The Google interface languages X_PIG_LATIN and X_KLINGON are detected in the
+//  extended calls ExtDetectLanguageSummary(). BorkBorkBork, ElmerFudd, and
+//  Hacker are not detected (too little training data).
+//
+// UNKNOWN_LANGUAGE is returned if no language's internal reliability measure
+//  is high enough. This happens with non-text input such as the bytes of a
+//  JPEG, and also with some text in languages outside the Google Language
+//  enum, such as Ilonggo.
+//
+// The following languages are detected in multiple scripts:
+//  AZERBAIJANI (Latin, Cyrillic*, Arabic*)
+//  BURMESE (Latin, Myanmar)
+//  HAUSA (Latin, Arabic)
+//  KASHMIRI (Arabic, Devanagari)
+//  KAZAKH (Latin, Cyrillic, Arabic)
+//  KURDISH (Latin*, Arabic)
+//  KYRGYZ (Cyrillic, Arabic)
+//  LIMBU (Devanagari, Limbu)
+//  MONGOLIAN (Cyrillic, Mongolian)
+//  SANSKRIT (Latin, Devanagari)
+//  SINDHI (Arabic, Devanagari)
+//  TAGALOG (Latin, Tagalog)
+//  TAJIK (Cyrillic, Arabic*)
+//  TATAR (Latin, Cyrillic, Arabic)
+//  TURKMEN (Latin, Cyrillic, Arabic)
+//  UIGHUR (Latin, Cyrillic, Arabic)
+//  UZBEK (Latin, Cyrillic, Arabic)
+//
+// * Due to a shortage of training text, AZERBAIJANI is not currently detected
+//   in Arabic or Cyrillic scripts, nor KURDISH in Latin script, nor TAJIK in
+//   Arabic script.
+//
+
+#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_H_
+#define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_H_
+
+#include "third_party/cld/bar/toolbar/cld/i18n/languages/public/languages.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h"
+
+namespace CompactLangDet {
+  // Scan interchange-valid UTF-8 bytes and detect most likely language,
+  // or set of languages.
+  //
+  // Design goals:
+  //   Skip over big stretches of HTML tags
+  //   Able to return ranges of different languages
+  //   Relatively small tables and relatively fast processing
+  //   Thread safe
+  //
+  // For HTML documents, tags are skipped, along with <script> ... </script>
+  // and <style> ... </style> sequences, and entities are expanded.
+  //
+  // We distinguish between bytes of the raw input buffer and bytes of non-tag
+  // text letters. Since tags can be over 50% of the bytes of an HTML Page,
+  // and are nearly all seven-bit ASCII English, we prefer to distinguish
+  // language mixture fractions based on just the non-tag text.
+  //
+  // Inputs: text and text_length
+  //  Code skips HTML tags and expands HTML entities, unless
+  //  is_plain_text is true
+  // Outputs:
+  //  language3 is an array of the top 3 languages or UNKNOWN_LANGUAGE
+  //  percent3 is an array of the text percentages 0..100 of the top 3 languages
+  //  text_bytes is the amount of non-tag/letters-only text found
+  //  is_reliable set true if the returned Language is some amount more
+  //  probable than the second-best Language. Calculation is a complex function
+  //  of the length of the text and the different-script runs of text.
+  // Return value: the most likely Language for the majority of the input text
+  //  Length 0 input returns UNKNOWN_LANGUAGE. Very short indeterminate text
+  //  defaults to ENGLISH.
+  //
+  // The first two versions return ENGLISH instead of UNKNOWN_LANGUAGE, for
+  // backwards compatibility with LLD.
+  //
+  // The third version may return UNKNOWN_LANGUAGE, and also returns extended
+  // language codes from ext_lang_enc.h
+  //
+  // Subsetting: For fast detection over large documents, these routines will
+  // scan non-tag text of the initial part of a document, then will
+  // skip 4-16 bytes and subsample text in the rest of the document, up to a
+  // fixed limit (currently 160KB of non-tag letters).
+  //
+
+  // Scan interchange-valid UTF-8 bytes and detect most likely language
+  Language DetectLanguage(
+                          const char* buffer,
+                          int buffer_length,
+                          bool is_plain_text,
+                          bool* is_reliable);
+
+  // Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
+  // language3[0] is also the return value
+  Language DetectLanguageSummary(
+                          const char* buffer,         // Input text, UTF-8
+                          int buffer_length,          // Length of the input text
+                          bool is_plain_text,         // true: do not skip tags/expand entities
+                          Language* language3,        // Out: top 3 languages or UNKNOWN_LANGUAGE
+                          int* percent3,              // Out: text percentages 0..100 of the top 3
+                          int* text_bytes,            // Out: amount of non-tag/letters-only text
+                          bool* is_reliable);         // Out: true if result is reliable
+
+  // Same as above, with hints supplied
+  // Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
+  // language3[0] is also the return value
+  Language DetectLanguageSummary(
+                          const char* buffer,         // Input text, UTF-8
+                          int buffer_length,          // Length of the input text
+                          bool is_plain_text,         // true: do not skip tags/expand entities
+                          const char* tld_hint,       // "id" boosts Indonesian
+                          int encoding_hint,          // SJS boosts Japanese
+                          Language language_hint,     // ITALIAN boosts it
+                          Language* language3,        // Out: top 3 languages or UNKNOWN_LANGUAGE
+                          int* percent3,              // Out: text percentages 0..100 of the top 3
+                          int* text_bytes,            // Out: amount of non-tag/letters-only text
+                          bool* is_reliable);         // Out: true if result is reliable
+
+  // Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
+  // languages.
+  //
+  // Extended languages are additional Google interface languages and Unicode
+  // single-language scripts, from ext_lang_enc.h. They are experimental and
+  // this call may be removed.
+  //
+  // language3[0] is also the return value
+  Language ExtDetectLanguageSummary(
+                          const char* buffer,         // Input text, UTF-8
+                          int buffer_length,          // Length of the input text
+                          bool is_plain_text,         // true: do not skip tags/expand entities
+                          Language* language3,        // Out: top 3 (possibly extended) languages
+                          int* percent3,              // Out: text percentages 0..100 of the top 3
+                          int* text_bytes,            // Out: amount of non-tag/letters-only text
+                          bool* is_reliable);         // Out: true if result is reliable
+
+  // Same as above, with hints supplied
+  // Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
+  // languages.
+  //
+  // Extended languages are additional Google interface languages and Unicode
+  // single-language scripts, from ext_lang_enc.h. They are experimental and
+  // this call may be removed.
+  //
+  // language3[0] is also the return value
+  Language ExtDetectLanguageSummary(
+                          const char* buffer,         // Input text, UTF-8
+                          int buffer_length,          // Length of the input text
+                          bool is_plain_text,         // true: do not skip tags/expand entities
+                          const char* tld_hint,       // "id" boosts Indonesian
+                          int encoding_hint,          // SJS boosts Japanese
+                          Language language_hint,     // ITALIAN boosts it
+                          Language* language3,        // Out: top 3 (possibly extended) languages
+                          int* percent3,              // Out: text percentages 0..100 of the top 3
+                          int* text_bytes,            // Out: amount of non-tag/letters-only text
+                          bool* is_reliable);         // Out: true if result is reliable
+
+  // Same as above, and also returns internal language scores as a ratio to
+  // normal score for real text in that language. Scores close to 1.0 indicate
+  // normal text, while scores far away from 1.0 indicate badly-skewed text or
+  // gibberish
+  //
+  Language ExtDetectLanguageSummary(
+                          const char* buffer,         // Input text, UTF-8
+                          int buffer_length,          // Length of the input text
+                          bool is_plain_text,         // true: do not skip tags/expand entities
+                          const char* tld_hint,       // "id" boosts Indonesian
+                          int encoding_hint,          // SJS boosts Japanese
+                          Language language_hint,     // ITALIAN boosts it
+                          Language* language3,        // Out: top 3 (possibly extended) languages
+                          int* percent3,              // Out: text percentages 0..100 of the top 3
+                          double* normalized_score3,  // Out: score ratios; near 1.0 is normal text
+                          int* text_bytes,            // Out: amount of non-tag/letters-only text
+                          bool* is_reliable);         // Out: true if result is reliable
+
+  // Return version text string
+  // String is "code_version - data_scrape_date"
+  const char* DetectLanguageVersion();  // "code_version - data_scrape_date"
+};      // End namespace CompactLangDet
+
+#endif  // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_H_
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_cjkbis_0.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_cjkbis_0.cc
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.h"
+
+// Suppressed:
+//      az-Arab az-Cyrl ku-Latn tg-Arab za-Hani zzb-Latn zze-Latn zzh-Latn ru-Latn
+
+// Remapped:
+//      xxx-Latn=>ut-Latn sh-Latn=>hr-Latn sh-Cyrl=>sr-Cyrl
+
+static const int kCjkBiTableBuildDate = 20090129;    // yyyymmdd
+static const int kCjkBiTableSize = 1;    // Bucket count
+static const int kCjkBiTableKeyMask = 0xffffffff;    // Mask hash key. NOTE(review): 0xffffffff does not fit a 32-bit signed int; confirm uint32 was not intended
+
+COMPILE_ASSERT(MONTENEGRIN == 160, k_montenegrin_changed);  // Build breaks if MONTENEGRIN is renumbered
+COMPILE_ASSERT(EXT_NUM_LANGUAGES == 209, k_ext_num_languages_changed);  // Build breaks if the extended count changes
+
+// Empty table
+static const cld::IndirectProbBucket4 kCjkBiTable[kCjkBiTableSize] = {
+  // Per bucket: key[4], words[4] in UTF-8
+  // value[4]
+  { {0x00000000,0x00000000,0x00000000,0x00000000}},  // [000] c -- the only bucket, all zero
+};
+
+static const uint32 kCjkBiTableInd[1] = {
+  // [0000] the only indirect entry, zero
+  0x00000000, };
+
+COMPILE_ASSERT(1 < (1 << 16), k_indirectbits_too_small);  // Presumably: indirect entry count (1) must fit in 16 bits -- confirm
+
+
+extern const cld::CLDTableSummary kCjkBiTable_obj = {  // Summary record with external linkage
+  kCjkBiTable,                  // Bucket array
+  kCjkBiTableInd,               // Indirect array
+  kCjkBiTableSize,              // Bucket count
+  ARRAYSIZE(kCjkBiTableInd),    // Number of indirect entries
+  kCjkBiTableKeyMask,           // Hash key mask
+  kCjkBiTableBuildDate,         // Build date, yyyymmdd
+};
+
+// End of generated tables
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_ctjkvz.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_ctjkvz.cc
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_longwords8_0.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_longwords8_0.cc
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.h"
+
+// Suppressed:
+//      az-Arab az-Cyrl ku-Latn tg-Arab za-Hani zzb-Latn zze-Latn zzh-Latn ru-Latn
+
+// Remapped:
+//      xxx-Latn=>ut-Latn sh-Latn=>hr-Latn sh-Cyrl=>sr-Cyrl
+
+// ms/id probabilities leveled
+
+static const int kLongWord8TableBuildDate = 20081007;    // yyyymmdd
+
+// Fail the build if these enum values are renumbered (presumably because the
+// generated data depends on them -- confirm). Asserted once, as in the other
+// generated table files.
+COMPILE_ASSERT(MONTENEGRIN == 160, k_montenegrin_changed);
+COMPILE_ASSERT(EXT_NUM_LANGUAGES == 209, k_ext_num_languages_changed);
+
+static const int kLongWord8TableSize = 1;    // Bucket count
+// NOTE(review): 0xffffffff does not fit a 32-bit signed int; confirm uint32
+// was not intended before changing the type.
+static const int kLongWord8TableKeyMask = 0xffffffff;    // Mask hash key
+
+// Empty table
+static const cld::IndirectProbBucket4 kLongWord8Table[kLongWord8TableSize] = {
+  // Per bucket: key[4], words[4] in UTF-8
+  // value[4]
+  { {0x00000000,0x00000000,0x00000000,0x00000000}},	// [000] c -- the only bucket, all zero
+};
+
+static const uint32 kLongWord8TableInd[1] = {
+  // [0000] the only indirect entry, zero
+  0x00000000, };
+
+COMPILE_ASSERT(1 < (1 << 16), k_indirectbits_too_small);  // Presumably: indirect entry count (1) must fit in 16 bits -- confirm
+
+
+extern const cld::CLDTableSummary kLongWord8Table_obj = {  // Summary record with external linkage
+  kLongWord8Table,                  // Bucket array
+  kLongWord8TableInd,               // Indirect array
+  kLongWord8TableSize,              // Bucket count
+  ARRAYSIZE(kLongWord8TableInd),    // Number of indirect entries
+  kLongWord8TableKeyMask,           // Hash key mask
+  kLongWord8TableBuildDate,         // Build date, yyyymmdd
+};
+
+// End of generated tables
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_meanscore.h
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_meanscore.h
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_GENERATED_MEANSCORE_H__
+#define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_GENERATED_MEANSCORE_H__
+
+#if 0
+// Generated 2008.05.08 with boot3abc
+static const short kMeanScore[256 * 4] = {
+  769, 0, 0, 0,  1011, 0, 0, 0,  1263, 0, 0, 0,  1934, 0, 0, 0,
+  1039, 0, 0, 0,  1296, 0, 0, 0,  0, 0, 0, 1216,  907, 0, 0, 0,
+  0, 0, 0, 3032,  0, 0, 0, 3423,  971, 0, 0, 0,  1855, 0, 0, 0,
+  794, 0, 0, 0,  0, 1099, 0, 0,  733, 0, 0, 0,  1201, 0, 0, 0,
+  0, 0, 0, 1523,  1539, 0, 0, 0,  0, 0, 0, 1024,  1677, 0, 0, 0,
+  1929, 0, 0, 0,  1917, 0, 0, 0,  1414, 0, 0, 0,  1954, 0, 0, 0,
+  1183, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 921, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  1425, 0, 0, 0,  621, 0, 0, 0,
+
+  1498, 0, 0, 0,  1532, 0, 0, 0,  0, 1021, 0, 0,  0, 0, 0, 823,
+  0, 748, 0, 0,  0, 0, 0, 863,  1543, 0, 0, 0,  854, 0, 0, 0,
+  1131, 0, 0, 0,  0, 0, 0, 1024,  1539, 0, 0, 0,  0, 0, 0, 907,
+  0, 0, 0, 1024,  1257, 0, 0, 0,  0, 0, 0, 1024,  0, 1029, 0, 0,
+  893, 0, 0, 0,  599, 0, 0, 0,  0, 0, 1241, 0,  0, 0, 0, 642,
+  0, 0, 0, 1024,  0, 0, 0, 1024,  0, 0, 1165, 0,  875, 0, 0, 0,
+  826, 0, 0, 0,  1225, 0, 0, 0,  369, 0, 0, 0,  0, 0, 0, 1024,
+  0, 0, 0, 1024,  1667, 0, 0, 0,  1021, 0, 0, 0,  1579, 0, 0, 0,
+
+  0, 0, 0, 594,  1226, 0, 0, 0,  1873, 0, 0, 0,  1041, 0, 0, 0,
+  1528, 0, 0, 0,  0, 0, 0, 1606,  1203, 0, 0, 0,  860, 0, 0, 0,
+  1303, 894, 1204, 0,  0, 0, 0, 714,  1679, 0, 0, 0,  0, 0, 0, 1024,
+  0, 0, 0, 817,  0, 0, 977, 0,  0, 0, 0, 0,  0, 0, 0, 1024,
+  663, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  1326, 0, 0, 0,
+  1312, 0, 0, 0,  1480, 0, 0, 0,  1406, 0, 0, 0,  1605, 953, 0, 0,
+  0, 835, 1296, 0,  1205, 0, 0, 0,  1321, 0, 0, 0,  0, 0, 0, 1234,
+  944, 649, 0, 0,  1429, 0, 0, 0,  1402, 1109, 1055, 0,  0, 0, 1108, 0,
+
+  0, 1193, 0, 0,  0, 0, 0, 1024,  0, 0, 0, 1024,  0, 0, 1052, 0,
+  877, 0, 0, 0,  888, 0, 0, 0,  1170, 0, 0, 0,  1578, 0, 0, 1024,
+  0, 0, 0, 1024,  0, 0, 0, 895,  0, 0, 0, 1024,  0, 0, 0, 1024,
+  0, 0, 0, 0,  826, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 745,
+  965, 0, 0, 0,  645, 0, 0, 0,  927, 846, 0, 0,  1336, 0, 0, 0,
+  // 0, 655, 0, 0,  0, 0, 982, 0,  1778, 0, 0, 0,  1563, 0, 0, 0,   // original
+  0,1233, 0, 0,  0, 0, 982, 0,  1778, 0, 0, 0,  1563, 0, 0, 0,      // Moldavian[116] 2008.08.08
+  0, 746, 0, 0,  928, 509, 0, 0,  0, 0, 0, 0,  1226, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  // There are a lot of Afar false hits on lines with "radar" or "radares"
+  // Also lines with "libdata" and related
+  // So I artificially removed Afar from _rada, _libd etc. in quads table.
+
+  // 1308, 0, 0, 0,  1031, 0, 0, 0,  0, 1022, 0, 0,  0, 0, 0, 0,    // original
+  1308, 0, 0, 0,  1031, 0, 0, 0,  0, 1022, 0, 0,  1762, 0, 0, 0,    // Afar[131] 2008.09.05
+  1918, 0, 0, 0,  0, 958, 0, 0,  1761, 0, 0, 0,  0, 0, 0, 913,
+  1564, 0, 0, 0,  2155, 0, 0, 0,  1113, 0, 0, 0,  1402, 0, 0, 0,
+  2372, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  1184, 0, 0, 0,
+  1650, 0, 0, 0,  1482, 0, 0, 0,  1746, 0, 0, 0,  1481, 0, 0, 0,
+  1313, 0, 0, 0,  1720, 0, 0, 0,  1579, 0, 0, 458,  1192, 0, 0, 0,
+  1346, 0, 0, 0,  1402, 0, 0, 0,  1462, 0, 0, 0,  2228, 0, 0, 1498,
+  // 0, 0, 0, 0,  1199, 0, 0, 0,  1462, 0, 0, 0,  1636, 0, 0, 0,    // original
+  1376, 0, 0, 0,  1199, 0, 0, 0,  1462, 0, 0, 0,  1636, 0, 0, 0,    // Khasi[156] 2008.09.05
+
+  0, 0, 0, 0,  2060, 0, 0, 0,  0, 0, 0, 0,  1836, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 1024,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  };
+#endif
+
+
+#if 1
+// Generated 2008.10.01 from
+//   /export/hda3/cld/20080409/b0123a_9000_samp_prune.utf8
+// Four-byte quads, four-byte longwords (included here)
+// Extended languages start at [165]
+// ks-Deva fix included
+//
+static const short kMeanScore[256 * 4] = {
+  612, 0, 0, 0,  614, 0, 0, 0,  799, 0, 0, 0,  1310, 0, 0, 0,
+  678, 0, 0, 0,  887, 0, 0, 0,  0, 0, 0, 1073,  510, 0, 0, 0,
+  0, 0, 0, 3109,  0, 0, 0, 3423,  563, 0, 0, 0,  1406, 0, 0, 0,
+  509, 0, 0, 0,  0, 750, 0, 0,  449, 0, 0, 0,  825, 0, 0, 0,
+  0, 0, 0, 1820,  1153, 0, 0, 0,  0, 0, 0, 1024,  1443, 0, 0, 0,
+  1458, 0, 0, 0,  1320, 0, 0, 0,  1002, 0, 0, 0,  1518, 0, 0, 0,
+  972, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 606, 0, 0,
+  735, 0, 0, 0,  0, 557, 0, 0,  1098, 0, 0, 0,  496, 0, 0, 0,
+
+  1060, 0, 0, 0,  1293, 0, 0, 0,  0, 709, 0, 0,  0, 0, 0, 676,
+  0, 656, 0, 0,  0, 0, 0, 671,  545, 0, 0, 0,  698, 0, 0, 0,
+  584, 0, 0, 0,  0, 0, 0, 1024,  1422, 0, 0, 0,  0, 0, 0, 754,
+  0, 0, 0, 1024,  1182, 0, 0, 0,  0, 0, 0, 1024,  0, 860, 0, 0,
+  685, 0, 0, 0,  438, 0, 0, 0,  0, 0, 1111, 0,  0, 0, 0, 613,
+  0, 0, 0, 1024,  0, 0, 0, 1024,  0, 0, 1019, 0,  600, 0, 0, 0,
+  746, 0, 0, 0,  1001, 0, 0, 0,  350, 0, 0, 0,  0, 0, 0, 1024,
+  0, 0, 0, 1024,  1318, 0, 0, 0,  812, 0, 0, 0,  1130, 0, 0, 0,
+
+  0, 0, 0, 507,  972, 0, 0, 0,  1539, 0, 0, 0,  787, 0, 0, 0,
+  1174, 0, 0, 0,  0, 0, 0, 1780,  911, 0, 0, 0,  695, 0, 0, 0,
+  1074, 881, 968, 0,  0, 0, 0, 571,  1377, 0, 0, 0,  0, 0, 0, 1024,
+  0, 0, 0, 739,  0, 0, 876, 0,  0, 0, 0, 0,  0, 0, 0, 1024,
+  427, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  978, 0, 0, 0,
+  1031, 0, 0, 0,  1182, 0, 0, 0,  1094, 0, 0, 0,  1352, 907, 0, 0,
+  0, 790, 1060, 0,  950, 0, 0, 0,  1169, 0, 0, 0,  0, 0, 0, 1059,
+  0, 0, 0, 0,  1094, 0, 0, 0,  1127, 1023, 841, 0,  0, 0, 968, 0,
+
+  0, 1028, 0, 0,  0, 0, 0, 1024,  0, 0, 0, 1024,  0, 0, 891, 0,
+  688, 0, 0, 0,  664, 0, 0, 0,  864, 0, 0, 0,  1292, 0, 0, 1024,
+  0, 0, 0, 1024,  0, 0, 0, 753,  0, 0, 0, 1024,  0, 0, 0, 1024,
+  0, 0, 0, 0,  489, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 573,
+  755, 0, 0, 0,  565, 0, 0, 0,  727, 836, 917, 0,  1080, 0, 0, 0,
+  0, 583, 0, 0,  0, 0, 815, 0,  1425, 0, 0, 0,  1295, 0, 0, 0,
+  0, 912, 0, 0,  1210, 708, 0, 0,  0, 0, 0, 0,  988, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  1194, 0, 0, 0,  840, 0, 0, 0,  0, 879, 0, 0,  0, 0, 0, 0,
+  1669, 0, 0, 0,  0, 846, 0, 0,  1451, 0, 0, 0,  0, 0, 0, 808,
+  1317, 0, 0, 0,  1685, 0, 0, 0,  911, 0, 0, 0,  1173, 0, 0, 0,
+  1897, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  860, 0, 0, 0,
+  1424, 0, 0, 0,  1100, 0, 0, 0,  1360, 0, 0, 0,  1166, 0, 0, 0,
+  1012, 0, 0, 0,  1749, 0, 0, 0,  1381, 0, 0, 513,  928, 0, 0, 0,
+  1147, 0, 0, 0,  1163, 0, 0, 0,  1029, 0, 0, 0,  1873, 0, 0, 0,
+  0, 0, 0, 0,  779, 0, 0, 0,  1130, 0, 0, 0,  1426, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  1781, 0, 0, 0,  0, 0, 0, 0,
+  1463, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 1024,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  };
+
+#endif
+
+
+#if 0
+
+// Default value for starting over building this data
+static const short kMeanScore[256 * 4] = {
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
+
+};
+#endif
+
+
+#endif  // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_GENERATED_MEANSCORE_H__
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_quads_128.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_generated_quads_128.cc
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.cc
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_IMPL_H_
+#define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_IMPL_H_
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/lang_enc.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h"
+
+
+static const int kCLDFlagFinish = 1;    // Do not recurse further; return result even if unreliable
+static const int kCLDFlagSqueeze = 2;   // Cheap-squeeze each text run to drop repetitive chunks
+static const int kCLDFlagRepeats = 4;   // Skip scoring grams whose last character is predicted
+static const int kCLDFlagTop40 = 8;     // Restrict scored languages to the Google "Top 40*"
+static const int kCLDFlagShort = 16;    // Use trigram scoring instead of quadgrams
+static const int kCLDFlagHint = 32;   // Experimental, undebugged; language hint passed in plus_one
+static const int kCLDFlagUseWords = 64; // Also score complete words
+
+/***
+
+Flag meanings:
+
+Flags are used in the context of a recursive call from Detect to itself,
+trying to deal in a more restrictive way with input that was not reliably
+identified in the top-level call.
+
+Finish -- Do not further recurse; return whatever result ensues, even if it is
+          unreliable. Typically set in any recursive call to take a second try
+          on unreliable text.
+
+Squeeze -- For each text run, do an inplace cheapsqueeze to remove chunks of
+          highly repetitive text and chunks of text with too many 1- and
+          2-letter words. This avoids scoring repetitive or useless non-text
+          crap in large files such as bogus JPEGs within an HTML file.
+
+Repeats -- When scoring a text run, do a cheap prediction of each character
+          and do not score a unigram/quadgram if the last character of same is
+          correctly predicted. This is a slower, finer-grained form of
+          cheapsqueeze, typically used when the first pass got unreliable
+          results.
+
+Top40 -- Restrict the set of scored languages to the Google "Top 40*", which is
+          actually 38 languages. This gets rid of about 110 languages that
+          represent about 0.7% of the web. Typically used when the first pass
+          got unreliable results.
+
+Short -- Use trigram (three letter) scoring instead of quadgrams. Restricted to
+          the top 40* languages, Latin and Cyrillic scripts only.
+          Not as precise as quadgrams, but it gives some plausible result on
+          1- or 2-word text in major languages.
+
+Hint -- EXPERIMENTAL flag for compact_lang_det_test.cc to indicate a language
+          hint supplied in parameter plus_one.
+
+UseWords -- In addition to scoring quad/uni/nil-grams, score complete words
+
+Tentative decision logic:
+
+In the middle of first pass -- After 4KB of text, look at the front 256 bytes
+          of every full 4KB buffer. If it compresses very well (say 3:1) or has
+          lots of spaces (say 1 of every 4 bytes), assume that the input is
+          large and contains lots of bogus non-text. Recurse, passing the
+          Squeeze flag to strip out chunks of this non-text.
+
+At the end of the first pass --
+          If the top language is reliable and >= 70% of the document, return.
+          Else if the top language is reliable and top+2nd >= say 94%, return.
+          Else, either the top language is not reliable or there is a lot of
+          other crap.
+***/
+
+
+
+namespace CompactLangDetImpl {
+  // Scan interchange-valid UTF-8 bytes and detect the most likely language,
+  // or set of languages.
+  //
+  // Design goals:
+  //   Skip over big stretches of HTML tags
+  //   Able to return ranges of different languages
+  //   Relatively small tables and relatively fast processing
+  //   Thread safe
+  //
+
+  typedef struct {
+    int perscript_count;              // Presumably the entry count of perscript_lang -- confirm
+    const Language* perscript_lang;   // Languages associated with one script
+  } PerScriptPair;
+
+  typedef struct {
+    // Constants for hashing 4-7 byte quadgram to 32 bits
+    const int kQuadHashB4Shift;
+    const int kQuadHashB4bShift;
+    const int kQuadHashB5Shift;
+    const int kQuadHashB5bShift;
+    // Constants for hashing 32 bits to kQuadKeyTable subscript/key
+    const int kHashvalToSubShift;
+    const uint32 kHashvalToSubMask;
+    const int kHashvalToKeyShift;
+    const uint32 kHashvalToKeyMask;
+    const int kHashvalAssociativity;
+    // Pointers to the actual tables
+    const PerScriptPair* kPerScriptPair;
+    const uint16* kQuadKeyTable;
+    const uint32* kQuadValueTable;
+  } LangDetObj;
+
+  // For HTML documents, tags are skipped, along with <script> ... </script>
+  // and <style> ... </style> sequences, and entities are expanded.
+  //
+  // We distinguish between bytes of the raw input buffer and bytes of non-tag
+  // text letters. Since tags can be over 50% of the bytes of an HTML Page,
+  // and are nearly all seven-bit ASCII English, we prefer to distinguish
+  // language mixture fractions based on just the non-tag text.
+  //
+  // Inputs: text and text_length
+  //  is_plain_text if true says to NOT parse/skip HTML tags nor entities
+  // Outputs:
+  //  language3 is an array of the top 3 languages or UNKNOWN_LANGUAGE
+  //  percent3 is an array of the text percentages 0..100 of the top 3 languages
+  //  normalized_score3 is an array of internal scores, normalized to the
+  //    average score for each language over a body of training text. A
+  //    normalized score significantly away from 1.0 indicates very skewed text
+  //    or gibberish.
+  //
+  //  text_bytes is the amount of non-tag/letters-only text found
+  //  is_reliable set true if the returned Language is at least 2**30 times more
+  //  probable than the second-best Language
+  //
+  // Return value: the most likely Language for the majority of the input text
+  //  Length 0 input and text with no reliable letter sequences returns
+  //  UNKNOWN_LANGUAGE
+  //
+  // Subsetting: For fast detection over large documents, these routines will
+  // scan non-tag text of the initial part of a document, then will
+  // skip 4-16 bytes and subsample text in the rest of the document, up to a
+  // fixed limit (currently 160KB of non-tag letters).
+  //
+
+  Language DetectLanguageSummaryV25(
+                        const char* buffer,         // Input text, UTF-8
+                        int buffer_length,          // Length of the input text
+                        bool is_plain_text,         // true: do not parse/skip tags or entities
+                        const char* tld_hint,       // "id" boosts Indonesian
+                        int encoding_hint,          // SJS boosts Japanese
+                        Language language_hint,     // ITALIAN boosts it
+                        bool allow_extended_lang,   // Presumably enables ext_lang_enc.h languages -- confirm
+                        int flags,                  // Presumably an OR of kCLDFlag* bits -- confirm
+                        Language plus_one,          // Language hint used with kCLDFlagHint
+                        Language* language3,        // Out: top 3 languages or UNKNOWN_LANGUAGE
+                        int* percent3,              // Out: text percentages 0..100 of the top 3
+                        double* normalized_score3,  // Out: normalized internal scores
+                        int* text_bytes,            // Out: amount of non-tag/letters-only text
+                        bool* is_reliable);         // Out: true if result is reliable
+
+  // For unit testing:
+  // Remove portions of text that have a high density of spaces, or that are
+  // overly repetitive, squeezing the remaining text in-place to the front
+  // of the input buffer (isrc, srclen).
+  // Return the new, possibly-shorter length
+  int CheapSqueezeInplace(char* isrc, int srclen, int ichunksize);
+};      // End namespace CompactLangDetImpl
+
+#endif  // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_IMPL_H_
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.cc
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.cc
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_macros.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_strtoint.h"
+
+// Language names above NUM_LANGUAGES
+// These are also the C enum declared names
+static const char* const kExtLanguageName[] = {
+"X_BORK_BORK_BORK", "X_PIG_LATIN", "X_HACKER", "X_KLINGON", "X_ELMER_FUDD",
+
+// Pseudo-languages for Unicode scripts that each express a single language
+"X_OGHAM", "X_RUNIC", "X_YI", "X_OLD_ITALIC", "X_GOTHIC",
+"X_DESERET", "X_HANUNOO", "X_BUHID", "X_TAGBANWA", "X_TAI_LE",
+"X_LINEAR_B", "X_UGARITIC", "X_SHAVIAN", "X_OSMANYA", "X_CYPRIOT",
+"X_BUGINESE", "X_COPTIC", "X_NEW_TAI_LUE", "X_GLAGOLITIC", "X_TIFINAGH",
+"X_SYLOTI_NAGRI", "X_OLD_PERSIAN", "X_KHAROSHTHI", "X_BALINESE", "X_CUNEIFORM",
+"X_PHOENICIAN", "X_PHAGS_PA", "X_NKO",
+
+// Unicode 5.1. NOTE(review): "X_SUDANESE" presumably means the Sundanese script; it must match the enum name, so confirm in ext_lang_enc.h before renaming
+"X_SUDANESE", "X_LEPCHA", "X_OL_CHIKI", "X_VAI", "X_SAURASHTRA",
+"X_KAYAH_LI", "X_REJANG", "X_LYCIAN", "X_CARIAN", "X_LYDIAN",
+"X_CHAM",
+};
+
+
+// These are the C enum declared names, for programs creating C code
+static const char* const kExtLangDeclaredName[] = {
+  "ENGLISH",      /* 0 */
+  "DANISH",       /* 1 */
+  "DUTCH",        /* 2 */
+  "FINNISH",      /* 3 */
+  "FRENCH",       /* 4 */
+  "GERMAN",       /* 5 */
+  "HEBREW",       /* 6 */
+  "ITALIAN",      /* 7 */
+  "JAPANESE",     /* 8 */
+  "KOREAN",       /* 9 */
+  "NORWEGIAN",    /* 10 */
+  "POLISH",       /* 11 */
+  "PORTUGUESE",   /* 12 */
+  "RUSSIAN",      /* 13 */
+  "SPANISH",      /* 14 */
+  "SWEDISH",      /* 15 */
+  "CHINESE",      /* 16 */
+  "CZECH",        /* 17 */
+  "GREEK",        /* 18 */
+  "ICELANDIC",    /* 19 */
+  "LATVIAN",      /* 20 */
+  "LITHUANIAN",   /* 21 */
+  "ROMANIAN",     /* 22 */
+  "HUNGARIAN",    /* 23 */
+  "ESTONIAN",     /* 24 */
+  "TG_UNKNOWN_LANGUAGE",  /* 25 */
+  "UNKNOWN_LANGUAGE",     /* 26 */
+  "BULGARIAN",    /* 27 */
+  "CROATIAN",     /* 28 */
+  "SERBIAN",      /* 29 */
+  "IRISH",        /* 30 */
+  "GALICIAN",     /* 31 */
+  "TAGALOG",      /* 32 */
+  "TURKISH",      /* 33 */
+  "UKRAINIAN",    /* 34 */
+  "HINDI",        /* 35 */
+  "MACEDONIAN",   /* 36 */
+  "BENGALI",      /* 37 */
+  "INDONESIAN",   /* 38 */
+  "LATIN",        /* 39 */
+  "MALAY",        /* 40 */
+  "MALAYALAM",    /* 41 */
+  "WELSH",        /* 42 */
+  "NEPALI",       /* 43 */
+  "TELUGU",       /* 44 */
+  "ALBANIAN",     /* 45 */
+  "TAMIL",        /* 46 */
+  "BELARUSIAN",   /* 47 */
+  "JAVANESE",     /* 48 */
+  "OCCITAN",      /* 49 */
+  "URDU",         /* 50 */
+  "BIHARI",       /* 51 */
+  "GUJARATI",     /* 52 */
+  "THAI",         /* 53 */
+  "ARABIC",       /* 54 */
+  "CATALAN",      /* 55 */
+  "ESPERANTO",    /* 56 */
+  "BASQUE",       /* 57 */
+  "INTERLINGUA",  /* 58 */
+  "KANNADA",      /* 59 */
+  "PUNJABI",      /* 60 */
+  "SCOTS_GAELIC", /* 61 */
+  "SWAHILI",      /* 62 */
+  "SLOVENIAN",    /* 63 */
+  "MARATHI",      /* 64 */
+  "MALTESE",      /* 65 */
+  "VIETNAMESE",   /* 66 */
+  "FRISIAN",      /* 67 */
+  "SLOVAK",       /* 68 */
+  "CHINESE_T",    /* 69 */
+  "FAROESE",      /* 70 */
+  "SUNDANESE",    /* 71 */
+  "UZBEK",        /* 72 */
+  "AMHARIC",      /* 73 */
+  "AZERBAIJANI",  /* 74 */
+  "GEORGIAN",     /* 75 */
+  "TIGRINYA",     /* 76 */
+  "PERSIAN",      /* 77 */
+  "BOSNIAN",      /* 78 */
+  "SINHALESE",    /* 79 */
+  "NORWEGIAN_N",  /* 80 */
+  "PORTUGUESE_P", /* 81 */
+  "PORTUGUESE_B", /* 82 */
+  "XHOSA",        /* 83 */
+  "ZULU",         /* 84 */
+  "GUARANI",      /* 85 */
+  "SESOTHO",      /* 86 */
+  "TURKMEN",      /* 87 */
+  "KYRGYZ",       /* 88 */
+  "BRETON",       /* 89 */
+  "TWI",          /* 90 */
+  "YIDDISH",      /* 91 */
+  "SERBO_CROATIAN",       /* 92 */
+  "SOMALI",       /* 93 */
+  "UIGHUR",       /* 94 */
+  "KURDISH",      /* 95 */
+  "MONGOLIAN",    /* 96 */
+  "ARMENIAN",     /* 97 */
+  "LAOTHIAN",     /* 98 */
+  "SINDHI",       /* 99 */
+  "RHAETO_ROMANCE",  /* 100 */
+  "AFRIKAANS",    /* 101 */
+  "LUXEMBOURGISH", /* 102 */
+  "BURMESE",      /* 103 */
+  "KHMER",        /* 104 */
+  "TIBETAN",      /* 105 */
+  "DHIVEHI",      /* 106 */       // sometimes spelled Divehi; lang of Maldives
+  "CHEROKEE",     /* 107 */
+  "SYRIAC",       /* 108 */
+  "LIMBU",        /* 109 */
+  "ORIYA",        /* 110 */
+  "ASSAMESE",     /* 111 */
+  "CORSICAN",     /* 112 */
+  "INTERLINGUE",  /* 113 */
+  "KAZAKH",       /* 114 */
+  "LINGALA",      /* 115 */
+  "MOLDAVIAN",    /* 116 */
+  "PASHTO",       /* 117 */
+  "QUECHUA",      /* 118 */
+  "SHONA",        /* 119 */
+  "TAJIK",        /* 120 */
+  "TATAR",        /* 121 */
+  "TONGA",        /* 122 */
+  "YORUBA",       /* 123 */
+  "CREOLES_AND_PIDGINS_ENGLISH_BASED",      /* 124 */
+  "CREOLES_AND_PIDGINS_FRENCH_BASED",       /* 125 */
+  "CREOLES_AND_PIDGINS_PORTUGUESE_BASED",   /* 126 */
+  "CREOLES_AND_PIDGINS_OTHER",              /* 127 */
+  "MAORI",        /* 128 */
+  "WOLOF",        /* 129 */
+  "ABKHAZIAN",    /* 130 */
+  "AFAR",         /* 131 */
+  "AYMARA",       /* 132 */
+  "BASHKIR",      /* 133 */
+  "BISLAMA",      /* 134 */
+  "DZONGKHA",     /* 135 */
+  "FIJIAN",       /* 136 */
+  "GREENLANDIC",  /* 137 */
+  "HAUSA",        /* 138 */
+  "HAITIAN_CREOLE",  /* 139 */
+  "INUPIAK",      /* 140 */
+  "INUKTITUT",    /* 141 */
+  "KASHMIRI",     /* 142 */
+  "KINYARWANDA",  /* 143 */
+  "MALAGASY",     /* 144 */
+  "NAURU",        /* 145 */
+  "OROMO",        /* 146 */
+  "RUNDI",        /* 147 */
+  "SAMOAN",       /* 148 */
+  "SANGO",        /* 149 */
+  "SANSKRIT",     /* 150 */
+  "SISWANT",      /* 151 */
+  "TSONGA",       /* 152 */
+  "TSWANA",       /* 153 */
+  "VOLAPUK",      /* 154 */
+  "ZHUANG",       /* 155 */
+  "KHASI",        /* 156 */
+  "SCOTS",        /* 157 */
+  "GANDA",        /* 158 */
+  "MANX",         /* 159 */
+  "MONTENEGRIN",  /* 160 */
+  // Add new language declared names just before here
+};
+
+COMPILE_ASSERT(arraysize(kExtLangDeclaredName) == NUM_LANGUAGES,
+       kExtLangDeclaredName_has_incorrect_length);  // table is indexed directly by Language in [0, NUM_LANGUAGES)
+
+
+// Language codes for the extended languages; indexed by (lang - EXT_LANGUAGE_BASE)
+// All of these codes are made up, except "tlh" (Klingon), which is from ISO-639-2
+// NOTE: zza is a standard name (presumably why it is not used as a made-up code)
+static const char* const kExtLanguageCode[] = {
+  // Order: X_BORK_BORK_BORK, X_PIG_LATIN, X_HACKER, X_KLINGON, X_ELMER_FUDD
+  // All Latin script
+  "zzb", "zzp", "zzh", "tlh", "zze",
+
+  // Pseudo-languages for Unicode scripts that express a single language
+  "xx-Ogam", "xx-Runr", "xx-Yiii", "xx-Ital", "xx-Goth",
+  "xx-Dsrt", "xx-Hano", "xx-Buhd", "xx-Tagb", "xx-Tale",
+  "xx-Linb", "xx-Ugar", "xx-Shaw", "xx-Osma", "xx-Cprt",
+  "xx-Bugi", "xx-Copt", "xx-Talu", "xx-Glag", "xx-Tfng",
+  "xx-Sylo", "xx-Xpeo", "xx-Khar", "xx-Bali", "xx-Xsux",
+  "xx-Phnx", "xx-Phag", "xx-Nkoo",
+
+  // Unicode 5.1
+  "xx-Sund", "xx-Lepc", "xx-Olck", "xx-Vaii", "xx-Saur",
+  "xx-Kali", "xx-Rjng", "xx-Lyci", "xx-Cari", "xx-Lydi",
+  "xx-Cham",
+};
+
+
+// Returns the string name of |lang| used as the output by the lang/enc
+// identifier, e.g. "Korean".
+//   - A negative value (the no-text-at-all result from a Tote) gives "".
+//   - TG_UNKNOWN_LANGUAGE, the placeholder for the "ignore me" language used
+//     to subtract out HTML, link farms, DNA strings, and a little English
+//     porn, gives "Ignore".
+//   - A value outside both the base range and the extended range gives
+//     invalid_language_name().
+const char* ExtLanguageName(const Language lang) {
+  if (lang < 0) {return "";}
+  // CompactLanguageDetect extension
+  if (lang == TG_UNKNOWN_LANGUAGE) {return "Ignore";}
+
+  // lang is known to be non-negative from here on.
+  if (lang < NUM_LANGUAGES) {
+    return LanguageName(lang);
+  }
+
+  // Reject the gap below the extended range and everything past its end.
+  const bool below_extended = lang < EXT_LANGUAGE_BASE;
+  const bool past_extended = !(lang < EXT_NUM_LANGUAGES);
+  if (below_extended || past_extended) {
+    return invalid_language_name();
+  }
+  return kExtLanguageName[lang - EXT_LANGUAGE_BASE];
+}
+
+
+// Returns the Language enum spelling of |lang|, for use by programs that
+// create C declarations, e.g. "KOREAN".
+// Base languages come from kExtLangDeclaredName; extended languages come from
+// kExtLanguageName, the same table ExtLanguageName() reads.
+// Any other value yields "UNKNOWN_LANGUAGE".
+const char* ExtLanguageDeclaredName(const Language lang) {
+  const char* declared = "UNKNOWN_LANGUAGE";
+  if ((lang >= 0) && (lang < NUM_LANGUAGES)) {
+    declared = kExtLangDeclaredName[lang];
+  } else if ((lang >= EXT_LANGUAGE_BASE) && (lang < EXT_NUM_LANGUAGES)) {
+    declared = kExtLanguageName[lang - EXT_LANGUAGE_BASE];
+  }
+  return declared;
+}
+
+// Returns the language code for |lang|, e.g. "ko".
+// TG_UNKNOWN_LANGUAGE gives "xxx"; a value outside both the base and the
+// extended range gives "??".
+const char* ExtLanguageCode(const Language lang) {
+  // Hack for ignore/porn pseudo-language
+  if (lang == TG_UNKNOWN_LANGUAGE) {return "xxx";}
+
+  const bool is_base = (lang >= 0) && (lang < NUM_LANGUAGES);
+  if (is_base) {return LanguageCode(lang);}
+
+  const bool is_extended =
+      (lang >= EXT_LANGUAGE_BASE) && (lang < EXT_NUM_LANGUAGES);
+  return is_extended ? kExtLanguageCode[lang - EXT_LANGUAGE_BASE] : "??";
+}
+
+
+// True if |tag| begins with |prefix|.  strncmp stops at the terminating NUL,
+// so a tag shorter than the prefix is never read past its end.
+static bool ExtLangTagHasPrefix(const char* tag, const char* prefix) {
+  return strncmp(tag, prefix, strlen(prefix)) == 0;
+}
+
+// Convert "en-Latn-GB" to ENGLISH
+// Normalize to PORTUGUESE, not PORTUGUESE_B nor PORTUGUESE_P
+// Consider for later: NORWEGIAN, NORWEGIAN_N
+// Consider for later: SCOTS, SCOTS_GAELIC
+// Consider for later: SERBO_CROATIAN, SERBIAN, CROATIAN, BOSNIAN
+//
+// src is either a language tag ("en", "pt-BR", "xx-Ogam", ...) or a string of
+// digits.  An all-digit string (the empty string included) is converted with
+// strto32 and cast to Language without range checking.  A NULL src returns
+// UNKNOWN_LANGUAGE.  When no special case applies, the result is whatever
+// LanguageFromCode stores for the leading 2- or 3-letter code; it starts out
+// as UNKNOWN_LANGUAGE.
+Language GetLanguageFromNumberOrName(const char* src) {
+  if (src == NULL) {
+    return UNKNOWN_LANGUAGE;
+  }
+
+  const size_t len = strlen(src);
+  if (strspn(src, "0123456789") == len) {
+    // All digits
+    return static_cast<Language>(strto32(src, NULL, 10));
+  }
+
+  // Merge sets of languages (marked FLAGS_mergepairs originally, always on):
+  // pt-xx, en-xx and fr-xx collapse to the base language.
+  if (ExtLangTagHasPrefix(src, "pt-")) {return PORTUGUESE;}
+  if (ExtLangTagHasPrefix(src, "en-")) {return ENGLISH;}
+  if (ExtLangTagHasPrefix(src, "fr-")) {return FRENCH;}
+  // bs/hr and Latin-script sr/sh collapse to CROATIAN; Cyrillic-script sr/sh
+  // collapse to SERBIAN.
+  if (ExtLangTagHasPrefix(src, "bs-")) {return CROATIAN;}
+  if (ExtLangTagHasPrefix(src, "hr-")) {return CROATIAN;}
+  if (ExtLangTagHasPrefix(src, "sr-Latn")) {return CROATIAN;}
+  if (ExtLangTagHasPrefix(src, "sh-Latn")) {return CROATIAN;}
+  if (ExtLangTagHasPrefix(src, "sr-Cyrl")) {return SERBIAN;}
+  if (ExtLangTagHasPrefix(src, "sh-Cyrl")) {return SERBIAN;}
+
+  // Extensions
+  // Standin for ignore/porn "language"
+  if (ExtLangTagHasPrefix(src, "xxx")) {return TG_UNKNOWN_LANGUAGE;}
+
+  if (ExtLangTagHasPrefix(src, "zzb")) {return X_BORK_BORK_BORK;}
+  if (ExtLangTagHasPrefix(src, "zzp")) {return X_PIG_LATIN;}
+  if (ExtLangTagHasPrefix(src, "zzh")) {return X_HACKER;}
+  if (ExtLangTagHasPrefix(src, "tlh")) {return X_KLINGON;}
+  if (ExtLangTagHasPrefix(src, "zze")) {return X_ELMER_FUDD;}
+
+  Language retlang = UNKNOWN_LANGUAGE;
+
+  // We have a name like en-Latn-GB or pt-BR
+  // First, get rid of some special cases
+  if (len <= 3) {
+    LanguageFromCode(src, &retlang);
+  } else if (len == 7) {
+    // More Extensions: the "xx-Ssss" script pseudo-languages.  Their codes
+    // live in kExtLanguageCode at index (language - EXT_LANGUAGE_BASE), the
+    // same layout ExtLanguageCode() relies on, so search that table rather
+    // than repeating every code here.
+    const int first_script = X_OGHAM - EXT_LANGUAGE_BASE;
+    const int num_codes = static_cast<int>(arraysize(kExtLanguageCode));
+    for (int i = first_script; i < num_codes; ++i) {
+      if (strcmp(src, kExtLanguageCode[i]) == 0) {
+        return static_cast<Language>(EXT_LANGUAGE_BASE + i);
+      }
+    }
+  }
+
+  // Some other weird ones
+  // Could be Latn or Limb; all our current training data is Latn
+  if (strcmp(src, "sit-NP") == 0) {return LIMBU;}
+  if (strcmp(src, "un-Latn") == 0) {return UNKNOWN_LANGUAGE;}
+
+  // Multi-country languages
+  if (ExtLangTagHasPrefix(src, "zh")) {
+    // The matched prefix guarantees len >= 2, so the suffix is in bounds.
+    const char* const suffix = src + (len - 2);
+    if (strcmp(suffix, "TW") == 0) {return CHINESE_T;}
+    if (strcmp(suffix, "HK") == 0) {return CHINESE_T;}
+    return CHINESE;
+  }
+  // Regional variants (a BR or CA suffix included) are not distinguished.
+  if (ExtLangTagHasPrefix(src, "pt")) {return PORTUGUESE;}
+  if (ExtLangTagHasPrefix(src, "fr")) {return FRENCH;}
+
+  // None of the special cases matched: look up the 2- or 3-letter code in
+  // front of the first dash.  The length tests keep src[2] and src[3] from
+  // being read beyond the end of a short tag.
+  if ((len > 2) && (src[2] == '-')) {
+    const char code[3] = {src[0], src[1], '\0'};
+    LanguageFromCode(code, &retlang);
+  }
+  if ((len > 3) && (src[3] == '-')) {
+    const char code[4] = {src[0], src[1], src[2], '\0'};
+    LanguageFromCode(code, &retlang);
+  }
+
+  return retlang;
+}
+
+// One row of the script-code lookup table: a script code string paired with
+// its UnicodeLScript value.
+struct NameScriptPair {
+  const char* name;         // script code, e.g. "Latn"
+  UnicodeLScript lscript;   // script value returned for that code
+};
+
+// Script code -> UnicodeLScript. Must stay in strcmp order: GetLScriptFromNumberOrName binary-searches it
+static const NameScriptPair kNameScriptPair[] = {
+  // Scripts added in Unicode 5.1 are marked individually below
+  {"Arab", ULScript_Arabic},
+  {"Armn", ULScript_Armenian},
+  {"Bali", ULScript_Balinese},
+  {"Beng", ULScript_Bengali},
+  {"Bugi", ULScript_Buginese},
+  {"Buhd", ULScript_Buhid},
+  {"Cans", ULScript_Canadian_Aboriginal},
+  {"Cari", ULScript_Carian},      // Unicode 5.1
+  {"Cham", ULScript_Cham},        // Unicode 5.1
+  {"Cher", ULScript_Cherokee},
+  {"Copt", ULScript_Coptic},
+  {"Cprt", ULScript_Cypriot},
+  {"Cyrl", ULScript_Cyrillic},
+  {"Deva", ULScript_Devanagari},
+  {"Dsrt", ULScript_Deseret},
+  {"Ethi", ULScript_Ethiopic},
+  {"Geor", ULScript_Georgian},
+  {"Glag", ULScript_Glagolitic},
+  {"Goth", ULScript_Gothic},
+  {"Grek", ULScript_Greek},
+  {"Gujr", ULScript_Gujarati},
+  {"Guru", ULScript_Gurmukhi},
+  {"Hani", ULScript_HanCJK},
+  {"Hano", ULScript_Hanunoo},
+  {"Hebr", ULScript_Hebrew},
+  {"Ital", ULScript_Old_Italic},
+  {"Kali", ULScript_Kayah_Li},    // Unicode 5.1
+  {"Khar", ULScript_Kharoshthi},
+  {"Khmr", ULScript_Khmer},
+  {"Knda", ULScript_Kannada},
+  {"Laoo", ULScript_Lao},
+  {"Latn", ULScript_Latin},
+  {"Lepc", ULScript_Lepcha},      // Unicode 5.1
+  {"Limb", ULScript_Limbu},
+  {"Linb", ULScript_Linear_B},
+  {"Lyci", ULScript_Lycian},      // Unicode 5.1
+  {"Lydi", ULScript_Lydian},      // Unicode 5.1
+  {"Mlym", ULScript_Malayalam},
+  {"Mong", ULScript_Mongolian},
+  {"Mymr", ULScript_Myanmar},
+  {"Nkoo", ULScript_Nko},
+  {"Ogam", ULScript_Ogham},
+  {"Olck", ULScript_Ol_Chiki},    // Unicode 5.1
+  {"Orya", ULScript_Oriya},
+  {"Osma", ULScript_Osmanya},
+  {"Phag", ULScript_Phags_Pa},
+  {"Phnx", ULScript_Phoenician},
+  {"Rjng", ULScript_Rejang},      // Unicode 5.1
+  {"Runr", ULScript_Runic},
+  {"Saur", ULScript_Saurashtra},  // Unicode 5.1
+  {"Shaw", ULScript_Shavian},
+  {"Sinh", ULScript_Sinhala},
+  {"Sund", ULScript_Sundanese},   // Unicode 5.1
+  {"Sylo", ULScript_Syloti_Nagri},
+  {"Syrc", ULScript_Syriac},
+  {"Tagb", ULScript_Tagbanwa},
+  {"Tale", ULScript_Tai_Le},
+  {"Talu", ULScript_New_Tai_Lue},
+  {"Taml", ULScript_Tamil},
+  {"Telu", ULScript_Telugu},
+  {"Tfng", ULScript_Tifinagh},
+  {"Tglg", ULScript_Tagalog},
+  {"Thaa", ULScript_Thaana},
+  {"Thai", ULScript_Thai},
+  {"Tibt", ULScript_Tibetan},
+  {"Ugar", ULScript_Ugaritic},
+  {"Vaii", ULScript_Vai},         // Unicode 5.1 // NOTE: apparently 'Vai '
+  {"Xpeo", ULScript_Old_Persian},
+  {"Xsux", ULScript_Cuneiform},
+  {"Yiii", ULScript_Yi},
+  {"Zyyy", ULScript_Common},
+  {"Zzzz", ULScript_Inherited},
+};
+
+// Convert "en-Latn-GB" to ULScript_Latin
+//
+// src is either a language tag or a string of digits.  An all-digit string
+// (the empty string included) is converted with strto32 and cast to
+// UnicodeLScript without range checking.  Otherwise the (up to) four
+// characters after the first '-' are looked up in kNameScriptPair.
+// ULScript_Latin is the fallback for a NULL src, a tag with no '-', and a
+// script code that is not in the table.
+UnicodeLScript GetLScriptFromNumberOrName(const char* src) {
+  if (src == NULL) {
+    return ULScript_Latin;
+  }
+  if (strspn(src, "0123456789") == strlen(src)) {
+    // All digits
+    return static_cast<UnicodeLScript>(strto32(src, NULL, 10));
+  }
+
+  if (strcmp(src, "zh-TW") == 0) {return ULScript_HanCJK;}
+  if (strcmp(src, "zh-CN") == 0) {return ULScript_HanCJK;}
+  if (strcmp(src, "pt-BR") == 0) {return ULScript_Latin;}
+  if (strcmp(src, "pt-PT") == 0) {return ULScript_Latin;}
+  // Could be Latn or Limb; all our current training data is Latn
+  if (strcmp(src, "sit-NP") == 0) {return ULScript_Latin;}
+
+  // Isolate just the script field
+  const char* dash = strchr(src, '-');
+  if (dash == NULL) {return ULScript_Latin;}
+
+  // strncpy stops at the terminating NUL (and zero-fills the rest), so a tag
+  // with fewer than four characters after the dash is not read past its end.
+  char script[5];
+  strncpy(script, dash + 1, 4);
+  script[4] = '\0';
+
+  // Binary search, bounded by the table's own size so that mid can never
+  // index outside kNameScriptPair.
+  int lo = 0;
+  int hi = static_cast<int>(arraysize(kNameScriptPair));
+  while (lo < hi) {
+    const int mid = lo + ((hi - lo) >> 1);
+    const int order = strcmp(script, kNameScriptPair[mid].name);
+    if (order < 0) {
+      hi = mid;
+    } else if (order > 0) {
+      lo = mid + 1;
+    } else {
+      return kNameScriptPair[mid].lscript;
+    }
+  }
+  return ULScript_Latin;
+}
+
+
+// Merge together some languages, such as bs/hr/sr:
+//   BOSNIAN        -> CROATIAN   (Croatian Latin)
+//   SERBO_CROATIAN -> SERBIAN    (Serbian Cyrillic)
+//   PORTUGUESE_P, PORTUGUESE_B -> PORTUGUESE
+// Every other language is returned unchanged.
+Language NormalizeLanguage(Language lang) {
+  switch (lang) {
+    case BOSNIAN:
+      return CROATIAN;
+    case SERBO_CROATIAN:
+      return SERBIAN;
+    case PORTUGUESE_P:
+    case PORTUGUESE_B:
+      return PORTUGUESE;
+    default:
+      return lang;
+  }
+}
--- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.h
+++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.h
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__
+#define I18N_ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__
+
+#include "third_party/cld/bar/toolbar/cld/i18n/languages/public/languages.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.h"
+
+
+// Leave a small gap after the base languages, so adding one or two is easy.
+// Just reduce the gap here (currently 4 entries: NUM_LANGUAGES+0 .. NUM_LANGUAGES+3)
+
+#define EXT_LANGUAGE_BASE (NUM_LANGUAGES + 4)
+
+// Google UI languages
+#define X_BORK_BORK_BORK (Language)(EXT_LANGUAGE_BASE+0)
+#define X_PIG_LATIN (Language)(EXT_LANGUAGE_BASE+1)
+#define X_HACKER (Language)(EXT_LANGUAGE_BASE+2)
+#define X_KLINGON (Language)(EXT_LANGUAGE_BASE+3)
+#define X_ELMER_FUDD (Language)(EXT_LANGUAGE_BASE+4)
+
+// Pseudo-languages for Unicode scripts that express a single language
+#define X_OGHAM (Language)(EXT_LANGUAGE_BASE+5)
+#define X_RUNIC (Language)(EXT_LANGUAGE_BASE+6)
+#define X_YI (Language)(EXT_LANGUAGE_BASE+7)
+#define X_OLD_ITALIC (Language)(EXT_LANGUAGE_BASE+8)
+#define X_GOTHIC (Language)(EXT_LANGUAGE_BASE+9)
+#define X_DESERET (Language)(EXT_LANGUAGE_BASE+10)
+#define X_HANUNOO (Language)(EXT_LANGUAGE_BASE+11)
+#define X_BUHID (Language)(EXT_LANGUAGE_BASE+12)
+#define X_TAGBANWA (Language)(EXT_LANGUAGE_BASE+13)
+#define X_TAI_LE (Language)(EXT_LANGUAGE_BASE+14)
+#define X_LINEAR_B (Language)(EXT_LANGUAGE_BASE+15)
+#define X_UGARITIC (Language)(EXT_LANGUAGE_BASE+16)
+#define X_SHAVIAN (Language)(EXT_LANGUAGE_BASE+17)
+#define X_OSMANYA (Language)(EXT_LANGUAGE_BASE+18)
+#define X_CYPRIOT (Language)(EXT_LANGUAGE_BASE+19)
+#define X_BUGINESE (Language)(EXT_LANGUAGE_BASE+20)
+#define X_COPTIC (Language)(EXT_LANGUAGE_BASE+21)
+#define X_NEW_TAI_LUE (Language)(EXT_LANGUAGE_BASE+22)
+#define X_GLAGOLITIC (Language)(EXT_LANGUAGE_BASE+23)
+#define X_TIFINAGH (Language)(EXT_LANGUAGE_BASE+24)
+#define X_SYLOTI_NAGRI (Language)(EXT_LANGUAGE_BASE+25)
+#define X_OLD_PERSIAN (Language)(EXT_LANGUAGE_BASE+26)
+#define X_KHAROSHTHI (Language)(EXT_LANGUAGE_BASE+27)
+#define X_BALINESE (Language)(EXT_LANGUAGE_BASE+28)
+#define X_CUNEIFORM (Language)(EXT_LANGUAGE_BASE+29)
+#define X_PHOENICIAN (Language)(EXT_LANGUAGE_BASE+30)
+#define X_PHAGS_PA (Language)(EXT_LANGUAGE_BASE+31)
+#define X_NKO (Language)(EXT_LANGUAGE_BASE+32)
+
+// Unicode 5.1
+#define X_SUDANESE (Language)(EXT_LANGUAGE_BASE+33)  // NOTE(review): paired with code "xx-Sund" / ULScript_Sundanese in the .cc; name probably means Sundanese - confirm
+#define X_LEPCHA (Language)(EXT_LANGUAGE_BASE+34)
+#define X_OL_CHIKI (Language)(EXT_LANGUAGE_BASE+35)
+#define X_VAI (Language)(EXT_LANGUAGE_BASE+36)
+#define X_SAURASHTRA (Language)(EXT_LANGUAGE_BASE+37)
+#define X_KAYAH_LI (Language)(EXT_LANGUAGE_BASE+38)
+#define X_REJANG (Language)(EXT_LANGUAGE_BASE+39)
+#define X_LYCIAN (Language)(EXT_LANGUAGE_BASE+40)
+#define X_CARIAN (Language)(EXT_LANGUAGE_BASE+41)
+#define X_LYDIAN (Language)(EXT_LANGUAGE_BASE+42)
+#define X_CHAM (Language)(EXT_LANGUAGE_BASE+43)
+
+#define EXT_NUM_LANGUAGES (Language)(EXT_LANGUAGE_BASE+44)  // one past X_CHAM; exclusive upper bound of the extended range
+
+
+
+// ExtLanguageName
+// ------------
+// Given the Language, returns its string name used as the output by
+// the lang/enc identifier, e.g. "Korean"
+// "invalid_language" if the input is invalid.
+extern const char* ExtLanguageName(const Language lang);
+
+// ExtLanguageDeclaredName
+// ------------
+// Given the Language, returns its Language enum spelling, for use by
+// programs that create C declarations, e.g. "KOREAN"
+// "UNKNOWN_LANGUAGE" if the input is invalid.
+extern const char* ExtLanguageDeclaredName(const Language lang);
+
+// ExtLanguageCode
+// ------------
+// Given the Language, return the language code, e.g. "ko"
+// This is determined by
+// the following (in order of preference):
+// - ISO-639-1 two-letter language code
+//   (all except those mentioned below)
+// - ISO-639-2 three-letter bibliographic language code
+//   (Tibetan, Dhivehi, Cherokee, Syriac)
+// - Google-specific language code
+//   (ChineseT ("zh-TW"), Teragram Unknown, Unknown,
+//   Portuguese-Portugal, Portuguese-Brazil, Limbu)
+extern const char * ExtLanguageCode(const Language lang);
+
+
+// Convert "en-Latn-GB" to ENGLISH
+// Normalize to PORTUGUESE, not PORTUGUESE_B nor PORTUGUESE_P
+// Consider for later: NORWEGIAN, NORWEGIAN_N
+// Consider for later: SCOTS, SCOTS_GAELIC
+// Consider for later: SERBO_CROATIAN, SERBIAN, CROATIAN, BOSNIAN
+// An all-digit src is instead converted directly to that numeric Language value.
+Language GetLanguageFromNumberOrName(const char* src);
+
+// Convert "en-Latn-GB" to ULScript_Latin; an all-digit src is taken as the numeric script value
+UnicodeLScript GetLScriptFromNumberOrName(const char* src);
+
+// Merge together some languages, such as bs/hr/sr (Bosnian/Croatian/Serbian)
+Language NormalizeLanguage(Language lang);
+
+#endif  // I18N_ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__