From ca60d60feaf0afa739b49ecc5479248e37a9cb8c Mon Sep 17 00:00:00 2001
From: moneromooo-monero <moneromooo-monero@users.noreply.github.com>
Date: Tue, 28 Apr 2020 13:28:55 +0000
Subject: [PATCH] easylogging++: sanitize log payload

Some of the logged payload might come from untrusted sources, so control characters in it are replaced with '?' before it is printed

Reported by itsunixiknowthis
---
 external/easylogging++/easylogging++.cc |  96 ++++++++++++++++++++
 src/common/CMakeLists.txt               |   3 +-
 src/common/utf8.h                       | 114 ++++++++++++++++++++++++
 src/mnemonics/language_base.h           |  74 +--------------
 4 files changed, 216 insertions(+), 71 deletions(-)
 create mode 100644 src/common/utf8.h
diff --git a/external/easylogging++/easylogging++.cc b/external/easylogging++/easylogging++.cc
index 8439bec0b..0d748c225 100644
--- a/external/easylogging++/easylogging++.cc
+++ b/external/easylogging++/easylogging++.cc
@@ -2475,6 +2475,100 @@ void DefaultLogDispatchCallback::handle(const LogDispatchData* data) {
   }
 }
 
+// Decodes s as UTF-8, passes each code point through t, and re-encodes it in the shortest UTF-8 form; throws std::runtime_error on a bad lead byte, a truncated sequence, or a transformed value above 0x10ffff.
+template<typename Transform>
+static inline std::string utf8canonical(const std::string &s, Transform t = [](wint_t c)->wint_t { return c; })
+{  // NOTE(review): Transform cannot be deduced from a default argument, so callers have to pass t (or name Transform) explicitly
+    std::string sc = "";  // re-encoded output
+    size_t avail = s.size();  // input bytes not yet consumed
+    const char *ptr = s.data();  // read cursor into s
+    wint_t cp = 0;  // NOTE(review): wint_t's width is implementation-defined; 4-byte sequences need at least 21 bits - confirm on all targets
+    int bytes = 1;  // encoded length of the current code point
+    char wbuf[8], *wptr;  // scratch buffer for one encoded code point (at most 4 bytes plus the NUL written below)
+    while (avail--)  // the decrement accounts for the lead byte consumed in this iteration
+    {
+      if ((*ptr & 0x80) == 0)  // 0xxxxxxx: single-byte code point
+      {
+        cp = *ptr++;
+        bytes = 1;
+      }
+      else if ((*ptr & 0xe0) == 0xc0)  // 110xxxxx: lead byte of a 2-byte sequence
+      {
+        if (avail < 1)  // the continuation byte is missing
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0x1f) << 6;
+        cp |= *ptr++ & 0x3f;  // continuation bytes are masked, not checked for the 10xxxxxx pattern
+        --avail;
+        bytes = 2;
+      }
+      else if ((*ptr & 0xf0) == 0xe0)  // 1110xxxx: lead byte of a 3-byte sequence
+      {
+        if (avail < 2)  // fewer than two continuation bytes left
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0xf) << 12;
+        cp |= (*ptr++ & 0x3f) << 6;
+        cp |= *ptr++ & 0x3f;
+        avail -= 2;
+        bytes = 3;
+      }
+      else if ((*ptr & 0xf8) == 0xf0)  // 11110xxx: lead byte of a 4-byte sequence
+      {
+        if (avail < 3)  // fewer than three continuation bytes left
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0x7) << 18;
+        cp |= (*ptr++ & 0x3f) << 12;
+        cp |= (*ptr++ & 0x3f) << 6;
+        cp |= *ptr++ & 0x3f;
+        avail -= 3;
+        bytes = 4;
+      }
+      else  // 10xxxxxx or 11111xxx cannot start a sequence
+        throw std::runtime_error("Invalid UTF-8");
+      // Apply the transform, then choose the output length from the transformed value (this overrides the bytes value set while decoding).
+      cp = t(cp);
+      if (cp <= 0x7f)
+        bytes = 1;
+      else if (cp <= 0x7ff)
+        bytes = 2;
+      else if (cp <= 0xffff)
+        bytes = 3;
+      else if (cp <= 0x10ffff)
+        bytes = 4;
+      else
+        throw std::runtime_error("Invalid code point UTF-8 transformation");
+      // Encode cp into wbuf using the length chosen above.
+      wptr = wbuf;
+      switch (bytes)
+      {
+        case 1: *wptr++ = cp; break;
+        case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break;
+        case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
+        case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
+        default: throw std::runtime_error("Invalid UTF-8");  // not reached: bytes is always 1..4 at this point
+      }
+      *wptr = 0;  // NUL-terminate the scratch buffer; append() below uses the explicit length
+      sc.append(wbuf, bytes);
+      cp = 0;  // reset per-code-point state for the next iteration
+      bytes = 1;
+    }
+    return sc;
+}
+
+void sanitize(std::string &s)  // Replaces control characters in the UTF-8 string s with '?', in place; tab, LF and CR are kept
+{
+  s = utf8canonical(s, [](wint_t c)->wint_t {  // nothing is caught here: exceptions thrown by utf8canonical propagate to the caller
+    if (c == 9 || c == 10 || c == 13)  // tab, line feed, carriage return: kept as-is
+      return c;
+    if (c < 0x20)  // every other C0 control character (includes ESC, 0x1b)
+      return '?';
+    if (c == 0x7f)  // DEL
+      return '?';
+    if (c >= 0x80 && c <= 0x9f)  // C1 control characters
+      return '?';
+    return c;  // everything else passes through unchanged
+  });
+}
+
 void DefaultLogDispatchCallback::dispatch(base::type::string_t&& rawLinePrefix, base::type::string_t&& rawLinePayload, base::type::string_t&& logLine) {
   if (m_data->dispatchAction() == base::DispatchAction::NormalLog || m_data->dispatchAction() == base::DispatchAction::FileOnlyLog) {
     if (m_data->logMessage()->logger()->m_typedConfigurations->toFile(m_data->logMessage()->level())) {
@@ -2506,6 +2600,8 @@ void DefaultLogDispatchCallback::dispatch(base::type::string_t&& rawLinePrefix,
         m_data->logMessage()->logger()->logBuilder()->setColor(el::base::utils::colorFromLevel(level), false);
         ELPP_COUT << rawLinePrefix;
         m_data->logMessage()->logger()->logBuilder()->setColor(color == el::Color::Default ? el::base::utils::colorFromLevel(level): color, color != el::Color::Default);
+        try { sanitize(rawLinePayload); }
+        catch (const std::exception &e) { rawLinePayload = "<Invalid UTF-8 in log>"; }
         ELPP_COUT << rawLinePayload;
         m_data->logMessage()->logger()->logBuilder()->setColor(el::Color::Default, false);
         ELPP_COUT << std::flush;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index f06737b31..35b3555a2 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -86,7 +86,8 @@ set(common_private_headers
   updates.h
   aligned.h
   timings.h
-  combinator.h)
+  combinator.h
+  utf8.h)
 
 monero_private_headers(common
   ${common_private_headers})
diff --git a/src/common/utf8.h b/src/common/utf8.h
new file mode 100644
index 000000000..60247f1b2
--- /dev/null
+++ b/src/common/utf8.h
@@ -0,0 +1,114 @@
+// Copyright (c) 2019, The Monero Project
+// 
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without modification, are
+// permitted provided that the following conditions are met:
+// 
+// 1. Redistributions of source code must retain the above copyright notice, this list of
+//    conditions and the following disclaimer.
+// 
+// 2. Redistributions in binary form must reproduce the above copyright notice, this list
+//    of conditions and the following disclaimer in the documentation and/or other
+//    materials provided with the distribution.
+// 
+// 3. Neither the name of the copyright holder nor the names of its contributors may be
+//    used to endorse or promote products derived from this software without specific
+//    prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once 
+
+#include <cctype>
+#include <cwchar>
+#include <stdexcept>
+
+namespace tools
+{
+  template<typename T, typename Transform>  // T: string-like type providing size(), data(), append(const char*, count) and construction from ""
+  inline T utf8canonical(const T &s, Transform t = [](wint_t c)->wint_t { return c; })  // decodes s as UTF-8, maps each code point through t, re-encodes in shortest form; throws std::runtime_error on a bad lead byte, truncation, or a transformed value above 0x10ffff
+  {  // NOTE(review): Transform cannot be deduced from a default argument, so callers have to pass t (or name Transform) explicitly
+    T sc = "";  // re-encoded output
+    size_t avail = s.size();  // input bytes not yet consumed
+    const char *ptr = s.data();  // read cursor into s
+    wint_t cp = 0;  // NOTE(review): wint_t's width is implementation-defined; 4-byte sequences need at least 21 bits - confirm on all targets
+    int bytes = 1;  // encoded length of the current code point
+    char wbuf[8], *wptr;  // scratch buffer for one encoded code point (at most 4 bytes plus the NUL written below)
+    while (avail--)  // the decrement accounts for the lead byte consumed in this iteration
+    {
+      if ((*ptr & 0x80) == 0)  // 0xxxxxxx: single-byte code point
+      {
+        cp = *ptr++;
+        bytes = 1;
+      }
+      else if ((*ptr & 0xe0) == 0xc0)  // 110xxxxx: lead byte of a 2-byte sequence
+      {
+        if (avail < 1)  // the continuation byte is missing
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0x1f) << 6;
+        cp |= *ptr++ & 0x3f;  // continuation bytes are masked, not checked for the 10xxxxxx pattern
+        --avail;
+        bytes = 2;
+      }
+      else if ((*ptr & 0xf0) == 0xe0)  // 1110xxxx: lead byte of a 3-byte sequence
+      {
+        if (avail < 2)  // fewer than two continuation bytes left
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0xf) << 12;
+        cp |= (*ptr++ & 0x3f) << 6;
+        cp |= *ptr++ & 0x3f;
+        avail -= 2;
+        bytes = 3;
+      }
+      else if ((*ptr & 0xf8) == 0xf0)  // 11110xxx: lead byte of a 4-byte sequence
+      {
+        if (avail < 3)  // fewer than three continuation bytes left
+          throw std::runtime_error("Invalid UTF-8");
+        cp = (*ptr++ & 0x7) << 18;
+        cp |= (*ptr++ & 0x3f) << 12;
+        cp |= (*ptr++ & 0x3f) << 6;
+        cp |= *ptr++ & 0x3f;
+        avail -= 3;
+        bytes = 4;
+      }
+      else  // 10xxxxxx or 11111xxx cannot start a sequence
+        throw std::runtime_error("Invalid UTF-8");
+      // Apply the transform, then choose the output length from the transformed value (this overrides the bytes value set while decoding).
+      cp = t(cp);
+      if (cp <= 0x7f)
+        bytes = 1;
+      else if (cp <= 0x7ff)
+        bytes = 2;
+      else if (cp <= 0xffff)
+        bytes = 3;
+      else if (cp <= 0x10ffff)
+        bytes = 4;
+      else
+        throw std::runtime_error("Invalid code point UTF-8 transformation");
+      // Encode cp into wbuf using the length chosen above.
+      wptr = wbuf;
+      switch (bytes)
+      {
+        case 1: *wptr++ = cp; break;
+        case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break;
+        case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
+        case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
+        default: throw std::runtime_error("Invalid UTF-8");  // not reached: bytes is always 1..4 at this point
+      }
+      *wptr = 0;  // NUL-terminate the scratch buffer; append() below uses the explicit length
+      sc.append(wbuf, bytes);
+      cp = 0;  // reset per-code-point state for the next iteration
+      bytes = 1;
+    }
+    return sc;
+  }
+}
diff --git a/src/mnemonics/language_base.h b/src/mnemonics/language_base.h
index 7d2599e9a..ad09dc5fa 100644
--- a/src/mnemonics/language_base.h
+++ b/src/mnemonics/language_base.h
@@ -41,6 +41,7 @@
 #include <boost/algorithm/string.hpp>
 #include "misc_log_ex.h"
 #include "fnv1.h"
+#include "common/utf8.h"
 
 /*!
  * \namespace Language
@@ -73,78 +74,11 @@ namespace Language
     return prefix;
   }
 
-  template<typename T>
-  inline T utf8canonical(const T &s)
-  {
-    T sc = "";
-    size_t avail = s.size();
-    const char *ptr = s.data();
-    wint_t cp = 0;
-    int bytes = 1;
-    char wbuf[8], *wptr;
-    while (avail--)
-    {
-      if ((*ptr & 0x80) == 0)
-      {
-        cp = *ptr++;
-        bytes = 1;
-      }
-      else if ((*ptr & 0xe0) == 0xc0)
-      {
-        if (avail < 1)
-          throw std::runtime_error("Invalid UTF-8");
-        cp = (*ptr++ & 0x1f) << 6;
-        cp |= *ptr++ & 0x3f;
-        --avail;
-        bytes = 2;
-      }
-      else if ((*ptr & 0xf0) == 0xe0)
-      {
-        if (avail < 2)
-          throw std::runtime_error("Invalid UTF-8");
-        cp = (*ptr++ & 0xf) << 12;
-        cp |= (*ptr++ & 0x3f) << 6;
-        cp |= *ptr++ & 0x3f;
-        avail -= 2;
-        bytes = 3;
-      }
-      else if ((*ptr & 0xf8) == 0xf0)
-      {
-        if (avail < 3)
-          throw std::runtime_error("Invalid UTF-8");
-        cp = (*ptr++ & 0x7) << 18;
-        cp |= (*ptr++ & 0x3f) << 12;
-        cp |= (*ptr++ & 0x3f) << 6;
-        cp |= *ptr++ & 0x3f;
-        avail -= 3;
-        bytes = 4;
-      }
-      else
-        throw std::runtime_error("Invalid UTF-8");
-
-      cp = std::towlower(cp);
-      wptr = wbuf;
-      switch (bytes)
-      {
-        case 1: *wptr++ = cp; break;
-        case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break;
-        case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
-        case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break;
-        default: throw std::runtime_error("Invalid UTF-8");
-      }
-      *wptr = 0;
-      sc += T(wbuf, bytes);
-      cp = 0;
-      bytes = 1;
-    }
-    return sc;
-  }
-
   struct WordHash
   {
     std::size_t operator()(const epee::wipeable_string &s) const
     {
-      const epee::wipeable_string sc = utf8canonical(s);
+      const epee::wipeable_string sc = tools::utf8canonical(s, [](wint_t c) -> wint_t { return std::towlower(c); });  // std::towlower is applied to every code point before hashing
       return epee::fnv::FNV1a(sc.data(), sc.size());
     }
   };
@@ -153,8 +87,8 @@ namespace Language
   {
     bool operator()(const epee::wipeable_string &s0, const epee::wipeable_string &s1) const
     {
-      const epee::wipeable_string s0c = utf8canonical(s0);
-      const epee::wipeable_string s1c = utf8canonical(s1);
+      const epee::wipeable_string s0c = tools::utf8canonical(s0, [](wint_t c) -> wint_t { return std::towlower(c); });
+      const epee::wipeable_string s1c = tools::utf8canonical(s1, [](wint_t c) -> wint_t { return std::towlower(c); });
       return s0c == s1c;
     }
   };