From 9f2c80205e234968f2519f60a6bc9fa6528148f7 Mon Sep 17 00:00:00 2001
From: "robertshield@chromium.org"
 <robertshield@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>
Date: Tue, 14 Dec 2010 20:57:55 +0000
Subject: [PATCH] Have Chrome Frame "support" IE conditional comment tags (of
 the downlevel-hidden variety) by ignoring them (rather than treating them as
 comment start/end) when parsing HTML streams.

BUG=65627
TEST=chrome_frame_unittests.exe, also syntax like <!--[if IE]><meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"><![endif]-->  will now activate CF.

Offtopic: I wonder what happens if I paste HTML into CL comments. Heh.


Review URL: http://codereview.chromium.org/5708007

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@69177 0039d316-1c4b-4281-b951-d872f2087c98
---
 chrome_frame/html_utils.cc               |  36 +++++++-
 chrome_frame/html_utils.h                |   9 +-
 chrome_frame/test/html_util_unittests.cc | 102 ++++++++++++++++++++++-
 3 files changed, 142 insertions(+), 5 deletions(-)

diff --git a/chrome_frame/html_utils.cc b/chrome_frame/html_utils.cc
index 7e41210fcaa63..1d4d62934057b 100644
--- a/chrome_frame/html_utils.cc
+++ b/chrome_frame/html_utils.cc
@@ -215,7 +215,8 @@ bool HTMLScanner::IsQuote(wchar_t c) {
   return quotes_.find(c) != std::wstring::npos;
 }
 
-bool HTMLScanner::IsHTMLCommentClose(StringRange* html_string, StrPos pos) {
+bool HTMLScanner::IsHTMLCommentClose(const StringRange* html_string,
+                                     StrPos pos) {
   if (pos < html_string->end_ && pos > html_string->start_ + 2 &&
       *pos == L'>') {
     return *(pos-1) == L'-' && *(pos-2) == L'-';
@@ -223,6 +224,16 @@ bool HTMLScanner::IsHTMLCommentClose(StringRange* html_string, StrPos pos) {
   return false;
 }
 
+bool HTMLScanner::IsIEConditionalCommentClose(const StringRange* html_string,
+                                              StrPos pos) {
+  if (pos < html_string->end_ && pos > html_string->start_ + 2 &&
+      *pos == L'>') {
+    return *(pos-1) == L']';
+  }
+  return false;
+}
+
+
 bool HTMLScanner::NextTag(StringRange* html_string, StringRange* tag) {
   DCHECK(NULL != html_string);
   DCHECK(NULL != tag);
@@ -245,9 +256,28 @@ bool HTMLScanner::NextTag(StringRange* html_string, StringRange* tag) {
   std::wstring tag_name;
   StringRange start_range(tag->start_, html_string->end_);
   start_range.GetTagName(&tag_name);
-  if (StartsWith(tag_name, L"!--", true)) {
-    // We're inside a comment tag, keep going until we get out of it.
+  if (StartsWith(tag_name, L"!--[if", true)) {
+    // This looks like the beginning of an IE conditional comment, scan until
+    // we hit the end which always looks like ']>'. For now we disregard the
+    // contents of the condition, and always assume true.
+    // TODO(robertshield): Optionally support the grammar defined by
+    // http://msdn.microsoft.com/en-us/library/ms537512(VS.85).aspx#syntax.
+    while (tag->end_ < html_string->end_ &&
+           !IsIEConditionalCommentClose(html_string, tag->end_)) {
+      tag->end_++;
+    }
+  } else if (StartsWith(tag_name, L"!--", true)) {
+    // We're inside a comment tag which ends in '-->'. Keep going until we
+    // reach the end.
+    while (tag->end_ < html_string->end_ &&
+           !IsHTMLCommentClose(html_string, tag->end_)) {
+      tag->end_++;
+    }
+  } else if (StartsWith(tag_name, L"![endif", true)) {
+    // We're inside the closing tag of an IE conditional comment which ends in
+    // either '-->' of ']>'. Keep going until we reach the end.
     while (tag->end_ < html_string->end_ &&
+           !IsIEConditionalCommentClose(html_string, tag->end_) &&
            !IsHTMLCommentClose(html_string, tag->end_)) {
       tag->end_++;
     }
diff --git a/chrome_frame/html_utils.h b/chrome_frame/html_utils.h
index 6af5022d95f04..285bc6342e2a2 100644
--- a/chrome_frame/html_utils.h
+++ b/chrome_frame/html_utils.h
@@ -107,7 +107,14 @@ class HTMLScanner {
   // string described by html_string, false otherwise.
   // For example with html_string describing <!-- foo> -->, pos must refer to
   // the last > for this method to return true.
-  bool IsHTMLCommentClose(StringRange* html_string, StrPos pos);
+  bool IsHTMLCommentClose(const StringRange* html_string, StrPos pos);
+
+  // Returns true if pos refers to the last character in the terminator of the
+  // opening tag of a downlevel-hidden conditional comment in IE as per
+  // http://msdn.microsoft.com/en-us/library/ms537512(VS.85).aspx#syntax
+  // For example with html_string describing <![if booga >wooga]>, pos must
+  // refer to the last > for this method to return true.
+  bool IsIEConditionalCommentClose(const StringRange* html_string, StrPos pos);
 
   // We store a (CollapsedWhitespace'd) copy of the html data.
   const std::wstring html_string_;
diff --git a/chrome_frame/test/html_util_unittests.cc b/chrome_frame/test/html_util_unittests.cc
index 02b54a484894a..b18251955079e 100644
--- a/chrome_frame/test/html_util_unittests.cc
+++ b/chrome_frame/test/html_util_unittests.cc
@@ -213,12 +213,112 @@ TEST_F(HtmlUtilUnittest, CloseTagInsideHTMLCommentTest) {
 
   HTMLScanner scanner(test_data.c_str());
 
-  // Grab the meta tag from the document and ensure that we get exactly one.
+  // Ensure that the the meta tag is NOT detected.
+  HTMLScanner::StringRangeList tag_list;
+  scanner.GetTagsByName(L"meta", &tag_list, L"body");
+  ASSERT_TRUE(tag_list.empty());
+}
+
+TEST_F(HtmlUtilUnittest, IEConditionalCommentTest) {
+  std::wstring test_data(
+      L"<!--[if lte IE 8]><META http-equiv=X-UA-Compatible content='chrome=1'/>"
+      L"<![endif]-->");
+
+  HTMLScanner scanner(test_data.c_str());
+
+  // Ensure that the the meta tag IS detected.
+  HTMLScanner::StringRangeList tag_list;
+  scanner.GetTagsByName(L"meta", &tag_list, L"body");
+  ASSERT_EQ(1, tag_list.size());
+}
+
+TEST_F(HtmlUtilUnittest, IEConditionalCommentWithNestedCommentTest) {
+  std::wstring test_data(
+      L"<!--[if IE]><!--<META http-equiv=X-UA-Compatible content='chrome=1'/>"
+      L"--><![endif]-->");
+
+  HTMLScanner scanner(test_data.c_str());
+
+  // Ensure that the the meta tag IS NOT detected.
   HTMLScanner::StringRangeList tag_list;
   scanner.GetTagsByName(L"meta", &tag_list, L"body");
   ASSERT_TRUE(tag_list.empty());
 }
 
+TEST_F(HtmlUtilUnittest, IEConditionalCommentWithMultipleNestedTagsTest) {
+  std::wstring test_data(
+      L"<!--[if lte IE 8]>        <META http-equiv=X-UA-Compatible "
+      L"content='chrome=1'/><foo bar></foo><foo baz/><![endif]-->"
+      L"<boo hoo><boo hah>");
+
+  HTMLScanner scanner(test_data.c_str());
+
+  // Ensure that the the meta tag IS detected.
+  HTMLScanner::StringRangeList meta_tag_list;
+  scanner.GetTagsByName(L"meta", &meta_tag_list, L"body");
+  ASSERT_EQ(1, meta_tag_list.size());
+
+  // Ensure that the foo tags are also detected.
+  HTMLScanner::StringRangeList foo_tag_list;
+  scanner.GetTagsByName(L"foo", &foo_tag_list, L"body");
+  ASSERT_EQ(2, foo_tag_list.size());
+
+  // Ensure that the boo tags are also detected.
+  HTMLScanner::StringRangeList boo_tag_list;
+  scanner.GetTagsByName(L"boo", &boo_tag_list, L"body");
+  ASSERT_EQ(2, boo_tag_list.size());
+}
+
+TEST_F(HtmlUtilUnittest, IEConditionalCommentWithAlternateEndingTest) {
+  std::wstring test_data(
+      L"<!--[if lte IE 8]>        <META http-equiv=X-UA-Compatible "
+      L"content='chrome=1'/><foo bar></foo><foo baz/><![endif]>"
+      L"<boo hoo><!--><boo hah>");
+
+  HTMLScanner scanner(test_data.c_str());
+
+  // Ensure that the the meta tag IS detected.
+  HTMLScanner::StringRangeList meta_tag_list;
+  scanner.GetTagsByName(L"meta", &meta_tag_list, L"body");
+  ASSERT_EQ(1, meta_tag_list.size());
+
+  // Ensure that the foo tags are also detected.
+  HTMLScanner::StringRangeList foo_tag_list;
+  scanner.GetTagsByName(L"foo", &foo_tag_list, L"body");
+  ASSERT_EQ(2, foo_tag_list.size());
+
+  // Ensure that the boo tags are also detected.
+  HTMLScanner::StringRangeList boo_tag_list;
+  scanner.GetTagsByName(L"boo", &boo_tag_list, L"body");
+  ASSERT_EQ(2, boo_tag_list.size());
+}
+
+TEST_F(HtmlUtilUnittest, IEConditionalCommentNonTerminatedTest) {
+  // This test shouldn't detect any tags up until the end of the conditional
+  // comment tag.
+  std::wstring test_data(
+      L"<!--[if lte IE 8>        <META http-equiv=X-UA-Compatible "
+      L"content='chrome=1'/><foo bar></foo><foo baz/><![endif]>"
+      L"<boo hoo><!--><boo hah>");
+
+  HTMLScanner scanner(test_data.c_str());
+
+  // Ensure that the the meta tag IS NOT detected.
+  HTMLScanner::StringRangeList meta_tag_list;
+  scanner.GetTagsByName(L"meta", &meta_tag_list, L"body");
+  ASSERT_TRUE(meta_tag_list.empty());
+
+  // Ensure that the foo tags are NOT detected.
+  HTMLScanner::StringRangeList foo_tag_list;
+  scanner.GetTagsByName(L"foo", &foo_tag_list, L"body");
+  ASSERT_TRUE(foo_tag_list.empty());
+
+  // Ensure that the boo tags are detected.
+  HTMLScanner::StringRangeList boo_tag_list;
+  scanner.GetTagsByName(L"boo", &boo_tag_list, L"body");
+  ASSERT_EQ(2, boo_tag_list.size());
+}
+
 TEST_F(HtmlUtilUnittest, AddChromeFrameToUserAgentValue) {
   struct TestCase {
     std::string input_;
-- 
GitLab