<?php

/**
 * Remarkup block rule which recognizes a literal "<table>...</table>" block
 * and renders it through the shared Remarkup table renderer.
 *
 * Input which does not look like a single well-formed table (stray text
 * around or inside the table, multiple tables, misplaced "<colgroup />") is
 * returned as raw text rather than rendered.
 */
final class PhutilRemarkupTableBlockRule extends PhutilRemarkupBlockRule {

  /**
   * Count the lines, starting at the cursor, which belong to a table block.
   *
   * The block begins on a line which starts with optional whitespace followed
   * by "<table>" (case-insensitive). It extends through the first LATER line
   * which ends with "</table>" (optionally followed by whitespace), or to the
   * end of the input if no such line exists.
   *
   * NOTE: The closing tag is only searched for on lines after the opening
   * line, so a table which opens and closes on a single line keeps consuming
   * the lines which follow it.
   *
   * @param list<string> $lines  All input lines.
   * @param int          $cursor Index of the line to start matching at.
   * @return int Number of lines in the block, or 0 if the line at the cursor
   *   does not open a table.
   */
  public function getMatchingLineCount(array $lines, $cursor) {
    $num_lines = 0;

    if (preg_match('/^\s*<table>/i', $lines[$cursor])) {
      $num_lines++;
      $cursor++;

      while (isset($lines[$cursor])) {
        $num_lines++;
        if (preg_match('@</table>\s*$@i', $lines[$cursor])) {
          break;
        }
        $cursor++;
      }
    }

    return $num_lines;
  }

  /**
   * Render the text of a matched block.
   *
   * The text is parsed as HTML and must contain exactly one "<table>" with
   * nothing but whitespace around it. Otherwise, the original text is
   * returned unmodified.
   *
   * @param string $text     Raw text of the matched block.
   * @param wild   $children Unused by this rule.
   * @return wild Rendered table (plain string in text mode, HTML otherwise),
   *   or the original text if the block is rejected.
   */
  public function markupText($text, $children) {
    $root = id(new PhutilHTMLParser())
      ->parseDocument($text);

    // The selection is iterated as a mix of content nodes and "<table>"
    // nodes; both kinds are handled below.
    $nodes = $root->selectChildrenWithTags(array('table'));

    $out = array();
    $seen_table = false;
    foreach ($nodes as $node) {
      if ($node->isContentNode()) {
        $content = $node->getContent();

        if (!strlen(trim($content))) {
          // Ignore whitespace.
          continue;
        }

        // If we find other content, fail the rule. This can happen if the
        // input is two consecutive table tags on one line with some text
        // in between them, which we currently forbid.
        return $text;
      } else {
        // If we have multiple table tags, just return the raw text.
        if ($seen_table) {
          return $text;
        }
        $seen_table = true;

        $out[] = $this->newTable($node);
      }
    }

    if ($this->getEngine()->isTextMode()) {
      return implode('', $out);
    } else {
      return phutil_implode_html('', $out);
    }
  }

  /**
   * Build the rendered output for a single parsed "<table>" node.
   *
   * The table may contain at most one "<colgroup />", which must appear
   * before any "<tr />". Only whitespace is permitted between rows and
   * between cells. If the table violates these constraints, the raw string
   * for the table node is returned instead of a rendered table.
   *
   * @param PhutilDOMNode $table Parsed "<table>" node.
   * @return wild Result of rendering the table rows, or the raw table string
   *   if the table is rejected.
   */
  private function newTable(PhutilDOMNode $table) {
    $nodes = $table->selectChildrenWithTags(
      array(
        'colgroup',
        'tr',
      ));

    // The colgroup is tracked only to validate its count and position; its
    // contents are not used when rendering.
    $colgroup = null;
    $rows = array();

    foreach ($nodes as $node) {
      if ($node->isContentNode()) {
        $content = $node->getContent();

        // If this is whitespace, ignore it.
        if (!strlen(trim($content))) {
          continue;
        }

        // If we have nonempty content between the rows, this isn't a valid
        // table. We can't really do anything reasonable with this, so just
        // fail out and render the raw text.
        return $table->newRawString();
      }

      if ($node->getTagName() === 'colgroup') {
        // This table has multiple "<colgroup />" tags. Just bail out.
        if ($colgroup !== null) {
          return $table->newRawString();
        }

        // This table has a "<colgroup />" after a "<tr />". We could parse
        // this, but just reject it out of an abundance of caution.
        if ($rows) {
          return $table->newRawString();
        }

        $colgroup = $node;
        continue;
      }

      $rows[] = $node;
    }

    $row_specs = array();

    foreach ($rows as $row) {
      $cells = $row->selectChildrenWithTags(array('td', 'th'));

      $cell_specs = array();
      foreach ($cells as $cell) {
        if ($cell->isContentNode()) {
          // Inspect the node we are currently visiting. Reading "$node" here
          // would examine the stale variable left over from the row loop
          // above instead of the content between the cells.
          $content = $cell->getContent();

          // Whitespace between cells is fine.
          if (!strlen(trim($content))) {
            continue;
          }

          // Nonempty content between cells: reject the whole table.
          return $table->newRawString();
        }

        // Respect newlines in table cells as literal linebreaks.

        $content = $cell->newRawContentString();
        $content = trim($content, "\r\n");

        // Apply inline rules to each line separately, then join the lines
        // with "<br />" tags.
        $lines = phutil_split_lines($content, $retain_endings = false);
        foreach ($lines as $key => $line) {
          $lines[$key] = $this->applyRules($line);
        }

        $content = phutil_implode_html(
          phutil_tag('br'),
          $lines);

        $cell_specs[] = array(
          'type' => $cell->getTagName(),
          'content' => $content,
        );
      }

      $row_specs[] = array(
        'type' => 'tr',
        'content' => $cell_specs,
      );
    }

    return $this->renderRemarkupTable($row_specs);
  }

}