PhutilRemarkupTableBlockRule.php 3.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
<?php

/**
 * Remarkup block rule for tables written with literal "<table>" markup.
 *
 * The rule claims a run of lines that starts with "<table>" and ends with
 * "</table>", parses it with PhutilHTMLParser, and converts the parsed rows
 * and cells into the row specifications consumed by renderRemarkupTable().
 * Whenever the markup has a shape this rule does not accept, the raw text is
 * returned instead of a rendered table.
 */
final class PhutilRemarkupTableBlockRule extends PhutilRemarkupBlockRule {

  /**
   * Count the lines which make up the table block starting at the cursor.
   *
   * A block starts on a line which begins (after optional whitespace) with
   * "<table>", matched case-insensitively. It extends through the first line
   * which ends (before optional whitespace) with "</table>", or through the
   * last available line if no closing tag is found.
   *
   * @param list<string> $lines All lines of the input.
   * @param int $cursor Index of the line where the block would start.
   * @return int Number of lines in the block, or 0 if the line at the cursor
   *   does not open a table.
   */
  public function getMatchingLineCount(array $lines, $cursor) {
    if (!preg_match('/^\s*<table>/i', $lines[$cursor])) {
      return 0;
    }

    $close_pattern = '@</table>\s*$@i';

    // The opening line always belongs to the block. If the table is also
    // closed on that same line, the block is complete: without this check a
    // one-line table would swallow every following line up to the next
    // "</table>" (or the end of the input).
    $num_lines = 1;
    if (preg_match($close_pattern, $lines[$cursor])) {
      return $num_lines;
    }

    $cursor++;
    while (isset($lines[$cursor])) {
      $num_lines++;
      if (preg_match($close_pattern, $lines[$cursor])) {
        break;
      }
      $cursor++;
    }

    return $num_lines;
  }

  /**
   * Render the text of a matched table block.
   *
   * The text is parsed as HTML and must contain exactly one "<table>" node,
   * surrounded by nothing but whitespace. If there is other content, or more
   * than one table, the rule fails and the original text is returned
   * unmodified.
   *
   * @param string $text Raw text of the block.
   * @param mixed $children Not used by this rule.
   * @return mixed The original text on failure. Otherwise the table output,
   *   joined with implode() when the engine is in text mode and with
   *   phutil_implode_html() when it is not.
   */
  public function markupText($text, $children) {
    $root = id(new PhutilHTMLParser())
      ->parseDocument($text);

    $nodes = $root->selectChildrenWithTags(array('table'));

    $out = array();
    $seen_table = false;
    foreach ($nodes as $node) {
      if ($node->isContentNode()) {
        $content = $node->getContent();

        if (!strlen(trim($content))) {
          // Ignore whitespace.
          continue;
        }

        // If we find other content, fail the rule. This can happen if the
        // input is two consecutive table tags on one line with some text
        // in between them, which we currently forbid.
        return $text;
      } else {
        // If we have multiple table tags, just return the raw text.
        if ($seen_table) {
          return $text;
        }
        $seen_table = true;

        $out[] = $this->newTable($node);
      }
    }

    if ($this->getEngine()->isTextMode()) {
      return implode('', $out);
    } else {
      return phutil_implode_html('', $out);
    }
  }

  /**
   * Build the rendered table for a parsed "<table>" node.
   *
   * Only "<colgroup>" and "<tr>" children are selected from the table, and
   * only "<td>" and "<th>" children from each row; any non-whitespace content
   * between them makes the table invalid. At most one "<colgroup>" is
   * accepted and it must come before the first row. An invalid table is
   * returned as its raw string.
   *
   * @param PhutilDOMNode $table The parsed table node.
   * @return mixed The result of renderRemarkupTable() for a valid table,
   *   or the table's raw string if its structure is rejected.
   */
  private function newTable(PhutilDOMNode $table) {
    $nodes = $table->selectChildrenWithTags(
      array(
        'colgroup',
        'tr',
      ));

    $colgroup = null;
    $rows = array();

    foreach ($nodes as $node) {
      if ($node->isContentNode()) {
        $content = $node->getContent();

        // If this is whitespace, ignore it.
        if (!strlen(trim($content))) {
          continue;
        }

        // If we have nonempty content between the rows, this isn't a valid
        // table. We can't really do anything reasonable with this, so just
        // fail out and render the raw text.
        return $table->newRawString();
      }

      if ($node->getTagName() === 'colgroup') {
        // This table has multiple "<colgroup />" tags. Just bail out.
        if ($colgroup !== null) {
          return $table->newRawString();
        }

        // This table has a "<colgroup />" after a "<tr />". We could parse
        // this, but just reject it out of an abundance of caution.
        if ($rows) {
          return $table->newRawString();
        }

        $colgroup = $node;
        continue;
      }

      $rows[] = $node;
    }

    $row_specs = array();

    foreach ($rows as $row) {
      $cells = $row->selectChildrenWithTags(array('td', 'th'));

      $cell_specs = array();
      foreach ($cells as $cell) {
        if ($cell->isContentNode()) {
          // Inspect the content node we are currently visiting. (This must
          // read from "$cell", not from the "$node" variable left over from
          // the row loop above.)
          $content = $cell->getContent();

          // Whitespace between cells is fine; ignore it.
          if (!strlen(trim($content))) {
            continue;
          }

          // Nonempty content between cells means this isn't a valid table,
          // so fail out and render the raw text.
          return $table->newRawString();
        }

        // Respect newlines in table cells as literal linebreaks.

        $content = $cell->newRawContentString();
        $content = trim($content, "\r\n");

        // Apply the inline rules to each line separately, then join the
        // lines back together with "<br />" tags.
        $lines = phutil_split_lines($content, $retain_endings = false);
        foreach ($lines as $key => $line) {
          $lines[$key] = $this->applyRules($line);
        }

        $content = phutil_implode_html(
          phutil_tag('br'),
          $lines);

        $cell_specs[] = array(
          'type' => $cell->getTagName(),
          'content' => $content,
        );
      }

      $row_specs[] = array(
        'type' => 'tr',
        'content' => $cell_specs,
      );
    }

    return $this->renderRemarkupTable($row_specs);
  }

}