strbuf.c 6.01 KB
Newer Older
1 2 3 4 5
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

6
  Copyright 2012 Kay Sievers <kay@vrfy.org>
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <stdlib.h>
#include <string.h>

#include "util.h"
#include "strbuf.h"

Kay Sievers's avatar
Kay Sievers committed
28
/*
29
 * Strbuf stores given strings in a single continuous allocated memory
Kay Sievers's avatar
Kay Sievers committed
30 31 32
 * area. Identical strings are de-duplicated and return the same offset
 * as the first string stored. If the tail of a string already exists
 * in the buffer, the tail is returned.
Kay Sievers's avatar
Kay Sievers committed
33
 *
Kay Sievers's avatar
Kay Sievers committed
34 35
 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
 * information about the stored strings.
Kay Sievers's avatar
Kay Sievers committed
36 37 38 39 40 41 42 43 44 45
 *
 * Example of udev rules:
 *   $ ./udevadm test .
 *   ...
 *   read rules file: /usr/lib/udev/rules.d/99-systemd.rules
 *   rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
 *   23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
 *   ...
 */

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
struct strbuf *strbuf_new(void) {
        struct strbuf *str;

        str = new0(struct strbuf, 1);
        if (!str)
                return NULL;

        str->buf = new0(char, 1);
        if (!str->buf)
                goto err;
        str->len = 1;

        str->root = new0(struct strbuf_node, 1);
        if (!str->root)
                goto err;
        str->nodes_count = 1;
        return str;
err:
        free(str->buf);
        free(str->root);
        free(str);
        return NULL;
}

static void strbuf_node_cleanup(struct strbuf_node *node) {
        size_t i;

        for (i = 0; i < node->children_count; i++)
                strbuf_node_cleanup(node->children[i].child);
        free(node->children);
        free(node);
}

Kay Sievers's avatar
Kay Sievers committed
79
/* clean up trie data, leave only the string buffer */
80 81 82 83 84 85 86 87
void strbuf_complete(struct strbuf *str) {
        if (!str)
                return;
        if (str->root)
                strbuf_node_cleanup(str->root);
        str->root = NULL;
}

Kay Sievers's avatar
Kay Sievers committed
88
/* clean up everything */
89 90 91 92 93 94 95 96 97
void strbuf_cleanup(struct strbuf *str) {
        if (!str)
                return;
        if (str->root)
                strbuf_node_cleanup(str->root);
        free(str->buf);
        free(str);
}

98 99
static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
                               const struct strbuf_child_entry *n2) {
100 101 102
        return n1->c - n2->c;
}

103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
static void bubbleinsert(struct strbuf_node *node,
                         uint8_t c,
                         struct strbuf_node *node_child) {

        struct strbuf_child_entry new = {
                .c = c,
                .child = node_child,
        };
        int left = 0, right = node->children_count;

        while (right > left) {
                int middle = (right + left) / 2 ;
                if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
                        left = middle + 1;
                else
                        right = middle;
        }

        memmove(node->children + left + 1, node->children + left,
                sizeof(struct strbuf_child_entry) * (node->children_count - left));
        node->children[left] = new;

        node->children_count ++;
}

Kay Sievers's avatar
Kay Sievers committed
128
/* add string, return the index/offset into the buffer */
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
        uint8_t c;
        struct strbuf_node *node;
        size_t depth;
        char *buf_new;
        struct strbuf_child_entry *child;
        struct strbuf_node *node_child;
        ssize_t off;

        if (!str->root)
                return -EINVAL;

        /* search string; start from last character to find possibly matching tails */
        if (len == 0)
                return 0;
        str->in_count++;
        str->in_len += len;

        node = str->root;
        c = s[len-1];
        for (depth = 0; depth <= len; depth++) {
                struct strbuf_child_entry search;

                /* match against current node */
                off = node->value_off + node->value_len - len;
                if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
                        str->dedup_len += len;
                        str->dedup_count++;
                        return off;
                }

                /* lookup child node */
                c = s[len - 1 - depth];
                search.c = c;
163 164 165
                child = bsearch(&search, node->children, node->children_count,
                                sizeof(struct strbuf_child_entry),
                                (__compar_fn_t) strbuf_children_cmp);
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
                if (!child)
                        break;
                node = child->child;
        }

        /* add new string */
        buf_new = realloc(str->buf, str->len + len+1);
        if (!buf_new)
                return -ENOMEM;
        str->buf = buf_new;
        off = str->len;
        memcpy(str->buf + off, s, len);
        str->len += len;
        str->buf[str->len++] = '\0';

        /* new node */
        node_child = new0(struct strbuf_node, 1);
        if (!node_child)
                return -ENOMEM;
        node_child->value_off = off;
        node_child->value_len = len;

        /* extend array, add new entry, sort for bisection */
        child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
190 191
        if (!child) {
                free(node_child);
192
                return -ENOMEM;
193 194 195 196
        }

        str->nodes_count++;

197
        node->children = child;
198
        bubbleinsert(node, c, node_child);
199 200 201

        return off;
}