Commit b5ed55cc authored by Gabriel Krisman Bertazi
Browse files

lib: Implement the ciopfs basic behavior



This is a dummy implementation for performance comparison with the FUSE
filesystem ciopfs.  It converts names to lowercase on-disk, so names are
not stable, but this is what gives the best performance.  It performs
about 10x faster than ciopfs on inexact-case searches.  For case-exact
matches, the two perform pretty much the same, which is also pretty much
the same as flat access to the filesystem, i.e. we won't get any better
in that case.

However, it has the same issues as ciopfs:

  - Names are not stable.
  - Uppercase files created outside the CI mountpoint are hidden.
  - It doesn't handle UTF-8.

This is a very trivial implementation, but one which demonstrates the
advantages of syscall_intercept over FUSE approaches for this basic
scenario.

The benchmark I used for the above was pathwalk, which does a simple
create, randomizes the pathname case, and then searches.  For 10k files,
the following times were collected.

=== CI lookups ===

* libcasefold test:  inexact match of 10k files
LD_PRELOAD=libcasefold.so ./pathwalkci /tmp/ci/ 10000 ci
created 10000 files
Performing case inexact lookup
found 10000 files. TIME: 0.011

* CIOPFS test:  inexact match of 10k files
./pathwalkci /tmp/ciopfs-ci/ 10000 ci
[git:master@pathwalkci]
created 10000 files
Performing case inexact lookup
found 10000 files. TIME: 0.118

=== CS lookups ===

* libcasefold test:  Exact matches of 10k files
LD_PRELOAD=libcasefold.so   ./pathwalkci /tmp/ci/ 10000
created 10000 files
Performing case exact lookup
found 10000 files. TIME: 0.011

* CIOPFS test:  exact match of 10k files
./pathwalkci /tmp/ciopfs-ci/ 10000
created 10000 files
Performing case exact lookup
found 10000 files. TIME: 0.014

* Flat tests: Exact match of 10k files
./pathwalkci /tmp/ci/ 10000
created 10000 files
Performing case exact lookup
found 10000 files. TIME: 0.009

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
parent 62d82062
......@@ -22,135 +22,11 @@
#define print_debug(str)
#endif
/*
 * One directory record as laid out in the buffer filled by getdents64.
 *
 * d_ino and d_off are spelled with explicit 64-bit types instead of
 * long/unsigned long: on targets where long is 32 bits the old declaration
 * would place d_reclen, d_type and d_name at the wrong offsets.  Records are
 * variable-length; d_reclen gives the distance to the next one.
 */
struct linux_dirent64 {
	unsigned long long d_ino;	/* 64-bit inode number */
	long long d_off;		/* 64-bit offset to next structure */
	unsigned short d_reclen;	/* Size of this dirent */
	unsigned char d_type;		/* File type */
	char d_name[];			/* Filename (null-terminated) */
};
/*
 * Issue SYS_getdents64 through syscall_no_intercept().
 *
 * fd:     directory file descriptor to read from
 * buffer: destination for the raw directory records
 * size:   capacity of buffer in bytes
 *
 * Returns whatever the syscall returned, narrowed to int.
 */
static inline int ni_getdents64(unsigned int fd, char *buffer,
				unsigned int size)
{
	long nbytes = syscall_no_intercept(SYS_getdents64, fd, buffer, size);

	return (int) nbytes;
}
/*
 * Issue SYS_open through syscall_no_intercept().
 *
 * pathname, flags and mode are forwarded to the syscall unchanged.
 * Returns whatever the syscall returned, narrowed to int.
 */
static inline int ni_open(const char *pathname, int flags, int mode)
{
	long fd = syscall_no_intercept(SYS_open, pathname, flags, mode);

	return (int) fd;
}
/*
 * Open path read-only, requiring it to be a directory (O_DIRECTORY).
 *
 * Returns the result of ni_open() unchanged.
 */
static int open_directory(char *path)
{
	const int dir_flags = O_RDONLY | O_DIRECTORY;

	return ni_open(path, dir_flags, 0);
}
/*
 * Scan one buffer of directory records for a case-insensitive name match.
 *
 * ciname: name to look for, compared with strcasecmp()
 * buffer: start of the packed records
 * size:   number of valid bytes in buffer
 *
 * Records are walked by advancing d_reclen bytes at a time.  Returns a
 * pointer to the first matching record inside buffer, or NULL if no record
 * in this block matches.
 */
static struct linux_dirent64 *search_block(char *ciname, char *buffer, int size)
{
	int pos = 0;

	while (pos < size) {
		struct linux_dirent64 *entry =
			(struct linux_dirent64 *) (buffer + pos);

		if (!strcasecmp(entry->d_name, ciname))
			return entry;

		pos += entry->d_reclen;
	}

	return NULL;
}
/*
 * Look for a case-insensitive match of component in the directory open on
 * descriptor parent.
 *
 * The directory is read block by block into buffer (capacity size bytes)
 * until a match is found or ni_getdents64() returns zero or a negative
 * value; a read error is therefore reported the same way as "not found".
 *
 * Returns 1 and stores the matching record in *d_result on success (the
 * record points into buffer), 0 otherwise.
 */
static int ci_lookup(int parent, char *component,
		     char *buffer, int size,
		     struct linux_dirent64 **d_result)
{
	int filled;

	while ((filled = ni_getdents64(parent, buffer, size)) > 0) {
		struct linux_dirent64 *match =
			search_block(component, buffer, filled);

		if (match != NULL) {
			*d_result = match;
			return 1;
		}
	}

	return 0;
}
/* Return non-zero when name is exactly "." or "..". */
static int is_dot_entry(const char *name)
{
	if (name[0] != '.')
		return 0;
	if (name[1] == '\0')
		return 1;
	return name[1] == '.' && name[2] == '\0';
}

/*
 * Resolve path, component by component and case-insensitively, below the
 * directory parent, writing the on-disk spelling to rpath.
 *
 * parent: directory the walk starts from; copied verbatim to rpath
 * path:   '/'-separated components to resolve; modified in place by
 *         strtok_r()
 * rpath:  output buffer, assumed to hold at least PATH_MAX bytes; always
 *         NUL-terminated once parent has been opened successfully
 *
 * "." and ".." are copied through as-is without a directory scan.
 *
 * Returns 0 on success, -ENOENT if parent or an intermediate directory
 * cannot be opened or a component has no match, -ENAMETOOLONG if the result
 * would not fit in PATH_MAX bytes, and -ENOMEM if the scratch buffer cannot
 * be allocated.
 */
static int search_ci_path(char *parent, char *path, char *rpath)
{
	const int bufsiz = (int) (sizeof(struct linux_dirent64) * 1000);
	char *buffer;
	char *saveptr, *component;
	size_t rpath_off;
	int parent_fd;
	int ret = 0;

	buffer = malloc(bufsiz);
	if (!buffer)
		return -ENOMEM;

	parent_fd = open_directory(parent);
	if (parent_fd < 0) {
		/* Free the scratch buffer on this path too. */
		ret = -ENOENT;
		goto out;
	}

	rpath_off = strlen(parent);
	if (rpath_off >= PATH_MAX) {
		ret = -ENAMETOOLONG;
		goto out_close_parent;
	}
	memcpy(rpath, parent, rpath_off);
	/* Keep rpath a valid string even when path has no components. */
	rpath[rpath_off] = '\0';

	component = strtok_r(path, "/", &saveptr);
	while (component) {
		const char *name;
		size_t name_len;

		if (is_dot_entry(component)) {
			/* Exact names that exist in every directory. */
			name = component;
		} else {
			struct linux_dirent64 *dentry;

			if (!ci_lookup(parent_fd, component, buffer, bufsiz,
				       &dentry)) {
				ret = -ENOENT;
				goto out_close_parent;
			}
			name = dentry->d_name;
		}

		/* Check the bound before writing: '/' + name + NUL. */
		name_len = strlen(name);
		if (rpath_off + 1 + name_len >= PATH_MAX) {
			ret = -ENAMETOOLONG;
			goto out_close_parent;
		}
		rpath[rpath_off++] = '/';
		memcpy(rpath + rpath_off, name, name_len);
		rpath_off += name_len;
		rpath[rpath_off] = '\0';

		component = strtok_r(NULL, "/", &saveptr);
		if (!component)
			break;

		/* More components follow: descend into the directory that
		 * was just resolved.  The old descriptor is closed here,
		 * exactly once, before being replaced. */
		close(parent_fd);
		parent_fd = open_directory(rpath);
		if (parent_fd < 0) {
			ret = -ENOENT;
			goto out;
		}
	}

	/* rpath now holds the exact-case spelling of every component. */
out_close_parent:
	close(parent_fd);
out:
	free(buffer);
	return ret;
}
static int syscall_hook(long syscall_number, long arg0, long arg1,
long arg2, long arg3,long arg4, long arg5,
long *result)
{
int r;
char **path, rpath[PATH_MAX];
char **path, *original_path;
int parent_index;
switch (syscall_number)
......@@ -176,21 +52,23 @@ static int syscall_hook(long syscall_number, long arg0, long arg1,
if (parent_index < 0)
goto exec_syscall;
/* The original path needs to be returned to the caller
* unmodified. We don't want to confuse upper layers more than
* we have to. */
original_path = *path;
*path = strdup(*path);
(*path)[parent_index] = '\0';
r = search_ci_path(*path, &(*path)[parent_index+1], rpath);
(*path)[parent_index] = '/';
if (r) {
*result = -ENOENT;
return 0;
}
for (int i = parent_index; (*path)[i] != '\0'; i++)
if ((*path)[i] >= 'a' && (*path)[i] <= 'z')
(*path)[i] &= ~0x20;
/* do invocation and return */
*path = rpath;
*result = syscall_no_intercept(syscall_number, arg0, arg1, arg2,
arg3, arg4, arg5);
/* Remove the modified path and recover the original path. */
free(*path);
*path = original_path;
return 0;
exec_syscall:
......@@ -204,5 +82,3 @@ static __attribute__((constructor)) void init(void)
load_mount_points();
intercept_hook_point = syscall_hook;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment