mcoverlayfs: add new base from 4.18.14

This just lays out new files so the next commit is easier to review;
nothing changes here

Change-Id: I66669877d2d10632f5436c0eeb32248cd4c8b996
This commit is contained in:
Dominique Martinet
2018-10-17 10:48:51 +09:00
parent 6581f9b4b2
commit fc2775c932
11 changed files with 8677 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
KDIR ?= @KDIR@
ARCH ?= @ARCH@
# POSTK_DEBUG_ARCH_DEP_105 make install DESTDIR enable.
# KMODDIR = @KMODDIR@
DESTDIR ?= @DESTDIR@
KMODDIR = $(DESTDIR)/@KMODDIR@
src = @abs_srcdir@
obj-m += mcoverlay.o
mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o export.o namei.o util.o
.PHONY: clean install modules
modules:
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
clean:
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
install:
mkdir -p -m 755 $(KMODDIR)
install -m 644 mcoverlay.ko $(KMODDIR)

View File

@@ -0,0 +1,801 @@
/*
*
* Copyright (C) 2011 Novell Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/splice.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/sched/signal.h>
#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
static bool __read_mostly ovl_check_copy_up;
module_param_named(check_copy_up, ovl_check_copy_up, bool,
S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(ovl_check_copy_up,
"Warn on copy-up when causing process also has a R/O fd open");
static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
{
const struct dentry *dentry = data;
if (file_inode(f) == d_inode(dentry))
pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
f, fd, current->pid, current->comm);
return 0;
}
/*
* Check the fds open by this process and warn if something like the following
* scenario is about to occur:
*
* fd1 = open("foo", O_RDONLY);
* fd2 = open("foo", O_RDWR);
*/
static void ovl_do_check_copy_up(struct dentry *dentry)
{
if (ovl_check_copy_up)
iterate_fd(current->files, 0, ovl_check_fd, dentry);
}
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
int uninitialized_var(error);
size_t slen;
if (!(old->d_inode->i_opflags & IOP_XATTR) ||
!(new->d_inode->i_opflags & IOP_XATTR))
return 0;
list_size = vfs_listxattr(old, NULL, 0);
if (list_size <= 0) {
if (list_size == -EOPNOTSUPP)
return 0;
return list_size;
}
buf = kzalloc(list_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
list_size = vfs_listxattr(old, buf, list_size);
if (list_size <= 0) {
error = list_size;
goto out;
}
for (name = buf; list_size; name += slen) {
slen = strnlen(name, list_size) + 1;
/* underlying fs providing us with an broken xattr list? */
if (WARN_ON(slen > list_size)) {
error = -EIO;
break;
}
list_size -= slen;
if (ovl_is_private_xattr(name))
continue;
retry:
size = vfs_getxattr(old, name, value, value_size);
if (size == -ERANGE)
size = vfs_getxattr(old, name, NULL, 0);
if (size < 0) {
error = size;
break;
}
if (size > value_size) {
void *new;
new = krealloc(value, size, GFP_KERNEL);
if (!new) {
error = -ENOMEM;
break;
}
value = new;
value_size = size;
goto retry;
}
error = security_inode_copy_up_xattr(name);
if (error < 0 && error != -EOPNOTSUPP)
break;
if (error == 1) {
error = 0;
continue; /* Discard */
}
error = vfs_setxattr(new, name, value, size, 0);
if (error)
break;
}
kfree(value);
out:
kfree(buf);
return error;
}
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
{
struct file *old_file;
struct file *new_file;
loff_t old_pos = 0;
loff_t new_pos = 0;
int error = 0;
if (len == 0)
return 0;
old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
if (IS_ERR(new_file)) {
error = PTR_ERR(new_file);
goto out_fput;
}
/* Try to use clone_file_range to clone up within the same fs */
error = vfs_clone_file_range(old_file, 0, new_file, 0, len);
if (!error)
goto out;
/* Couldn't clone, so now we try to copy the data */
error = 0;
/* FIXME: copy up sparse files efficiently */
while (len) {
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
long bytes;
if (len < this_len)
this_len = len;
if (signal_pending_state(TASK_KILLABLE, current)) {
error = -EINTR;
break;
}
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
if (bytes <= 0) {
error = bytes;
break;
}
WARN_ON(old_pos != new_pos);
len -= bytes;
}
out:
if (!error)
error = vfs_fsync(new_file, 0);
fput(new_file);
out_fput:
fput(old_file);
return error;
}
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
.ia_valid =
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
.ia_atime = stat->atime,
.ia_mtime = stat->mtime,
};
return notify_change(upperdentry, &attr, NULL);
}
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
{
int err = 0;
if (!S_ISLNK(stat->mode)) {
struct iattr attr = {
.ia_valid = ATTR_MODE,
.ia_mode = stat->mode,
};
err = notify_change(upperdentry, &attr, NULL);
}
if (!err) {
struct iattr attr = {
.ia_valid = ATTR_UID | ATTR_GID,
.ia_uid = stat->uid,
.ia_gid = stat->gid,
};
err = notify_change(upperdentry, &attr, NULL);
}
if (!err)
ovl_set_timestamps(upperdentry, stat);
return err;
}
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
{
struct ovl_fh *fh;
int fh_type, fh_len, dwords;
void *buf;
int buflen = MAX_HANDLE_SZ;
uuid_t *uuid = &real->d_sb->s_uuid;
buf = kmalloc(buflen, GFP_KERNEL);
if (!buf)
return ERR_PTR(-ENOMEM);
/*
* We encode a non-connectable file handle for non-dir, because we
* only need to find the lower inode number and we don't want to pay
* the price or reconnecting the dentry.
*/
dwords = buflen >> 2;
fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
buflen = (dwords << 2);
fh = ERR_PTR(-EIO);
if (WARN_ON(fh_type < 0) ||
WARN_ON(buflen > MAX_HANDLE_SZ) ||
WARN_ON(fh_type == FILEID_INVALID))
goto out;
BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
fh_len = offsetof(struct ovl_fh, fid) + buflen;
fh = kmalloc(fh_len, GFP_KERNEL);
if (!fh) {
fh = ERR_PTR(-ENOMEM);
goto out;
}
fh->version = OVL_FH_VERSION;
fh->magic = OVL_FH_MAGIC;
fh->type = fh_type;
fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
/*
* When we will want to decode an overlay dentry from this handle
* and all layers are on the same fs, if we get a disconncted real
* dentry when we decode fid, the only way to tell if we should assign
* it to upperdentry or to lowerstack is by checking this flag.
*/
if (is_upper)
fh->flags |= OVL_FH_FLAG_PATH_UPPER;
fh->len = fh_len;
fh->uuid = *uuid;
memcpy(fh->fid, buf, buflen);
out:
kfree(buf);
return fh;
}
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper)
{
const struct ovl_fh *fh = NULL;
int err;
/*
* When lower layer doesn't support export operations store a 'null' fh,
* so we can use the overlay.origin xattr to distignuish between a copy
* up and a pure upper inode.
*/
if (ovl_can_decode_fh(lower->d_sb)) {
fh = ovl_encode_real_fh(lower, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
/*
* Do not fail when upper doesn't support xattrs.
*/
err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
fh ? fh->len : 0, 0);
kfree(fh);
return err;
}
/* Store file handle of @upper dir in @index dir entry */
static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
{
const struct ovl_fh *fh;
int err;
fh = ovl_encode_real_fh(upper, true);
if (IS_ERR(fh))
return PTR_ERR(fh);
err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
kfree(fh);
return err;
}
/*
* Create and install index entry.
*
* Caller must hold i_mutex on indexdir.
*/
static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
struct dentry *upper)
{
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
struct inode *dir = d_inode(indexdir);
struct dentry *index = NULL;
struct dentry *temp = NULL;
struct qstr name = { };
int err;
/*
* For now this is only used for creating index entry for directories,
* because non-dir are copied up directly to index and then hardlinked
* to upper dir.
*
* TODO: implement create index for non-dir, so we can call it when
* encoding file handle for non-dir in case index does not exist.
*/
if (WARN_ON(!d_is_dir(dentry)))
return -EIO;
/* Directory not expected to be indexed before copy up */
if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
return -EIO;
err = ovl_get_index_name(origin, &name);
if (err)
return err;
temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
err = PTR_ERR(temp);
if (IS_ERR(temp))
goto free_name;
err = ovl_set_upper_fh(upper, temp);
if (err)
goto out;
index = lookup_one_len(name.name, indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
} else {
err = ovl_do_rename(dir, temp, dir, index, 0);
dput(index);
}
out:
if (err)
ovl_cleanup(dir, temp);
dput(temp);
free_name:
kfree(name.name);
return err;
}
struct ovl_copy_up_ctx {
struct dentry *parent;
struct dentry *dentry;
struct path lowerpath;
struct kstat stat;
struct kstat pstat;
const char *link;
struct dentry *destdir;
struct qstr destname;
struct dentry *workdir;
bool tmpfile;
bool origin;
bool indexed;
};
static int ovl_link_up(struct ovl_copy_up_ctx *c)
{
int err;
struct dentry *upper;
struct dentry *upperdir = ovl_dentry_upper(c->parent);
struct inode *udir = d_inode(upperdir);
/* Mark parent "impure" because it may now contain non-pure upper */
err = ovl_set_impure(c->parent, upperdir);
if (err)
return err;
err = ovl_set_nlink_lower(c->dentry);
if (err)
return err;
inode_lock_nested(udir, I_MUTEX_PARENT);
upper = lookup_one_len(c->dentry->d_name.name, upperdir,
c->dentry->d_name.len);
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
dput(upper);
if (!err) {
/* Restore timestamps on parent (best effort) */
ovl_set_timestamps(upperdir, &c->pstat);
ovl_dentry_set_upper_alias(c->dentry);
}
}
inode_unlock(udir);
if (err)
return err;
err = ovl_set_nlink_upper(c->dentry);
return err;
}
static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
struct dentry **newdentry)
{
int err;
struct dentry *upper;
struct inode *udir = d_inode(c->destdir);
upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
if (IS_ERR(upper))
return PTR_ERR(upper);
if (c->tmpfile)
err = ovl_do_link(temp, udir, upper);
else
err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
if (!err)
*newdentry = dget(c->tmpfile ? upper : temp);
dput(upper);
return err;
}
static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
{
int err;
struct dentry *temp;
const struct cred *old_creds = NULL;
struct cred *new_creds = NULL;
struct ovl_cattr cattr = {
/* Can't properly set mode on creation because of the umask */
.mode = c->stat.mode & S_IFMT,
.rdev = c->stat.rdev,
.link = c->link
};
err = security_inode_copy_up(c->dentry, &new_creds);
temp = ERR_PTR(err);
if (err < 0)
goto out;
if (new_creds)
old_creds = override_creds(new_creds);
if (c->tmpfile)
temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
else
temp = ovl_create_temp(c->workdir, &cattr);
out:
if (new_creds) {
revert_creds(old_creds);
put_cred(new_creds);
}
return temp;
}
static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{
int err;
if (S_ISREG(c->stat.mode)) {
struct path upperpath;
ovl_path_upper(c->dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
upperpath.dentry = temp;
err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
if (err)
return err;
}
err = ovl_copy_xattr(c->lowerpath.dentry, temp);
if (err)
return err;
inode_lock(temp->d_inode);
err = ovl_set_attr(temp, &c->stat);
inode_unlock(temp->d_inode);
if (err)
return err;
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
*
* Don't set origin when we are breaking the association with a lower
* hard link.
*/
if (c->origin) {
err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
if (err)
return err;
}
return 0;
}
static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
{
struct inode *udir = c->destdir->d_inode;
struct inode *inode;
struct dentry *newdentry = NULL;
struct dentry *temp;
int err;
temp = ovl_get_tmpfile(c);
if (IS_ERR(temp))
return PTR_ERR(temp);
err = ovl_copy_up_inode(c, temp);
if (err)
goto out;
if (S_ISDIR(c->stat.mode) && c->indexed) {
err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
if (err)
goto out;
}
if (c->tmpfile) {
inode_lock_nested(udir, I_MUTEX_PARENT);
err = ovl_install_temp(c, temp, &newdentry);
inode_unlock(udir);
} else {
err = ovl_install_temp(c, temp, &newdentry);
}
if (err)
goto out;
inode = d_inode(c->dentry);
ovl_inode_update(inode, newdentry);
if (S_ISDIR(inode->i_mode))
ovl_set_flag(OVL_WHITEOUTS, inode);
out:
if (err && !c->tmpfile)
ovl_cleanup(d_inode(c->workdir), temp);
dput(temp);
return err;
}
/*
* Copy up a single dentry
*
* All renames start with copy up of source if necessary. The actual
* rename will only proceed once the copy up was successful. Copy up uses
* upper parent i_mutex for exclusion. Since rename can change d_parent it
* is possible that the copy up will lock the old parent. At that point
* the file will have already been copied up anyway.
*/
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
bool to_index = false;
/*
* Indexed non-dir is copied up directly to the index entry and then
* hardlinked to upper dir. Indexed dir is copied up to indexdir,
* then index entry is created and then copied up dir installed.
* Copying dir up to indexdir instead of workdir simplifies locking.
*/
if (ovl_need_index(c->dentry)) {
c->indexed = true;
if (S_ISDIR(c->stat.mode))
c->workdir = ovl_indexdir(c->dentry->d_sb);
else
to_index = true;
}
if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
c->origin = true;
if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
if (err)
return err;
} else if (WARN_ON(!c->parent)) {
/* Disconnected dentry must be copied up to index dir */
return -EIO;
} else {
/*
* Mark parent "impure" because it may now contain non-pure
* upper
*/
err = ovl_set_impure(c->parent, c->destdir);
if (err)
return err;
}
/* Should we copyup with O_TMPFILE or with workdir? */
if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
c->tmpfile = true;
err = ovl_copy_up_locked(c);
} else {
err = ovl_lock_rename_workdir(c->workdir, c->destdir);
if (!err) {
err = ovl_copy_up_locked(c);
unlock_rename(c->workdir, c->destdir);
}
}
if (err)
goto out;
if (c->indexed)
ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
if (to_index) {
/* Initialize nlink for copy up of disconnected dentry */
err = ovl_set_nlink_upper(c->dentry);
} else {
struct inode *udir = d_inode(c->destdir);
/* Restore timestamps on parent (best effort) */
inode_lock(udir);
ovl_set_timestamps(c->destdir, &c->pstat);
inode_unlock(udir);
ovl_dentry_set_upper_alias(c->dentry);
}
out:
if (to_index)
kfree(c->destname.name);
return err;
}
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
int flags)
{
int err;
DEFINE_DELAYED_CALL(done);
struct path parentpath;
struct ovl_copy_up_ctx ctx = {
.parent = parent,
.dentry = dentry,
.workdir = ovl_workdir(dentry),
};
if (WARN_ON(!ctx.workdir))
return -EROFS;
ovl_path_lower(dentry, &ctx.lowerpath);
err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
if (parent) {
ovl_path_upper(parent, &parentpath);
ctx.destdir = parentpath.dentry;
ctx.destname = dentry->d_name;
err = vfs_getattr(&parentpath, &ctx.pstat,
STATX_ATIME | STATX_MTIME,
AT_STATX_SYNC_AS_STAT);
if (err)
return err;
}
/* maybe truncate regular file. this has no effect on dirs */
if (flags & O_TRUNC)
ctx.stat.size = 0;
if (S_ISLNK(ctx.stat.mode)) {
ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
if (IS_ERR(ctx.link))
return PTR_ERR(ctx.link);
}
ovl_do_check_copy_up(ctx.lowerpath.dentry);
err = ovl_copy_up_start(dentry);
/* err < 0: interrupted, err > 0: raced with another copy-up */
if (unlikely(err)) {
if (err > 0)
err = 0;
} else {
if (!ovl_dentry_upper(dentry))
err = ovl_do_copy_up(&ctx);
if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
err = ovl_link_up(&ctx);
ovl_copy_up_end(dentry);
}
do_delayed_call(&done);
return err;
}
int ovl_copy_up_flags(struct dentry *dentry, int flags)
{
int err = 0;
const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
/*
* With NFS export, copy up can get called for a disconnected non-dir.
* In this case, we will copy up lower inode to index dir without
* linking it to upper dir.
*/
if (WARN_ON(disconnected && d_is_dir(dentry)))
return -EIO;
while (!err) {
struct dentry *next;
struct dentry *parent = NULL;
/*
* Check if copy-up has happened as well as for upper alias (in
* case of hard links) is there.
*
* Both checks are lockless:
* - false negatives: will recheck under oi->lock
* - false positives:
* + ovl_dentry_upper() uses memory barriers to ensure the
* upper dentry is up-to-date
* + ovl_dentry_has_upper_alias() relies on locking of
* upper parent i_rwsem to prevent reordering copy-up
* with rename.
*/
if (ovl_dentry_upper(dentry) &&
(ovl_dentry_has_upper_alias(dentry) || disconnected))
break;
next = dget(dentry);
/* find the topmost dentry not yet copied up */
for (; !disconnected;) {
parent = dget_parent(next);
if (ovl_dentry_upper(parent))
break;
dput(next);
next = parent;
}
err = ovl_copy_up_one(parent, next, flags);
dput(parent);
dput(next);
}
revert_creds(old_cred);
return err;
}
int ovl_copy_up(struct dentry *dentry)
{
return ovl_copy_up_flags(dentry, 0);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,848 @@
/*
* Overlayfs NFS export support.
*
* Amir Goldstein <amir73il@gmail.com>
*
* Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
static int ovl_encode_maybe_copy_up(struct dentry *dentry)
{
int err;
if (ovl_dentry_upper(dentry))
return 0;
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up(dentry);
ovl_drop_write(dentry);
}
if (err) {
pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
dentry, err);
}
return err;
}
/*
* Before encoding a non-upper directory file handle from real layer N, we need
* to check if it will be possible to reconnect an overlay dentry from the real
* lower decoded dentry. This is done by following the overlay ancestry up to a
* "layer N connected" ancestor and verifying that all parents along the way are
* "layer N connectable". If an ancestor that is NOT "layer N connectable" is
* found, we need to copy up an ancestor, which is "layer N connectable", thus
* making that ancestor "layer N connected". For example:
*
* layer 1: /a
* layer 2: /a/b/c
*
* The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
* copied up and renamed, upper dir /a will be indexed by lower dir /a from
* layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
* in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
* dentry from the connected lower dentry /a/b/c.
*
* To avoid this problem on decode time, we need to copy up an ancestor of
* /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is
* /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
* and when the time comes to decode the file handle from lower dentry /a/b/c,
* ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
* a connected overlay dentry will be accomplished.
*
* (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an
* entry /a in the lower layers above layer N and find the indexed dir /a from
* layer 1. If that improvement is made, then the check for "layer N connected"
* will need to verify there are no redirects in lower layers above N. In the
* example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
* is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
*
* layer 1: /A (redirect = /a)
* layer 2: /a/b/c
*/
/* Return the lowest layer for encoding a connectable file handle */
static int ovl_connectable_layer(struct dentry *dentry)
{
struct ovl_entry *oe = OVL_E(dentry);
/* We can get overlay root from root of any layer */
if (dentry == dentry->d_sb->s_root)
return oe->numlower;
/*
* If it's an unindexed merge dir, then it's not connectable with any
* lower layer
*/
if (ovl_dentry_upper(dentry) &&
!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
return 0;
/* We can get upper/overlay path from indexed/lower dentry */
return oe->lowerstack[0].layer->idx;
}
/*
* @dentry is "connected" if all ancestors up to root or a "connected" ancestor
* have the same uppermost lower layer as the origin's layer. We may need to
* copy up a "connectable" ancestor to make it "connected". A "connected" dentry
* cannot become non "connected", so cache positive result in dentry flags.
*
* Return the connected origin layer or < 0 on error.
*/
static int ovl_connect_layer(struct dentry *dentry)
{
struct dentry *next, *parent = NULL;
int origin_layer;
int err = 0;
if (WARN_ON(dentry == dentry->d_sb->s_root) ||
WARN_ON(!ovl_dentry_lower(dentry)))
return -EIO;
origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
return origin_layer;
/* Find the topmost origin layer connectable ancestor of @dentry */
next = dget(dentry);
for (;;) {
parent = dget_parent(next);
if (WARN_ON(parent == next)) {
err = -EIO;
break;
}
/*
* If @parent is not origin layer connectable, then copy up
* @next which is origin layer connectable and we are done.
*/
if (ovl_connectable_layer(parent) < origin_layer) {
err = ovl_encode_maybe_copy_up(next);
break;
}
/* If @parent is connected or indexed we are done */
if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) ||
ovl_test_flag(OVL_INDEX, d_inode(parent)))
break;
dput(next);
next = parent;
}
dput(parent);
dput(next);
if (!err)
ovl_dentry_set_flag(OVL_E_CONNECTED, dentry);
return err ?: origin_layer;
}
/*
* We only need to encode origin if there is a chance that the same object was
* encoded pre copy up and then we need to stay consistent with the same
* encoding also after copy up. If non-pure upper is not indexed, then it was
* copied up before NFS export was enabled. In that case we don't need to worry
* about staying consistent with pre copy up encoding and we encode an upper
* file handle. Overlay root dentry is a private case of non-indexed upper.
*
* The following table summarizes the different file handle encodings used for
* different overlay object types:
*
* Object type | Encoding
* --------------------------------
* Pure upper | U
* Non-indexed upper | U
* Indexed upper | L (*)
* Non-upper | L (*)
*
* U = upper file handle
* L = lower file handle
*
* (*) Connecting an overlay dir from real lower dentry is not always
* possible when there are redirects in lower layers and non-indexed merge dirs.
* To mitigate those case, we may copy up the lower dir ancestor before encode
* a lower dir file handle.
*
* Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
*/
static int ovl_check_encode_origin(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
/* Upper file handle for pure upper */
if (!ovl_dentry_lower(dentry))
return 0;
/*
* Upper file handle for non-indexed upper.
*
* Root is never indexed, so if there's an upper layer, encode upper for
* root.
*/
if (ovl_dentry_upper(dentry) &&
!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
return 0;
/*
* Decoding a merge dir, whose origin's ancestor is under a redirected
* lower dir or under a non-indexed upper is not always possible.
* ovl_connect_layer() will try to make origin's layer "connected" by
* copying up a "connectable" ancestor.
*/
if (d_is_dir(dentry) && ofs->upper_mnt)
return ovl_connect_layer(dentry);
/* Lower file handle for indexed and non-upper dir/non-dir */
return 1;
}
static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
{
struct ovl_fh *fh = NULL;
int err, enc_lower;
/*
* Check if we should encode a lower or upper file handle and maybe
* copy up an ancestor to make lower file handle connectable.
*/
err = enc_lower = ovl_check_encode_origin(dentry);
if (enc_lower < 0)
goto fail;
/* Encode an upper or lower file handle */
fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
ovl_dentry_upper(dentry), !enc_lower);
err = PTR_ERR(fh);
if (IS_ERR(fh))
goto fail;
err = -EOVERFLOW;
if (fh->len > buflen)
goto fail;
memcpy(buf, (char *)fh, fh->len);
err = fh->len;
out:
kfree(fh);
return err;
fail:
pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
dentry, err, buflen, fh ? (int)fh->len : 0,
fh ? fh->type : 0);
goto out;
}
static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
{
int res, len = *max_len << 2;
res = ovl_d_to_fh(dentry, (char *)fid, len);
if (res <= 0)
return FILEID_INVALID;
len = res;
/* Round up to dwords */
*max_len = (len + 3) >> 2;
return OVL_FILEID;
}
static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
struct dentry *dentry;
int type;
/* TODO: encode connectable file handles */
if (parent)
return FILEID_INVALID;
dentry = d_find_any_alias(inode);
if (WARN_ON(!dentry))
return FILEID_INVALID;
type = ovl_dentry_to_fh(dentry, fid, max_len);
dput(dentry);
return type;
}
/*
* Find or instantiate an overlay dentry from real dentries and index.
*/
static struct dentry *ovl_obtain_alias(struct super_block *sb,
struct dentry *upper_alias,
struct ovl_path *lowerpath,
struct dentry *index)
{
struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
struct dentry *upper = upper_alias ?: index;
struct dentry *dentry;
struct inode *inode;
struct ovl_entry *oe;
struct ovl_inode_params oip = {
.lowerpath = lowerpath,
.index = index,
.numlower = !!lower
};
/* We get overlay directory dentries with ovl_lookup_real() */
if (d_is_dir(upper ?: lower))
return ERR_PTR(-EIO);
oip.upperdentry = dget(upper);
inode = ovl_get_inode(sb, &oip);
if (IS_ERR(inode)) {
dput(upper);
return ERR_CAST(inode);
}
dentry = d_find_any_alias(inode);
if (!dentry) {
dentry = d_alloc_anon(inode->i_sb);
if (!dentry)
goto nomem;
oe = ovl_alloc_entry(lower ? 1 : 0);
if (!oe)
goto nomem;
if (lower) {
oe->lowerstack->dentry = dget(lower);
oe->lowerstack->layer = lowerpath->layer;
}
dentry->d_fsdata = oe;
if (upper_alias)
ovl_dentry_set_upper_alias(dentry);
}
return d_instantiate_anon(dentry, inode);
nomem:
iput(inode);
dput(dentry);
return ERR_PTR(-ENOMEM);
}
/* Get the upper or lower dentry in stach whose on layer @idx */
static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
{
struct ovl_entry *oe = dentry->d_fsdata;
int i;
if (!idx)
return ovl_dentry_upper(dentry);
for (i = 0; i < oe->numlower; i++) {
if (oe->lowerstack[i].layer->idx == idx)
return oe->lowerstack[i].dentry;
}
return NULL;
}
/*
* Lookup a child overlay dentry to get a connected overlay dentry whose real
* dentry is @real. If @real is on upper layer, we lookup a child overlay
* dentry with the same name as the real dentry. Otherwise, we need to consult
* index for lookup.
*/
static struct dentry *ovl_lookup_real_one(struct dentry *connected,
struct dentry *real,
struct ovl_layer *layer)
{
struct inode *dir = d_inode(connected);
struct dentry *this, *parent = NULL;
struct name_snapshot name;
int err;
/*
* Lookup child overlay dentry by real name. The dir mutex protects us
* from racing with overlay rename. If the overlay dentry that is above
* real has already been moved to a parent that is not under the
* connected overlay dir, we return -ECHILD and restart the lookup of
* connected real path from the top.
*/
inode_lock_nested(dir, I_MUTEX_PARENT);
err = -ECHILD;
parent = dget_parent(real);
if (ovl_dentry_real_at(connected, layer->idx) != parent)
goto fail;
/*
* We also need to take a snapshot of real dentry name to protect us
* from racing with underlying layer rename. In this case, we don't
* care about returning ESTALE, only from dereferencing a free name
* pointer because we hold no lock on the real dentry.
*/
take_dentry_name_snapshot(&name, real);
this = lookup_one_len(name.name, connected, strlen(name.name));
err = PTR_ERR(this);
if (IS_ERR(this)) {
goto fail;
} else if (!this || !this->d_inode) {
dput(this);
err = -ENOENT;
goto fail;
} else if (ovl_dentry_real_at(this, layer->idx) != real) {
dput(this);
err = -ESTALE;
goto fail;
}
out:
release_dentry_name_snapshot(&name);
dput(parent);
inode_unlock(dir);
return this;
fail:
pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
this = ERR_PTR(err);
goto out;
}
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
struct ovl_layer *layer);
/*
* Lookup an indexed or hashed overlay dentry by real inode.
*/
static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
struct dentry *real,
struct ovl_layer *layer)
{
struct ovl_fs *ofs = sb->s_fs_info;
struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
struct dentry *index = NULL;
struct dentry *this = NULL;
struct inode *inode;
/*
* Decoding upper dir from index is expensive, so first try to lookup
* overlay dentry in inode/dcache.
*/
inode = ovl_lookup_inode(sb, real, !layer->idx);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (inode) {
this = d_find_any_alias(inode);
iput(inode);
}
/*
* For decoded lower dir file handle, lookup index by origin to check
* if lower dir was copied up and and/or removed.
*/
if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
index = ovl_lookup_index(ofs, NULL, real, false);
if (IS_ERR(index))
return index;
}
/* Get connected upper overlay dir from index */
if (index) {
struct dentry *upper = ovl_index_upper(ofs, index);
dput(index);
if (IS_ERR_OR_NULL(upper))
return upper;
/*
* ovl_lookup_real() in lower layer may call recursively once to
* ovl_lookup_real() in upper layer. The first level call walks
* back lower parents to the topmost indexed parent. The second
* recursive call walks back from indexed upper to the topmost
* connected/hashed upper parent (or up to root).
*/
this = ovl_lookup_real(sb, upper, &upper_layer);
dput(upper);
}
if (IS_ERR_OR_NULL(this))
return this;
if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
dput(this);
this = ERR_PTR(-EIO);
}
return this;
}
/*
* Lookup an indexed or hashed overlay dentry, whose real dentry is an
* ancestor of @real.
*/
static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
struct dentry *real,
struct ovl_layer *layer)
{
struct dentry *next, *parent = NULL;
struct dentry *ancestor = ERR_PTR(-EIO);
if (real == layer->mnt->mnt_root)
return dget(sb->s_root);
/* Find the topmost indexed or hashed ancestor */
next = dget(real);
for (;;) {
parent = dget_parent(next);
/*
* Lookup a matching overlay dentry in inode/dentry
* cache or in index by real inode.
*/
ancestor = ovl_lookup_real_inode(sb, next, layer);
if (ancestor)
break;
if (parent == layer->mnt->mnt_root) {
ancestor = dget(sb->s_root);
break;
}
/*
* If @real has been moved out of the layer root directory,
* we will eventully hit the real fs root. This cannot happen
* by legit overlay rename, so we return error in that case.
*/
if (parent == next) {
ancestor = ERR_PTR(-EXDEV);
break;
}
dput(next);
next = parent;
}
dput(parent);
dput(next);
return ancestor;
}
/*
* Lookup a connected overlay dentry whose real dentry is @real.
* If @real is on upper layer, we lookup a child overlay dentry with the same
* path the real dentry. Otherwise, we need to consult index for lookup.
*/
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
struct ovl_layer *layer)
{
struct dentry *connected;
int err = 0;
connected = ovl_lookup_real_ancestor(sb, real, layer);
if (IS_ERR(connected))
return connected;
while (!err) {
struct dentry *next, *this;
struct dentry *parent = NULL;
struct dentry *real_connected = ovl_dentry_real_at(connected,
layer->idx);
if (real_connected == real)
break;
/* Find the topmost dentry not yet connected */
next = dget(real);
for (;;) {
parent = dget_parent(next);
if (parent == real_connected)
break;
/*
* If real has been moved out of 'real_connected',
* we will not find 'real_connected' and hit the layer
* root. In that case, we need to restart connecting.
* This game can go on forever in the worst case. We
* may want to consider taking s_vfs_rename_mutex if
* this happens more than once.
*/
if (parent == layer->mnt->mnt_root) {
dput(connected);
connected = dget(sb->s_root);
break;
}
/*
* If real file has been moved out of the layer root
* directory, we will eventully hit the real fs root.
* This cannot happen by legit overlay rename, so we
* return error in that case.
*/
if (parent == next) {
err = -EXDEV;
break;
}
dput(next);
next = parent;
}
if (!err) {
this = ovl_lookup_real_one(connected, next, layer);
if (IS_ERR(this))
err = PTR_ERR(this);
/*
* Lookup of child in overlay can fail when racing with
* overlay rename of child away from 'connected' parent.
* In this case, we need to restart the lookup from the
* top, because we cannot trust that 'real_connected' is
* still an ancestor of 'real'. There is a good chance
* that the renamed overlay ancestor is now in cache, so
* ovl_lookup_real_ancestor() will find it and we can
* continue to connect exactly from where lookup failed.
*/
if (err == -ECHILD) {
this = ovl_lookup_real_ancestor(sb, real,
layer);
err = PTR_ERR_OR_ZERO(this);
}
if (!err) {
dput(connected);
connected = this;
}
}
dput(parent);
dput(next);
}
if (err)
goto fail;
return connected;
fail:
pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
dput(connected);
return ERR_PTR(err);
}
/*
* Get an overlay dentry from upper/lower real dentries and index.
*/
static struct dentry *ovl_get_dentry(struct super_block *sb,
struct dentry *upper,
struct ovl_path *lowerpath,
struct dentry *index)
{
struct ovl_fs *ofs = sb->s_fs_info;
struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
struct dentry *real = upper ?: (index ?: lowerpath->dentry);
/*
* Obtain a disconnected overlay dentry from a non-dir real dentry
* and index.
*/
if (!d_is_dir(real))
return ovl_obtain_alias(sb, upper, lowerpath, index);
/* Removed empty directory? */
if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real))
return ERR_PTR(-ENOENT);
/*
* If real dentry is connected and hashed, get a connected overlay
* dentry whose real dentry is @real.
*/
return ovl_lookup_real(sb, real, layer);
}
static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
struct ovl_fh *fh)
{
struct ovl_fs *ofs = sb->s_fs_info;
struct dentry *dentry;
struct dentry *upper;
if (!ofs->upper_mnt)
return ERR_PTR(-EACCES);
upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
if (IS_ERR_OR_NULL(upper))
return upper;
dentry = ovl_get_dentry(sb, upper, NULL, NULL);
dput(upper);
return dentry;
}
static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
struct ovl_fh *fh)
{
struct ovl_fs *ofs = sb->s_fs_info;
struct ovl_path origin = { };
struct ovl_path *stack = &origin;
struct dentry *dentry = NULL;
struct dentry *index = NULL;
struct inode *inode;
int err;
/* First lookup overlay inode in inode cache by origin fh */
err = ovl_check_origin_fh(ofs, fh, false, NULL, &stack);
if (err)
return ERR_PTR(err);
if (!d_is_dir(origin.dentry) ||
!(origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
inode = ovl_lookup_inode(sb, origin.dentry, false);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_err;
if (inode) {
dentry = d_find_any_alias(inode);
iput(inode);
if (dentry)
goto out;
}
}
/* Then lookup indexed upper/whiteout by origin fh */
if (ofs->indexdir) {
index = ovl_get_index_fh(ofs, fh);
err = PTR_ERR(index);
if (IS_ERR(index)) {
index = NULL;
goto out_err;
}
}
/* Then try to get a connected upper dir by index */
if (index && d_is_dir(index)) {
struct dentry *upper = ovl_index_upper(ofs, index);
err = PTR_ERR(upper);
if (IS_ERR_OR_NULL(upper))
goto out_err;
dentry = ovl_get_dentry(sb, upper, NULL, NULL);
dput(upper);
goto out;
}
/* Otherwise, get a connected non-upper dir or disconnected non-dir */
if (d_is_dir(origin.dentry) &&
(origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
dput(origin.dentry);
origin.dentry = NULL;
err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
if (err)
goto out_err;
}
if (index) {
err = ovl_verify_origin(index, origin.dentry, false);
if (err)
goto out_err;
}
dentry = ovl_get_dentry(sb, NULL, &origin, index);
out:
dput(origin.dentry);
dput(index);
return dentry;
out_err:
dentry = ERR_PTR(err);
goto out;
}
static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct dentry *dentry = NULL;
struct ovl_fh *fh = (struct ovl_fh *) fid;
int len = fh_len << 2;
unsigned int flags = 0;
int err;
err = -EINVAL;
if (fh_type != OVL_FILEID)
goto out_err;
err = ovl_check_fh_len(fh, len);
if (err)
goto out_err;
flags = fh->flags;
dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
ovl_upper_fh_to_d(sb, fh) :
ovl_lower_fh_to_d(sb, fh);
err = PTR_ERR(dentry);
if (IS_ERR(dentry) && err != -ESTALE)
goto out_err;
return dentry;
out_err:
pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
len, fh_type, flags, err);
return ERR_PTR(err);
}
static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
return ERR_PTR(-EACCES);
}
static int ovl_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
/*
* ovl_fh_to_dentry() returns connected dir overlay dentries and
* ovl_fh_to_parent() is not implemented, so we should not get here.
*/
WARN_ON_ONCE(1);
return -EIO;
}
static struct dentry *ovl_get_parent(struct dentry *dentry)
{
/*
* ovl_fh_to_dentry() returns connected dir overlay dentries, so we
* should not get here.
*/
WARN_ON_ONCE(1);
return ERR_PTR(-EIO);
}
const struct export_operations ovl_export_operations = {
.encode_fh = ovl_encode_fh,
.fh_to_dentry = ovl_fh_to_dentry,
.fh_to_parent = ovl_fh_to_parent,
.get_name = ovl_get_name,
.get_parent = ovl_get_parent,
};

View File

@@ -0,0 +1,843 @@
/*
*
* Copyright (C) 2011 Novell Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
int err;
struct dentry *upperdentry;
const struct cred *old_cred;
/*
* Check for permissions before trying to copy-up. This is redundant
* since it will be rechecked later by ->setattr() on upper dentry. But
* without this, copy-up can be triggered by just about anybody.
*
* We don't initialize inode->size, which just means that
* inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
* check for a swapfile (which this won't be anyway).
*/
err = setattr_prepare(dentry, attr);
if (err)
return err;
err = ovl_want_write(dentry);
if (err)
goto out;
err = ovl_copy_up(dentry);
if (!err) {
upperdentry = ovl_dentry_upper(dentry);
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
attr->ia_valid &= ~ATTR_MODE;
inode_lock(upperdentry->d_inode);
old_cred = ovl_override_creds(dentry->d_sb);
err = notify_change(upperdentry, attr, NULL);
revert_creds(old_cred);
if (!err)
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
inode_unlock(upperdentry->d_inode);
}
ovl_drop_write(dentry);
out:
return err;
}
static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
struct ovl_layer *lower_layer)
{
bool samefs = ovl_same_sb(dentry->d_sb);
unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
if (samefs) {
/*
* When all layers are on the same fs, all real inode
* number are unique, so we use the overlay st_dev,
* which is friendly to du -x.
*/
stat->dev = dentry->d_sb->s_dev;
return 0;
} else if (xinobits) {
unsigned int shift = 64 - xinobits;
/*
* All inode numbers of underlying fs should not be using the
* high xinobits, so we use high xinobits to partition the
* overlay st_ino address space. The high bits holds the fsid
* (upper fsid is 0). This way overlay inode numbers are unique
* and all inodes use overlay st_dev. Inode numbers are also
* persistent for a given layer configuration.
*/
if (stat->ino >> shift) {
pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
dentry, stat->ino, xinobits);
} else {
if (lower_layer)
stat->ino |= ((u64)lower_layer->fsid) << shift;
stat->dev = dentry->d_sb->s_dev;
return 0;
}
}
/* The inode could not be mapped to a unified st_ino address space */
if (S_ISDIR(dentry->d_inode->i_mode)) {
/*
* Always use the overlay st_dev for directories, so 'find
* -xdev' will scan the entire overlay mount and won't cross the
* overlay mount boundaries.
*
* If not all layers are on the same fs the pair {real st_ino;
* overlay st_dev} is not unique, so use the non persistent
* overlay st_ino for directories.
*/
stat->dev = dentry->d_sb->s_dev;
stat->ino = dentry->d_inode->i_ino;
} else if (lower_layer && lower_layer->fsid) {
/*
* For non-samefs setup, if we cannot map all layers st_ino
* to a unified address space, we need to make sure that st_dev
* is unique per lower fs. Upper layer uses real st_dev and
* lower layers use the unique anonymous bdev assigned to the
* lower fs.
*/
stat->dev = lower_layer->fs->pseudo_dev;
}
return 0;
}
int ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct dentry *dentry = path->dentry;
enum ovl_path_type type;
struct path realpath;
const struct cred *old_cred;
bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
bool samefs = ovl_same_sb(dentry->d_sb);
struct ovl_layer *lower_layer = NULL;
int err;
type = ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
err = vfs_getattr(&realpath, stat, request_mask, flags);
if (err)
goto out;
/*
* For non-dir or same fs, we use st_ino of the copy up origin.
* This guaranties constant st_dev/st_ino across copy up.
* With xino feature and non-samefs, we use st_ino of the copy up
* origin masked with high bits that represent the layer id.
*
* If lower filesystem supports NFS file handles, this also guaranties
* persistent st_ino across mount cycle.
*/
if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
if (!OVL_TYPE_UPPER(type)) {
lower_layer = ovl_layer_lower(dentry);
} else if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
ovl_path_lower(dentry, &realpath);
err = vfs_getattr(&realpath, &lowerstat,
lowermask, flags);
if (err)
goto out;
/*
* Lower hardlinks may be broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
* for those different files, even for the same fs case.
*
* Similarly, several redirected dirs can point to the
* same dir on a lower layer. With the "verify_lower"
* feature, we do not use the lower origin st_ino, if
* we haven't verified that this redirect is unique.
*
* With inodes index enabled, it is safe to use st_ino
* of an indexed origin. The index validates that the
* upper hardlink is not broken and that a redirected
* dir is the only redirect to that origin.
*/
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
(!ovl_verify_lower(dentry->d_sb) &&
(is_dir || lowerstat.nlink == 1))) {
stat->ino = lowerstat.ino;
lower_layer = ovl_layer_lower(dentry);
}
}
}
err = ovl_map_dev_ino(dentry, stat, lower_layer);
if (err)
goto out;
/*
* It's probably not worth it to count subdirs to get the
* correct link count. nlink=1 seems to pacify 'find' and
* other utilities.
*/
if (is_dir && OVL_TYPE_MERGE(type))
stat->nlink = 1;
/*
* Return the overlay inode nlinks for indexed upper inodes.
* Overlay inode nlink counts the union of the upper hardlinks
* and non-covered lower hardlinks. It does not include the upper
* index hardlink.
*/
if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->nlink = dentry->d_inode->i_nlink;
out:
revert_creds(old_cred);
return err;
}
int ovl_permission(struct inode *inode, int mask)
{
struct inode *upperinode = ovl_inode_upper(inode);
struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
const struct cred *old_cred;
int err;
/* Careful in RCU walk mode */
if (!realinode) {
WARN_ON(!(mask & MAY_NOT_BLOCK));
return -ECHILD;
}
/*
* Check overlay inode with the creds of task and underlying inode
* with creds of mounter
*/
err = generic_permission(inode, mask);
if (err)
return err;
old_cred = ovl_override_creds(inode->i_sb);
if (!upperinode &&
!special_file(realinode->i_mode) && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
}
err = inode_permission(realinode, mask);
revert_creds(old_cred);
return err;
}
static const char *ovl_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
const struct cred *old_cred;
const char *p;
if (!dentry)
return ERR_PTR(-ECHILD);
old_cred = ovl_override_creds(dentry->d_sb);
p = vfs_get_link(ovl_dentry_real(dentry), done);
revert_creds(old_cred);
return p;
}
bool ovl_is_private_xattr(const char *name)
{
return strncmp(name, OVL_XATTR_PREFIX,
sizeof(OVL_XATTR_PREFIX) - 1) == 0;
}
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
int err;
struct dentry *upperdentry = ovl_i_dentry_upper(inode);
struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
const struct cred *old_cred;
err = ovl_want_write(dentry);
if (err)
goto out;
if (!value && !upperdentry) {
err = vfs_getxattr(realdentry, name, NULL, 0);
if (err < 0)
goto out_drop_write;
}
if (!upperdentry) {
err = ovl_copy_up(dentry);
if (err)
goto out_drop_write;
realdentry = ovl_dentry_upper(dentry);
}
old_cred = ovl_override_creds(dentry->d_sb);
if (value)
err = vfs_setxattr(realdentry, name, value, size, flags);
else {
WARN_ON(flags != XATTR_REPLACE);
err = vfs_removexattr(realdentry, name);
}
revert_creds(old_cred);
out_drop_write:
ovl_drop_write(dentry);
out:
return err;
}
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size)
{
ssize_t res;
const struct cred *old_cred;
struct dentry *realdentry =
ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_getxattr(realdentry, name, value, size);
revert_creds(old_cred);
return res;
}
static bool ovl_can_list(const char *s)
{
/* List all non-trusted xatts */
if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
return true;
/* Never list trusted.overlay, list other trusted for superuser only */
return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
ssize_t res;
size_t len;
char *s;
const struct cred *old_cred;
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_listxattr(realdentry, list, size);
revert_creds(old_cred);
if (res <= 0 || size == 0)
return res;
/* filter out private xattrs */
for (s = list, len = res; len;) {
size_t slen = strnlen(s, len) + 1;
/* underlying fs providing us with an broken xattr list? */
if (WARN_ON(slen > len))
return -EIO;
len -= slen;
if (!ovl_can_list(s)) {
res -= slen;
memmove(s, s + slen, len);
} else {
s += slen;
}
}
return res;
}
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
struct inode *realinode = ovl_inode_real(inode);
const struct cred *old_cred;
struct posix_acl *acl;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
return NULL;
old_cred = ovl_override_creds(inode->i_sb);
acl = get_acl(realinode, type);
revert_creds(old_cred);
return acl;
}
static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
/* Copy up of disconnected dentry does not set upper alias */
if (ovl_dentry_upper(dentry) &&
(ovl_dentry_has_upper_alias(dentry) ||
(dentry->d_flags & DCACHE_DISCONNECTED)))
return false;
if (special_file(d_inode(dentry)->i_mode))
return false;
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
return false;
return true;
}
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
{
int err = 0;
if (ovl_open_need_copy_up(dentry, file_flags)) {
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up_flags(dentry, file_flags);
ovl_drop_write(dentry);
}
}
return err;
}
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
{
if (flags & S_ATIME) {
struct ovl_fs *ofs = inode->i_sb->s_fs_info;
struct path upperpath = {
.mnt = ofs->upper_mnt,
.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
};
if (upperpath.dentry) {
touch_atime(&upperpath);
inode->i_atime = d_inode(upperpath.dentry)->i_atime;
}
}
return 0;
}
static const struct inode_operations ovl_file_inode_operations = {
.setattr = ovl_setattr,
.permission = ovl_permission,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
};
static const struct inode_operations ovl_symlink_inode_operations = {
.setattr = ovl_setattr,
.get_link = ovl_get_link,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
.update_time = ovl_update_time,
};
/*
* It is possible to stack overlayfs instance on top of another
* overlayfs instance as lower layer. We need to annonate the
* stackable i_mutex locks according to stack level of the super
* block instance. An overlayfs instance can never be in stack
* depth 0 (there is always a real fs below it). An overlayfs
* inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
*
* For example, here is a snip from /proc/lockdep_chains after
* dir_iterate of nested overlayfs:
*
* [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
* [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
* [...] &type->i_mutex_dir_key (stack_depth=0)
*/
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING];
int depth = inode->i_sb->s_stack_depth - 1;
if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
depth = 0;
if (S_ISDIR(inode->i_mode))
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
else
lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]);
#endif
}
static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
unsigned long ino, int fsid)
{
int xinobits = ovl_xino_bits(inode->i_sb);
/*
* When NFS export is enabled and d_ino is consistent with st_ino
* (samefs or i_ino has enough bits to encode layer), set the same
* value used for d_ino to i_ino, because nfsd readdirplus compares
* d_ino values to i_ino values of child entries. When called from
* ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
* upper inode i_ino on ovl_inode_init() or ovl_inode_update().
*/
if (inode->i_sb->s_export_op &&
(ovl_same_sb(inode->i_sb) || xinobits)) {
inode->i_ino = ino;
if (xinobits && fsid && !(ino >> (64 - xinobits)))
inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
} else {
inode->i_ino = get_next_ino();
}
inode->i_mode = mode;
inode->i_flags |= S_NOCMTIME;
#ifdef CONFIG_FS_POSIX_ACL
inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
#endif
ovl_lockdep_annotate_inode_mutex_key(inode);
switch (mode & S_IFMT) {
case S_IFREG:
inode->i_op = &ovl_file_inode_operations;
break;
case S_IFDIR:
inode->i_op = &ovl_dir_inode_operations;
inode->i_fop = &ovl_dir_operations;
break;
case S_IFLNK:
inode->i_op = &ovl_symlink_inode_operations;
break;
default:
inode->i_op = &ovl_file_inode_operations;
init_special_inode(inode, mode, rdev);
break;
}
}
/*
* With inodes index enabled, an overlay inode nlink counts the union of upper
* hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
* upper inode, the following nlink modifying operations can happen:
*
* 1. Lower hardlink copy up
* 2. Upper hardlink created, unlinked or renamed over
* 3. Lower hardlink whiteout or renamed over
*
* For the first, copy up case, the union nlink does not change, whether the
* operation succeeds or fails, but the upper inode nlink may change.
* Therefore, before copy up, we store the union nlink value relative to the
* lower inode nlink in the index inode xattr trusted.overlay.nlink.
*
* For the second, upper hardlink case, the union nlink should be incremented
* or decremented IFF the operation succeeds, aligned with nlink change of the
* upper inode. Therefore, before link/unlink/rename, we store the union nlink
* value relative to the upper inode nlink in the index inode.
*
* For the last, lower cover up case, we simplify things by preceding the
* whiteout or cover up with copy up. This makes sure that there is an index
* upper inode where the nlink xattr can be stored before the copied up upper
* entry is unlink.
*/
#define OVL_NLINK_ADD_UPPER (1 << 0)
/*
* On-disk format for indexed nlink:
*
* nlink relative to the upper inode - "U[+-]NUM"
* nlink relative to the lower inode - "L[+-]NUM"
*/
static int ovl_set_nlink_common(struct dentry *dentry,
struct dentry *realdentry, const char *format)
{
struct inode *inode = d_inode(dentry);
struct inode *realinode = d_inode(realdentry);
char buf[13];
int len;
len = snprintf(buf, sizeof(buf), format,
(int) (inode->i_nlink - realinode->i_nlink));
if (WARN_ON(len >= sizeof(buf)))
return -EIO;
return ovl_do_setxattr(ovl_dentry_upper(dentry),
OVL_XATTR_NLINK, buf, len, 0);
}
int ovl_set_nlink_upper(struct dentry *dentry)
{
return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
}
int ovl_set_nlink_lower(struct dentry *dentry)
{
return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
}
unsigned int ovl_get_nlink(struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback)
{
int nlink_diff;
int nlink;
char buf[13];
int err;
if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
return fallback;
err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
if (err < 0)
goto fail;
buf[err] = '\0';
if ((buf[0] != 'L' && buf[0] != 'U') ||
(buf[1] != '+' && buf[1] != '-'))
goto fail;
err = kstrtoint(buf + 1, 10, &nlink_diff);
if (err < 0)
goto fail;
nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
nlink += nlink_diff;
if (nlink <= 0)
goto fail;
return nlink;
fail:
pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
upperdentry, err);
return fallback;
}
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
struct inode *inode;
inode = new_inode(sb);
if (inode)
ovl_fill_inode(inode, mode, rdev, 0, 0);
return inode;
}
static int ovl_inode_test(struct inode *inode, void *data)
{
return inode->i_private == data;
}
static int ovl_inode_set(struct inode *inode, void *data)
{
inode->i_private = data;
return 0;
}
static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
struct dentry *upperdentry, bool strict)
{
/*
* For directories, @strict verify from lookup path performs consistency
* checks, so NULL lower/upper in dentry must match NULL lower/upper in
* inode. Non @strict verify from NFS handle decode path passes NULL for
* 'unknown' lower/upper.
*/
if (S_ISDIR(inode->i_mode) && strict) {
/* Real lower dir moved to upper layer under us? */
if (!lowerdentry && ovl_inode_lower(inode))
return false;
/* Lookup of an uncovered redirect origin? */
if (!upperdentry && ovl_inode_upper(inode))
return false;
}
/*
* Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
* This happens when finding a copied up overlay inode for a renamed
* or hardlinked overlay dentry and lower dentry cannot be followed
* by origin because lower fs does not support file handles.
*/
if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry))
return false;
/*
* Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
* This happens when finding a lower alias for a copied up hard link.
*/
if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
return false;
return true;
}
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
bool is_upper)
{
struct inode *inode, *key = d_inode(real);
inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
if (!inode)
return NULL;
if (!ovl_verify_inode(inode, is_upper ? NULL : real,
is_upper ? real : NULL, false)) {
iput(inode);
return ERR_PTR(-ESTALE);
}
return inode;
}
/*
* Does overlay inode need to be hashed by lower inode?
*/
static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
struct dentry *lower, struct dentry *index)
{
struct ovl_fs *ofs = sb->s_fs_info;
/* No, if pure upper */
if (!lower)
return false;
/* Yes, if already indexed */
if (index)
return true;
/* Yes, if won't be copied up */
if (!ofs->upper_mnt)
return true;
/* No, if lower hardlink is or will be broken on copy up */
if ((upper || !ovl_indexdir(sb)) &&
!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
return false;
/* No, if non-indexed upper with NFS export */
if (sb->s_export_op && upper)
return false;
/* Otherwise, hash by lower inode for fsnotify */
return true;
}
static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
struct inode *key)
{
return newinode ? inode_insert5(newinode, (unsigned long) key,
ovl_inode_test, ovl_inode_set, key) :
iget5_locked(sb, (unsigned long) key,
ovl_inode_test, ovl_inode_set, key);
}
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip)
{
struct dentry *upperdentry = oip->upperdentry;
struct ovl_path *lowerpath = oip->lowerpath;
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
oip->index);
int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
bool is_dir;
unsigned long ino = 0;
if (!realinode)
realinode = d_inode(lowerdentry);
/*
* Copy up origin (lower) may exist for non-indexed upper, but we must
* not use lower as hash key if this is a broken hardlink.
*/
is_dir = S_ISDIR(realinode->i_mode);
if (upperdentry || bylower) {
struct inode *key = d_inode(bylower ? lowerdentry :
upperdentry);
unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
inode = ovl_iget5(sb, oip->newinode, key);
if (!inode)
goto out_nomem;
if (!(inode->i_state & I_NEW)) {
/*
* Verify that the underlying files stored in the inode
* match those in the dentry.
*/
if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
true)) {
iput(inode);
inode = ERR_PTR(-ESTALE);
goto out;
}
dput(upperdentry);
goto out;
}
/* Recalculate nlink for non-dir due to indexing */
if (!is_dir)
nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
set_nlink(inode, nlink);
ino = key->i_ino;
} else {
/* Lower hardlink that will be broken on copy up */
inode = new_inode(sb);
if (!inode)
goto out_nomem;
}
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
ovl_inode_init(inode, upperdentry, lowerdentry);
if (upperdentry && ovl_is_impuredir(upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
if (oip->index)
ovl_set_flag(OVL_INDEX, inode);
/* Check for non-merge dir that may have whiteouts */
if (is_dir) {
if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
ovl_set_flag(OVL_WHITEOUTS, inode);
}
}
if (inode->i_state & I_NEW)
unlock_new_inode(inode);
out:
return inode;
out_nomem:
inode = ERR_PTR(-ENOMEM);
goto out;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,381 @@
/*
*
* Copyright (C) 2011 Novell Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/uuid.h>
#include "ovl_entry.h"
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
__OVL_PATH_MERGE = (1 << 1),
__OVL_PATH_ORIGIN = (1 << 2),
};
#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER)
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN)
#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
enum ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
OVL_IMPURE,
/* Non-merge dir that may contain whiteout entries */
OVL_WHITEOUTS,
OVL_INDEX,
};
enum ovl_entry_flag {
OVL_E_UPPER_ALIAS,
OVL_E_OPAQUE,
OVL_E_CONNECTED,
};
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
* origin.fh - exported file handle of the lower file
* origin.uuid - uuid of the lower filesystem
*/
#define OVL_FH_VERSION 0
#define OVL_FH_MAGIC 0xfb
/* CPU byte order required for fid decoding: */
#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0)
#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1)
/* Is the real inode encoded in fid an upper inode? */
#define OVL_FH_FLAG_PATH_UPPER (1 << 2)
#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \
OVL_FH_FLAG_PATH_UPPER)
#if defined(__LITTLE_ENDIAN)
#define OVL_FH_FLAG_CPU_ENDIAN 0
#elif defined(__BIG_ENDIAN)
#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN
#else
#error Endianness not defined
#endif
/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
#define OVL_FILEID 0xfb
/* On-disk and in-memeory format for redirect by file handle */
struct ovl_fh {
u8 version; /* 0 */
u8 magic; /* 0xfb */
u8 len; /* size of this header + size of fid */
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
uuid_t uuid; /* uuid of filesystem */
u8 fid[0]; /* file identifier */
} __packed;
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
return err;
}
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
{
int err = vfs_unlink(dir, dentry, NULL);
pr_debug("unlink(%pd2) = %i\n", dentry, err);
return err;
}
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry)
{
int err = vfs_link(old_dentry, dir, new_dentry, NULL);
pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
return err;
}
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
umode_t mode)
{
int err = vfs_create(dir, dentry, mode, true);
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
umode_t mode)
{
int err = vfs_mkdir(dir, dentry, mode);
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
}
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t dev)
{
int err = vfs_mknod(dir, dentry, mode, dev);
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
return err;
}
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
const char *oldname)
{
int err = vfs_symlink(dir, dentry, oldname);
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
return err;
}
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
int err = vfs_setxattr(dentry, name, value, size, flags);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
dentry, name, min((int)size, 48), value, size, flags, err);
return err;
}
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
int err = vfs_removexattr(dentry, name);
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
return err;
}
static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
struct inode *newdir, struct dentry *newdentry,
unsigned int flags)
{
int err;
pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
if (err) {
pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
olddentry, newdentry, err);
}
return err;
}
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
int err = vfs_whiteout(dir, dentry);
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
return err;
}
static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
{
struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
int err = PTR_ERR_OR_ZERO(ret);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
return ret;
}
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
struct super_block *ovl_same_sb(struct super_block *sb);
int ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
bool ovl_index_all(struct super_block *sb);
bool ovl_verify_lower(struct super_block *sb);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
bool ovl_dentry_remote(struct dentry *dentry);
bool ovl_dentry_weird(struct dentry *dentry);
enum ovl_path_type ovl_path_type(struct dentry *dentry);
void ovl_path_upper(struct dentry *dentry, struct path *path);
void ovl_path_lower(struct dentry *dentry, struct path *path);
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_i_dentry_upper(struct inode *inode);
struct inode *ovl_inode_upper(struct inode *inode);
struct inode *ovl_inode_lower(struct inode *inode);
struct inode *ovl_inode_real(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry);
bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
bool ovl_dentry_is_opaque(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
bool ovl_dentry_has_upper_alias(struct dentry *dentry);
void ovl_dentry_set_upper_alias(struct dentry *dentry);
bool ovl_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
struct dentry *lowerdentry);
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dentry_version_inc(struct dentry *dentry, bool impurity);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry);
void ovl_copy_up_end(struct dentry *dentry);
bool ovl_check_origin_xattr(struct dentry *dentry);
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
const char *name, const void *value, size_t size,
int xerr);
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
void ovl_set_flag(unsigned long flag, struct inode *inode);
void ovl_clear_flag(unsigned long flag, struct inode *inode);
bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
static inline bool ovl_is_impuredir(struct dentry *dentry)
{
return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
}
static inline unsigned int ovl_xino_bits(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
return ofs->xino_bits;
}
/* namei.c */
int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ovl_path **stackp);
int ovl_verify_set_fh(struct dentry *dentry, const char *name,
struct dentry *real, bool is_upper, bool set);
struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
int ovl_get_index_name(struct dentry *origin, struct qstr *name);
struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool verify);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
static inline int ovl_verify_origin(struct dentry *upper,
struct dentry *origin, bool set)
{
return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
}
static inline int ovl_verify_upper(struct dentry *index,
struct dentry *upper, bool set)
{
return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
}
/* readdir.c */
extern const struct file_operations ovl_dir_operations;
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ovl_cache_free(struct list_head *list);
void ovl_dir_cache_free(struct inode *inode);
int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct ovl_fs *ofs);
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
int ovl_set_nlink_lower(struct dentry *dentry);
unsigned int ovl_get_nlink(struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
int ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
int ovl_permission(struct inode *inode, int mask);
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
const void *value, size_t size, int flags);
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
bool ovl_is_private_xattr(const char *name);
struct ovl_inode_params {
struct inode *newinode;
struct dentry *upperdentry;
struct ovl_path *lowerpath;
struct dentry *index;
unsigned int numlower;
};
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
bool is_upper);
struct inode *ovl_get_inode(struct super_block *sb,
struct ovl_inode_params *oip);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
to->i_gid = from->i_gid;
to->i_mode = from->i_mode;
to->i_atime = from->i_atime;
to->i_mtime = from->i_mtime;
to->i_ctime = from->i_ctime;
}
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
struct dentry *dentry);
struct ovl_cattr {
dev_t rdev;
umode_t mode;
const char *link;
struct dentry *hardlink;
};
#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
struct ovl_cattr *attr);
int ovl_cleanup(struct inode *dir, struct dentry *dentry);
struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper);
/* export.c */
extern const struct export_operations ovl_export_operations;

View File

@@ -0,0 +1,111 @@
/*
*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
struct ovl_config {
char *lowerdir;
char *upperdir;
char *workdir;
bool default_permissions;
bool redirect_dir;
bool redirect_follow;
const char *redirect_mode;
bool index;
bool nfs_export;
int xino;
};
struct ovl_sb {
struct super_block *sb;
dev_t pseudo_dev;
};
struct ovl_layer {
struct vfsmount *mnt;
struct ovl_sb *fs;
/* Index of this layer in fs root (upper idx == 0) */
int idx;
/* One fsid per unique underlying sb (upper fsid == 0) */
int fsid;
};
struct ovl_path {
struct ovl_layer *layer;
struct dentry *dentry;
};
/* private information held for overlayfs's superblock */
struct ovl_fs {
struct vfsmount *upper_mnt;
unsigned int numlower;
/* Number of unique lower sb that differ from upper sb */
unsigned int numlowerfs;
struct ovl_layer *lower_layers;
struct ovl_sb *lower_fs;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
/* workdir is the 'work' directory under workbasedir */
struct dentry *workdir;
/* index directory listing overlay inodes by origin file handle */
struct dentry *indexdir;
long namelen;
/* pathnames of lower and upper dirs, for show_options */
struct ovl_config config;
/* creds of process who forced instantiation of super block */
const struct cred *creator_cred;
bool tmpfile;
bool noxattr;
/* Did we take the inuse lock? */
bool upperdir_locked;
bool workdir_locked;
/* Inode numbers in all layers do not use the high xino_bits */
unsigned int xino_bits;
};
/* private information held for every overlayfs dentry */
struct ovl_entry {
union {
struct {
unsigned long flags;
};
struct rcu_head rcu;
};
unsigned numlower;
struct ovl_path lowerstack[];
};
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
static inline struct ovl_entry *OVL_E(struct dentry *dentry)
{
return (struct ovl_entry *) dentry->d_fsdata;
}
struct ovl_inode {
struct ovl_dir_cache *cache;
const char *redirect;
u64 version;
unsigned long flags;
struct inode vfs_inode;
struct dentry *__upperdentry;
struct inode *lower;
/* synchronize copy up and more */
struct mutex lock;
};
static inline struct ovl_inode *OVL_I(struct inode *inode)
{
return container_of(inode, struct ovl_inode, vfs_inode);
}
static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
{
return READ_ONCE(oi->__upperdentry);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,678 @@
/*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/uuid.h>
#include <linux/namei.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
int ovl_want_write(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
return mnt_want_write(ofs->upper_mnt);
}
void ovl_drop_write(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
mnt_drop_write(ofs->upper_mnt);
}
struct dentry *ovl_workdir(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
return ofs->workdir;
}
const struct cred *ovl_override_creds(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
return override_creds(ofs->creator_cred);
}
struct super_block *ovl_same_sb(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
if (!ofs->numlowerfs)
return ofs->upper_mnt->mnt_sb;
else if (ofs->numlowerfs == 1 && !ofs->upper_mnt)
return ofs->lower_fs[0].sb;
else
return NULL;
}
/*
* Check if underlying fs supports file handles and try to determine encoding
* type, in order to deduce maximum inode number used by fs.
*
* Return 0 if file handles are not supported.
* Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
* Return -1 if fs uses a non default encoding with unknown inode size.
*/
int ovl_can_decode_fh(struct super_block *sb)
{
if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry ||
uuid_is_null(&sb->s_uuid))
return 0;
return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
}
struct dentry *ovl_indexdir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
return ofs->indexdir;
}
/* Index all files on copy up. For now only enabled for NFS export */
bool ovl_index_all(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
return ofs->config.nfs_export && ofs->config.index;
}
/* Verify lower origin on lookup. For now only enabled for NFS export */
bool ovl_verify_lower(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
return ofs->config.nfs_export && ofs->config.index;
}
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
if (oe)
oe->numlower = numlower;
return oe;
}
bool ovl_dentry_remote(struct dentry *dentry)
{
return dentry->d_flags &
(DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE |
DCACHE_OP_REAL);
}
bool ovl_dentry_weird(struct dentry *dentry)
{
return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
DCACHE_MANAGE_TRANSIT |
DCACHE_OP_HASH |
DCACHE_OP_COMPARE);
}
enum ovl_path_type ovl_path_type(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
enum ovl_path_type type = 0;
if (ovl_dentry_upper(dentry)) {
type = __OVL_PATH_UPPER;
/*
* Non-dir dentry can hold lower dentry of its copy up origin.
*/
if (oe->numlower) {
type |= __OVL_PATH_ORIGIN;
if (d_is_dir(dentry))
type |= __OVL_PATH_MERGE;
}
} else {
if (oe->numlower > 1)
type |= __OVL_PATH_MERGE;
}
return type;
}
void ovl_path_upper(struct dentry *dentry, struct path *path)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
path->mnt = ofs->upper_mnt;
path->dentry = ovl_dentry_upper(dentry);
}
void ovl_path_lower(struct dentry *dentry, struct path *path)
{
struct ovl_entry *oe = dentry->d_fsdata;
if (oe->numlower) {
path->mnt = oe->lowerstack[0].layer->mnt;
path->dentry = oe->lowerstack[0].dentry;
} else {
*path = (struct path) { };
}
}
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
{
enum ovl_path_type type = ovl_path_type(dentry);
if (!OVL_TYPE_UPPER(type))
ovl_path_lower(dentry, path);
else
ovl_path_upper(dentry, path);
return type;
}
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
}
struct dentry *ovl_dentry_lower(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
return oe->numlower ? oe->lowerstack[0].dentry : NULL;
}
struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
return oe->numlower ? oe->lowerstack[0].layer : NULL;
}
struct dentry *ovl_dentry_real(struct dentry *dentry)
{
return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
}
struct dentry *ovl_i_dentry_upper(struct inode *inode)
{
return ovl_upperdentry_dereference(OVL_I(inode));
}
struct inode *ovl_inode_upper(struct inode *inode)
{
struct dentry *upperdentry = ovl_i_dentry_upper(inode);
return upperdentry ? d_inode(upperdentry) : NULL;
}
struct inode *ovl_inode_lower(struct inode *inode)
{
return OVL_I(inode)->lower;
}
struct inode *ovl_inode_real(struct inode *inode)
{
return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
}
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
{
return OVL_I(inode)->cache;
}
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
{
OVL_I(inode)->cache = cache;
}
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
{
set_bit(flag, &OVL_E(dentry)->flags);
}
void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
{
clear_bit(flag, &OVL_E(dentry)->flags);
}
bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
{
return test_bit(flag, &OVL_E(dentry)->flags);
}
bool ovl_dentry_is_opaque(struct dentry *dentry)
{
return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
}
bool ovl_dentry_is_whiteout(struct dentry *dentry)
{
return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
}
void ovl_dentry_set_opaque(struct dentry *dentry)
{
ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
}
/*
* For hard links and decoded file handles, it's possible for ovl_dentry_upper()
* to return positive, while there's no actual upper alias for the inode.
* Copy up code needs to know about the existence of the upper alias, so it
* can't use ovl_dentry_upper().
*/
bool ovl_dentry_has_upper_alias(struct dentry *dentry)
{
return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
}
void ovl_dentry_set_upper_alias(struct dentry *dentry)
{
ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
}
bool ovl_redirect_dir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
return ofs->config.redirect_dir && !ofs->noxattr;
}
const char *ovl_dentry_get_redirect(struct dentry *dentry)
{
return OVL_I(d_inode(dentry))->redirect;
}
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
{
struct ovl_inode *oi = OVL_I(d_inode(dentry));
kfree(oi->redirect);
oi->redirect = redirect;
}
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
struct dentry *lowerdentry)
{
struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
if (upperdentry)
OVL_I(inode)->__upperdentry = upperdentry;
if (lowerdentry)
OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
ovl_copyattr(realinode, inode);
if (!inode->i_ino)
inode->i_ino = realinode->i_ino;
}
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
{
struct inode *upperinode = d_inode(upperdentry);
WARN_ON(OVL_I(inode)->__upperdentry);
/*
* Make sure upperdentry is consistent before making it visible
*/
smp_wmb();
OVL_I(inode)->__upperdentry = upperdentry;
if (inode_unhashed(inode)) {
if (!inode->i_ino)
inode->i_ino = upperinode->i_ino;
inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
}
}
void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
{
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(inode));
/*
* Version is used by readdir code to keep cache consistent. For merge
* dirs all changes need to be noted. For non-merge dirs, cache only
* contains impure (ones which have been copied up and have origins)
* entries, so only need to note changes to impure entries.
*/
if (OVL_TYPE_MERGE(ovl_path_type(dentry)) || impurity)
OVL_I(inode)->version++;
}
u64 ovl_dentry_version_get(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(inode));
return OVL_I(inode)->version;
}
bool ovl_is_whiteout(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
return inode && IS_WHITEOUT(inode);
}
struct file *ovl_path_open(struct path *path, int flags)
{
return dentry_open(path, flags | O_NOATIME, current_cred());
}
int ovl_copy_up_start(struct dentry *dentry)
{
struct ovl_inode *oi = OVL_I(d_inode(dentry));
int err;
err = mutex_lock_interruptible(&oi->lock);
if (!err && ovl_dentry_has_upper_alias(dentry)) {
err = 1; /* Already copied up */
mutex_unlock(&oi->lock);
}
return err;
}
void ovl_copy_up_end(struct dentry *dentry)
{
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
bool ovl_check_origin_xattr(struct dentry *dentry)
{
int res;
res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
/* Zero size value means "copied up but origin unknown" */
if (res >= 0)
return true;
return false;
}
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
{
int res;
char val;
if (!d_is_dir(dentry))
return false;
res = vfs_getxattr(dentry, name, &val, 1);
if (res == 1 && val == 'y')
return true;
return false;
}
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
const char *name, const void *value, size_t size,
int xerr)
{
int err;
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
if (ofs->noxattr)
return xerr;
err = ovl_do_setxattr(upperdentry, name, value, size, 0);
if (err == -EOPNOTSUPP) {
pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
ofs->noxattr = true;
return xerr;
}
return err;
}
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
{
int err;
if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
return 0;
/*
* Do not fail when upper doesn't support xattrs.
* Upper inodes won't have origin nor redirect xattr anyway.
*/
err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
"y", 1, 0);
if (!err)
ovl_set_flag(OVL_IMPURE, d_inode(dentry));
return err;
}
void ovl_set_flag(unsigned long flag, struct inode *inode)
{
set_bit(flag, &OVL_I(inode)->flags);
}
void ovl_clear_flag(unsigned long flag, struct inode *inode)
{
clear_bit(flag, &OVL_I(inode)->flags);
}
bool ovl_test_flag(unsigned long flag, struct inode *inode)
{
return test_bit(flag, &OVL_I(inode)->flags);
}
/**
* Caller must hold a reference to inode to prevent it from being freed while
* it is marked inuse.
*/
bool ovl_inuse_trylock(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
bool locked = false;
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_OVL_INUSE)) {
inode->i_state |= I_OVL_INUSE;
locked = true;
}
spin_unlock(&inode->i_lock);
return locked;
}
void ovl_inuse_unlock(struct dentry *dentry)
{
if (dentry) {
struct inode *inode = d_inode(dentry);
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_OVL_INUSE));
inode->i_state &= ~I_OVL_INUSE;
spin_unlock(&inode->i_lock);
}
}
/*
* Does this overlay dentry need to be indexed on copy up?
*/
bool ovl_need_index(struct dentry *dentry)
{
struct dentry *lower = ovl_dentry_lower(dentry);
if (!lower || !ovl_indexdir(dentry->d_sb))
return false;
/* Index all files for NFS export and consistency verification */
if (ovl_index_all(dentry->d_sb))
return true;
/* Index only lower hardlinks on copy up */
if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
return true;
return false;
}
/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *index = NULL;
struct inode *inode;
struct qstr name = { };
int err;
err = ovl_get_index_name(lowerdentry, &name);
if (err)
goto fail;
inode = d_inode(upperdentry);
if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
upperdentry, inode->i_ino, inode->i_nlink);
/*
* We either have a bug with persistent union nlink or a lower
* hardlink was added while overlay is mounted. Adding a lower
* hardlink and then unlinking all overlay hardlinks would drop
* overlay nlink to zero before all upper inodes are unlinked.
* As a safety measure, when that situation is detected, set
* the overlay nlink to the index inode nlink minus one for the
* index entry itself.
*/
set_nlink(d_inode(dentry), inode->i_nlink - 1);
ovl_set_nlink_upper(dentry);
goto out;
}
inode_lock_nested(dir, I_MUTEX_PARENT);
index = lookup_one_len(name.name, indexdir, name.len);
err = PTR_ERR(index);
if (IS_ERR(index)) {
index = NULL;
} else if (ovl_index_all(dentry->d_sb)) {
/* Whiteout orphan index to block future open by handle */
err = ovl_cleanup_and_whiteout(indexdir, dir, index);
} else {
/* Cleanup orphan index entries */
err = ovl_cleanup(dir, index);
}
inode_unlock(dir);
if (err)
goto fail;
out:
kfree(name.name);
dput(index);
return;
fail:
pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
goto out;
}
/*
* Operations that change overlay inode and upper inode nlink need to be
* synchronized with copy up for persistent nlink accounting.
*/
int ovl_nlink_start(struct dentry *dentry, bool *locked)
{
struct ovl_inode *oi = OVL_I(d_inode(dentry));
const struct cred *old_cred;
int err;
if (!d_inode(dentry))
return 0;
/*
* With inodes index is enabled, we store the union overlay nlink
* in an xattr on the index inode. When whiting out an indexed lower,
* we need to decrement the overlay persistent nlink, but before the
* first copy up, we have no upper index inode to store the xattr.
*
* As a workaround, before whiteout/rename over an indexed lower,
* copy up to create the upper index. Creating the upper index will
* initialize the overlay nlink, so it could be dropped if unlink
* or rename succeeds.
*
* TODO: implement metadata only index copy up when called with
* ovl_copy_up_flags(dentry, O_PATH).
*/
if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
err = ovl_copy_up(dentry);
if (err)
return err;
}
err = mutex_lock_interruptible(&oi->lock);
if (err)
return err;
if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
goto out;
old_cred = ovl_override_creds(dentry->d_sb);
/*
* The overlay inode nlink should be incremented/decremented IFF the
* upper operation succeeds, along with nlink change of upper inode.
* Therefore, before link/unlink/rename, we store the union nlink
* value relative to the upper inode nlink in an upper inode xattr.
*/
err = ovl_set_nlink_upper(dentry);
revert_creds(old_cred);
out:
if (err)
mutex_unlock(&oi->lock);
else
*locked = true;
return err;
}
void ovl_nlink_end(struct dentry *dentry, bool locked)
{
if (locked) {
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
d_inode(dentry)->i_nlink == 0) {
const struct cred *old_cred;
old_cred = ovl_override_creds(dentry->d_sb);
ovl_cleanup_index(dentry);
revert_creds(old_cred);
}
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
}
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
{
/* Workdir should not be the same as upperdir */
if (workdir == upperdir)
goto err;
/* Workdir should not be subdir of upperdir and vice versa */
if (lock_rename(workdir, upperdir) != NULL)
goto err_unlock;
return 0;
err_unlock:
unlock_rename(workdir, upperdir);
err:
pr_err("overlayfs: failed to lock workdir+upperdir\n");
return -EIO;
}