Add UFS write suspension mechanism, designed to allow userland processes
to modify on-disk metadata for filesystems mounted for write. Reviewed by: kib, mckusick Sponsored by: FreeBSD Foundation
This commit is contained in:
@@ -3578,6 +3578,7 @@ ufs/ffs/ffs_tables.c optional ffs
|
||||
ufs/ffs/ffs_vfsops.c optional ffs
|
||||
ufs/ffs/ffs_vnops.c optional ffs
|
||||
ufs/ffs/ffs_rawread.c optional directio
|
||||
ufs/ffs/ffs_suspend.c optional ffs
|
||||
ufs/ufs/ufs_acl.c optional ffs
|
||||
ufs/ufs/ufs_bmap.c optional ffs
|
||||
ufs/ufs/ufs_dirhash.c optional ffs
|
||||
|
||||
@@ -79,9 +79,11 @@ int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t);
|
||||
void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t);
|
||||
int ffs_mountroot(void);
|
||||
void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
|
||||
int ffs_own_mount(const struct mount *mp);
|
||||
int ffs_reallocblks(struct vop_reallocblks_args *);
|
||||
int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
|
||||
ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
|
||||
int ffs_reload(struct mount *, struct thread *, int);
|
||||
int ffs_sbupdate(struct ufsmount *, int, int);
|
||||
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
|
||||
int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t,
|
||||
@@ -100,6 +102,8 @@ int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **);
|
||||
int ffs_vfree(struct vnode *, ino_t, int);
|
||||
vfs_vget_t ffs_vget;
|
||||
int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int);
|
||||
void ffs_susp_initialize(void);
|
||||
void ffs_susp_uninitialize(void);
|
||||
|
||||
#define FFSV_FORCEINSMQ 0x0001
|
||||
|
||||
|
||||
@@ -0,0 +1,338 @@
|
||||
/*-
|
||||
* Copyright (c) 2012 The FreeBSD Foundation
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software was developed by Edward Tomasz Napierala under sponsorship
|
||||
* from the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/ioccom.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/sx.h>
|
||||
|
||||
#include <security/mac/mac_framework.h>
|
||||
|
||||
#include <ufs/ufs/extattr.h>
|
||||
#include <ufs/ufs/quota.h>
|
||||
#include <ufs/ufs/ufsmount.h>
|
||||
#include <ufs/ufs/inode.h>
|
||||
|
||||
#include <ufs/ffs/fs.h>
|
||||
#include <ufs/ffs/ffs_extern.h>
|
||||
|
||||
static d_open_t ffs_susp_open;
|
||||
static d_write_t ffs_susp_rdwr;
|
||||
static d_ioctl_t ffs_susp_ioctl;
|
||||
|
||||
static struct cdevsw ffs_susp_cdevsw = {
|
||||
.d_version = D_VERSION,
|
||||
.d_open = ffs_susp_open,
|
||||
.d_read = ffs_susp_rdwr,
|
||||
.d_write = ffs_susp_rdwr,
|
||||
.d_ioctl = ffs_susp_ioctl,
|
||||
.d_name = "ffs_susp",
|
||||
};
|
||||
|
||||
static struct cdev *ffs_susp_dev;
|
||||
static struct sx ffs_susp_lock;
|
||||
|
||||
static int
|
||||
ffs_susp_suspended(struct mount *mp)
|
||||
{
|
||||
struct ufsmount *ump;
|
||||
|
||||
sx_assert(&ffs_susp_lock, SA_LOCKED);
|
||||
|
||||
ump = VFSTOUFS(mp);
|
||||
if (ump->um_writesuspended)
|
||||
return (1);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ffs_susp_open(struct cdev *dev __unused, int flags __unused,
|
||||
int fmt __unused, struct thread *td __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ffs_susp_rdwr(struct cdev *dev, struct uio *uio, int ioflag)
|
||||
{
|
||||
int error, i;
|
||||
struct vnode *devvp;
|
||||
struct mount *mp;
|
||||
struct ufsmount *ump;
|
||||
struct buf *bp;
|
||||
void *base;
|
||||
size_t len;
|
||||
ssize_t cnt;
|
||||
struct fs *fs;
|
||||
|
||||
sx_slock(&ffs_susp_lock);
|
||||
|
||||
error = devfs_get_cdevpriv((void **)&mp);
|
||||
if (error != 0) {
|
||||
sx_sunlock(&ffs_susp_lock);
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
ump = VFSTOUFS(mp);
|
||||
devvp = ump->um_devvp;
|
||||
fs = ump->um_fs;
|
||||
|
||||
if (ffs_susp_suspended(mp) == 0) {
|
||||
sx_sunlock(&ffs_susp_lock);
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
|
||||
("neither UIO_READ or UIO_WRITE"));
|
||||
KASSERT(uio->uio_segflg == UIO_USERSPACE,
|
||||
("uio->uio_segflg != UIO_USERSPACE"));
|
||||
|
||||
cnt = uio->uio_resid;
|
||||
|
||||
for (i = 0; i < uio->uio_iovcnt; i++) {
|
||||
while (uio->uio_iov[i].iov_len) {
|
||||
base = uio->uio_iov[i].iov_base;
|
||||
len = uio->uio_iov[i].iov_len;
|
||||
if (len > fs->fs_bsize)
|
||||
len = fs->fs_bsize;
|
||||
if (fragoff(fs, uio->uio_offset) != 0 ||
|
||||
fragoff(fs, len) != 0) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
error = bread(devvp, btodb(uio->uio_offset), len,
|
||||
NOCRED, &bp);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
if (uio->uio_rw == UIO_WRITE) {
|
||||
error = copyin(base, bp->b_data, len);
|
||||
if (error != 0) {
|
||||
bp->b_flags |= B_INVAL | B_NOCACHE;
|
||||
brelse(bp);
|
||||
goto out;
|
||||
}
|
||||
error = bwrite(bp);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
} else {
|
||||
error = copyout(bp->b_data, base, len);
|
||||
brelse(bp);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
}
|
||||
uio->uio_iov[i].iov_base =
|
||||
(char *)uio->uio_iov[i].iov_base + len;
|
||||
uio->uio_iov[i].iov_len -= len;
|
||||
uio->uio_resid -= len;
|
||||
uio->uio_offset += len;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
sx_sunlock(&ffs_susp_lock);
|
||||
|
||||
if (uio->uio_resid < cnt)
|
||||
return (0);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
ffs_susp_suspend(struct mount *mp)
|
||||
{
|
||||
struct fs *fs;
|
||||
struct ufsmount *ump;
|
||||
int error;
|
||||
|
||||
sx_assert(&ffs_susp_lock, SA_XLOCKED);
|
||||
|
||||
if (!ffs_own_mount(mp))
|
||||
return (EINVAL);
|
||||
if (ffs_susp_suspended(mp))
|
||||
return (EBUSY);
|
||||
|
||||
ump = VFSTOUFS(mp);
|
||||
fs = ump->um_fs;
|
||||
|
||||
/*
|
||||
* Make sure the calling thread is permitted to access the mounted
|
||||
* device. The permissions can change after we unlock the vnode;
|
||||
* it's harmless.
|
||||
*/
|
||||
vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
|
||||
error = VOP_ACCESS(ump->um_devvp, VREAD | VWRITE,
|
||||
curthread->td_ucred, curthread);
|
||||
VOP_UNLOCK(ump->um_devvp, 0);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
#ifdef MAC
|
||||
if (mac_mount_check_stat(curthread->td_ucred, mp) != 0)
|
||||
return (EPERM);
|
||||
#endif
|
||||
|
||||
if ((error = vfs_write_suspend(mp)) != 0)
|
||||
return (error);
|
||||
|
||||
ump->um_writesuspended = 1;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
ffs_susp_dtor(void *data)
|
||||
{
|
||||
struct fs *fs;
|
||||
struct ufsmount *ump;
|
||||
struct mount *mp;
|
||||
int error;
|
||||
|
||||
sx_xlock(&ffs_susp_lock);
|
||||
|
||||
mp = (struct mount *)data;
|
||||
ump = VFSTOUFS(mp);
|
||||
fs = ump->um_fs;
|
||||
|
||||
if (ffs_susp_suspended(mp) == 0) {
|
||||
sx_xunlock(&ffs_susp_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0,
|
||||
("MNTK_SUSPEND not set"));
|
||||
|
||||
error = ffs_reload(mp, curthread, 1);
|
||||
if (error != 0)
|
||||
panic("failed to unsuspend writes on %s", fs->fs_fsmnt);
|
||||
|
||||
/*
|
||||
* XXX: The status is kept per-process; the vfs_write_resume() routine
|
||||
* asserts that the resuming thread is the same one that called
|
||||
* vfs_write_suspend(). The cdevpriv data, however, is attached
|
||||
* to the file descriptor, e.g. is inherited during fork. Thus,
|
||||
* it's possible that the resuming process will be different from
|
||||
* the one that started the suspension.
|
||||
*
|
||||
* Work around by fooling the check in vfs_write_resume().
|
||||
*/
|
||||
mp->mnt_susp_owner = curthread;
|
||||
|
||||
vfs_write_resume(mp);
|
||||
vfs_unbusy(mp);
|
||||
ump->um_writesuspended = 0;
|
||||
|
||||
sx_xunlock(&ffs_susp_lock);
|
||||
}
|
||||
|
||||
static int
|
||||
ffs_susp_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
|
||||
struct thread *td)
|
||||
{
|
||||
struct mount *mp;
|
||||
fsid_t *fsidp;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* No suspend inside the jail. Allowing it would require making
|
||||
* sure that e.g. the devfs ruleset for that jail permits access
|
||||
* to the devvp.
|
||||
*/
|
||||
if (jailed(td->td_ucred))
|
||||
return (EPERM);
|
||||
|
||||
sx_xlock(&ffs_susp_lock);
|
||||
|
||||
switch (cmd) {
|
||||
case UFSSUSPEND:
|
||||
fsidp = (fsid_t *)addr;
|
||||
mp = vfs_getvfs(fsidp);
|
||||
if (mp == NULL) {
|
||||
error = ENOENT;
|
||||
break;
|
||||
}
|
||||
error = vfs_busy(mp, 0);
|
||||
vfs_rel(mp);
|
||||
if (error != 0)
|
||||
break;
|
||||
error = ffs_susp_suspend(mp);
|
||||
if (error != 0) {
|
||||
vfs_unbusy(mp);
|
||||
break;
|
||||
}
|
||||
error = devfs_set_cdevpriv(mp, ffs_susp_dtor);
|
||||
KASSERT(error == 0, ("devfs_set_cdevpriv failed"));
|
||||
break;
|
||||
case UFSRESUME:
|
||||
error = devfs_get_cdevpriv((void **)&mp);
|
||||
if (error != 0)
|
||||
break;
|
||||
/*
|
||||
* This calls ffs_susp_dtor, which in turn unsuspends the fs.
|
||||
* The dtor expects to be called without lock held, because
|
||||
* sometimes it's called from here, and sometimes due to the
|
||||
* file being closed or process exiting.
|
||||
*/
|
||||
sx_xunlock(&ffs_susp_lock);
|
||||
devfs_clear_cdevpriv();
|
||||
return (0);
|
||||
default:
|
||||
error = ENXIO;
|
||||
break;
|
||||
}
|
||||
|
||||
sx_xunlock(&ffs_susp_lock);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
ffs_susp_initialize(void)
|
||||
{
|
||||
|
||||
sx_init(&ffs_susp_lock, "ffs_susp");
|
||||
ffs_susp_dev = make_dev(&ffs_susp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
|
||||
"ufssuspend");
|
||||
}
|
||||
|
||||
void
|
||||
ffs_susp_uninitialize(void)
|
||||
{
|
||||
|
||||
destroy_dev(ffs_susp_dev);
|
||||
sx_destroy(&ffs_susp_lock);
|
||||
}
|
||||
+33
-10
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/buf.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/ioccom.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
|
||||
@@ -75,7 +76,6 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
|
||||
|
||||
static int ffs_reload(struct mount *, struct thread *);
|
||||
static int ffs_mountfs(struct vnode *, struct mount *, struct thread *);
|
||||
static void ffs_oldfscompat_read(struct fs *, struct ufsmount *,
|
||||
ufs2_daddr_t);
|
||||
@@ -333,7 +333,7 @@ ffs_mount(struct mount *mp)
|
||||
vfs_write_resume(mp);
|
||||
}
|
||||
if ((mp->mnt_flag & MNT_RELOAD) &&
|
||||
(error = ffs_reload(mp, td)) != 0)
|
||||
(error = ffs_reload(mp, td, 0)) != 0)
|
||||
return (error);
|
||||
if (fs->fs_ronly &&
|
||||
!vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
|
||||
@@ -595,8 +595,8 @@ ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
|
||||
|
||||
/*
|
||||
* Reload all incore data for a filesystem (used after running fsck on
|
||||
* the root filesystem and finding things to fix). The filesystem must
|
||||
* be mounted read-only.
|
||||
* the root filesystem and finding things to fix). If the 'force' flag
|
||||
* is 0, the filesystem must be mounted read-only.
|
||||
*
|
||||
* Things to do to update the mount:
|
||||
* 1) invalidate all cached meta-data.
|
||||
@@ -606,8 +606,8 @@ ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
|
||||
* 5) invalidate all cached file data.
|
||||
* 6) re-read inode data for all active vnodes.
|
||||
*/
|
||||
static int
|
||||
ffs_reload(struct mount *mp, struct thread *td)
|
||||
int
|
||||
ffs_reload(struct mount *mp, struct thread *td, int force)
|
||||
{
|
||||
struct vnode *vp, *mvp, *devvp;
|
||||
struct inode *ip;
|
||||
@@ -619,9 +619,15 @@ ffs_reload(struct mount *mp, struct thread *td)
|
||||
int i, blks, size, error;
|
||||
int32_t *lp;
|
||||
|
||||
if ((mp->mnt_flag & MNT_RDONLY) == 0)
|
||||
return (EINVAL);
|
||||
ump = VFSTOUFS(mp);
|
||||
|
||||
MNT_ILOCK(mp);
|
||||
if ((mp->mnt_flag & MNT_RDONLY) == 0 && force == 0) {
|
||||
MNT_IUNLOCK(mp);
|
||||
return (EINVAL);
|
||||
}
|
||||
MNT_IUNLOCK(mp);
|
||||
|
||||
/*
|
||||
* Step 1: invalidate all cached meta-data.
|
||||
*/
|
||||
@@ -655,8 +661,7 @@ ffs_reload(struct mount *mp, struct thread *td)
|
||||
newfs->fs_maxcluster = fs->fs_maxcluster;
|
||||
newfs->fs_contigdirs = fs->fs_contigdirs;
|
||||
newfs->fs_active = fs->fs_active;
|
||||
/* The file system is still read-only. */
|
||||
newfs->fs_ronly = 1;
|
||||
newfs->fs_ronly = fs->fs_ronly;
|
||||
sblockloc = fs->fs_sblockloc;
|
||||
bcopy(newfs, fs, (u_int)fs->fs_sbsize);
|
||||
brelse(bp);
|
||||
@@ -710,6 +715,13 @@ ffs_reload(struct mount *mp, struct thread *td)
|
||||
|
||||
loop:
|
||||
MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
|
||||
/*
|
||||
* Skip syncer vnode.
|
||||
*/
|
||||
if (vp->v_type == VNON) {
|
||||
VI_UNLOCK(vp);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Step 4: invalidate all cached file data.
|
||||
*/
|
||||
@@ -1834,6 +1846,7 @@ ffs_init(vfsp)
|
||||
struct vfsconf *vfsp;
|
||||
{
|
||||
|
||||
ffs_susp_initialize();
|
||||
softdep_initialize();
|
||||
return (ufs_init(vfsp));
|
||||
}
|
||||
@@ -1849,6 +1862,7 @@ ffs_uninit(vfsp)
|
||||
|
||||
ret = ufs_uninit(vfsp);
|
||||
softdep_uninitialize();
|
||||
ffs_susp_uninitialize();
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -2198,6 +2212,15 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
|
||||
g_vfs_strategy(bo, bp);
|
||||
}
|
||||
|
||||
int
|
||||
ffs_own_mount(const struct mount *mp)
|
||||
{
|
||||
|
||||
if (mp->mnt_op == &ufs_vfsops)
|
||||
return (1);
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef DDB
|
||||
|
||||
static void
|
||||
|
||||
@@ -33,6 +33,8 @@
|
||||
#ifndef _UFS_FFS_FS_H_
|
||||
#define _UFS_FFS_FS_H_
|
||||
|
||||
#include <sys/mount.h>
|
||||
|
||||
/*
|
||||
* Each disk drive contains some number of filesystems.
|
||||
* A filesystem consists of a number of cylinder groups.
|
||||
@@ -763,4 +765,10 @@ CTASSERT(sizeof(union jrec) == JREC_SIZE);
|
||||
extern int inside[], around[];
|
||||
extern u_char *fragtbl[];
|
||||
|
||||
/*
|
||||
* IOCTLs used for filesystem write suspension.
|
||||
*/
|
||||
#define UFSSUSPEND _IOW('U', 1, fsid_t)
|
||||
#define UFSRESUME _IO('U', 2)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -98,6 +98,7 @@ struct ufsmount {
|
||||
char um_qflags[MAXQUOTAS]; /* quota specific flags */
|
||||
int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */
|
||||
int um_candelete; /* devvp supports TRIM */
|
||||
int um_writesuspended; /* suspension in progress */
|
||||
int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **);
|
||||
int (*um_blkatoff)(struct vnode *, off_t, char **, struct buf **);
|
||||
int (*um_truncate)(struct vnode *, off_t, int, struct ucred *);
|
||||
|
||||
Reference in New Issue
Block a user