Sparse-read optimization for dmu_read_uio().

Adds the DMU_READ_FAIL_SPARSE flag.  When it is set on a read and
dbuf_hold_impl() returns ENOENT for a block, dmu_buf_hold_array_by_dnode()
stores NULL in dbp[] for that block instead of failing with EIO.
dmu_read_uio() passes the flag for non-xuio reads and, for each NULL entry,
copies zeros from zero_region with uiomove() instead of reading a dbuf.

The zero-fill loop counts down a separate local (zleft) rather than tocpy:
tocpy is subtracted from size after the copy, so consuming it in the loop
would leave size unchanged for every sparse block.

Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	(revision 249431)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	(working copy)
@@ -591,6 +591,7 @@
  */
 #define	DMU_READ_PREFETCH	0 /* prefetch */
 #define	DMU_READ_NO_PREFETCH	1 /* don't prefetch */
+#define	DMU_READ_FAIL_SPARSE	2 /* NULL dbuf for sparse blocks on read */
 int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
 	void *buf, uint32_t flags);
 void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	(revision 249431)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	(working copy)
@@ -412,12 +412,19 @@
 	zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
 	blkid = dbuf_whichblock(dn, offset);
 	for (i = 0; i < nblks; i++) {
-		dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
-		if (db == NULL) {
-			rw_exit(&dn->dn_struct_rwlock);
-			dmu_buf_rele_array(dbp, nblks, tag);
-			zio_nowait(zio);
-			return (SET_ERROR(EIO));
+		dmu_buf_impl_t *db;
+		int dberr = dbuf_hold_impl(dn, 0, blkid + i,
+		    flags & DMU_READ_FAIL_SPARSE ? TRUE : FALSE, tag, &db);
+		if (dberr) {
+			if (dberr == ENOENT && flags & DMU_READ_FAIL_SPARSE && read) {
+				dbp[i] = NULL;
+				continue;
+			} else {
+				rw_exit(&dn->dn_struct_rwlock);
+				dmu_buf_rele_array(dbp, nblks, tag);
+				zio_nowait(zio);
+				return (SET_ERROR(EIO));
+			}
 		}
 		/* initiate async i/o */
 		if (read)
@@ -442,7 +449,7 @@
 
 	/* wait for other io to complete */
 	if (read) {
-		for (i = 0; i < nblks; i++) {
+		for (i = 0; i < nblks; i++) if (dbp[i]) {
 			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
 			mutex_enter(&db->db_mtx);
 			while (db->db_state == DB_READ ||
@@ -981,21 +988,32 @@
 	dmu_buf_t **dbp;
 	int numbufs, i, err;
 	xuio_t *xuio = NULL;
+	dnode_t *dn;
+	uint32_t blocksize = 0;
+	uint64_t blkid;
 
+#ifdef UIO_XUIO
+	if (uio->uio_extflg == UIO_XUIO)
+		xuio = (xuio_t *)uio;
+#endif
+
+	err = dnode_hold(os, object, FTAG, &dn);
+	if (err)
+		return (err);
+
 	/*
 	 * NB: we could do this block-at-a-time, but it's nice
 	 * to be reading in parallel.
 	 */
-	err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG,
-	    &numbufs, &dbp);
-	if (err)
+	/* XXX - optimize sparse reads for xuio too? */
+	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size, TRUE,
+	    FTAG, &numbufs, &dbp,
+	    DMU_READ_PREFETCH | (xuio ? 0 : DMU_READ_FAIL_SPARSE));
+	if (err) {
+		dnode_rele(dn, FTAG);
 		return (err);
+	}
 
-#ifdef UIO_XUIO
-	if (uio->uio_extflg == UIO_XUIO)
-		xuio = (xuio_t *)uio;
-#endif
-
 	for (i = 0; i < numbufs; i++) {
 		int tocpy;
 		int bufoff;
@@ -1003,9 +1021,22 @@
 
 		ASSERT(size > 0);
 
-		bufoff = uio->uio_loffset - db->db_offset;
-		tocpy = (int)MIN(db->db_size - bufoff, size);
+		if (db) {
+			bufoff = uio->uio_loffset - db->db_offset;
+			tocpy = (int)MIN(db->db_size - bufoff, size);
+		} else { /* sparse optimization */
+			if (!blocksize) {
+				rw_enter(&dn->dn_struct_rwlock, RW_READER);
+				blocksize = dn->dn_datablksz;
+				blkid = dbuf_whichblock(dn, uio->uio_loffset) - i;
+				rw_exit(&dn->dn_struct_rwlock);
+				ASSERT(blocksize > 0);
+			}
+			bufoff = uio->uio_loffset - (blkid + i) * blocksize;
+			tocpy = (int)MIN(blocksize - bufoff, size);
+		}
+
 
 		if (xuio) {
 			dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
 			arc_buf_t *dbuf_abuf = dbi->db_buf;
@@ -1020,15 +1051,25 @@
 				XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
 			else
 				XUIOSTAT_BUMP(xuiostat_rbuf_copied);
-		} else {
+		} else if (db) {
 			err = uiomove((char *)db->db_data + bufoff, tocpy,
 			    UIO_READ, uio);
+		} else { /* sparse optimization */
+			void *zbuf = __DECONST(void *, zero_region);
+			/* count down a copy; size -= tocpy below needs tocpy */
+			int zleft = tocpy;
+			do {
+				ssize_t len = MIN(zleft, ZERO_REGION_SIZE);
+				err = uiomove(zbuf, len, UIO_READ, uio);
+				zleft -= len;
+			} while (zleft && !err);
 		}
 		if (err)
 			break;
 
 		size -= tocpy;
 	}
+	dnode_rele(dn, FTAG);
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 
 	return (err);