ulfs_readwrite.c revision 1.1 1 /* $NetBSD: ulfs_readwrite.c,v 1.1 2013/06/06 00:40:55 dholland Exp $ */
2 /* from NetBSD: ufs_readwrite.c,v 1.105 2013/01/22 09:39:18 dholland Exp */
3
4 /*-
5 * Copyright (c) 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(1, "$NetBSD: ulfs_readwrite.c,v 1.1 2013/06/06 00:40:55 dholland Exp $");
37
/*
 * Pick the identifiers the read/write vnode ops below are built from.
 *
 * When LFS_READWRITE is defined the ops are named lfs_read/lfs_write,
 * operate on a struct lfs reached through the inode's i_lfs member, map
 * the generic fs_bsize/fs_bmask field names onto their lfs_ counterparts
 * and turn the UFS_WAPBL_* hooks into no-ops (BEGIN always yields 0).
 * Otherwise the ops are named ffs_read/ffs_write and operate on a
 * struct fs reached through i_fs.
 */
#if defined(LFS_READWRITE)

#define	FS		struct lfs
#define	I_FS		i_lfs

#define	READ		lfs_read
#define	READ_S		"lfs_read"
#define	WRITE		lfs_write
#define	WRITE_S		"lfs_write"

#define	fs_bsize	lfs_bsize
#define	fs_bmask	lfs_bmask

#define	UFS_WAPBL_BEGIN(mp) 0
#define	UFS_WAPBL_END(mp) do { } while (0)
#define	UFS_WAPBL_UPDATE(vp, access, modify, flags) do { } while (0)

#else /* !LFS_READWRITE */

#define	FS		struct fs
#define	I_FS		i_fs

#define	READ		ffs_read
#define	READ_S		"ffs_read"
#define	WRITE		ffs_write
#define	WRITE_S		"ffs_write"

#endif /* LFS_READWRITE */
58
59 /*
60 * Vnode op for reading.
61 */
62 /* ARGSUSED */
63 int
64 READ(void *v)
65 {
66 struct vop_read_args /* {
67 struct vnode *a_vp;
68 struct uio *a_uio;
69 int a_ioflag;
70 kauth_cred_t a_cred;
71 } */ *ap = v;
72 struct vnode *vp;
73 struct inode *ip;
74 struct uio *uio;
75 struct ufsmount *ump;
76 struct buf *bp;
77 FS *fs;
78 vsize_t bytelen;
79 daddr_t lbn, nextlbn;
80 off_t bytesinfile;
81 long size, xfersize, blkoffset;
82 int error, ioflag;
83 bool usepc = false;
84
85 vp = ap->a_vp;
86 ip = VTOI(vp);
87 ump = ip->i_ump;
88 uio = ap->a_uio;
89 ioflag = ap->a_ioflag;
90 error = 0;
91
92 #ifdef DIAGNOSTIC
93 if (uio->uio_rw != UIO_READ)
94 panic("%s: mode", READ_S);
95
96 if (vp->v_type == VLNK) {
97 if (ip->i_size < ump->um_maxsymlinklen ||
98 (ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0))
99 panic("%s: short symlink", READ_S);
100 } else if (vp->v_type != VREG && vp->v_type != VDIR)
101 panic("%s: type %d", READ_S, vp->v_type);
102 #endif
103 fs = ip->I_FS;
104 if ((u_int64_t)uio->uio_offset > ump->um_maxfilesize)
105 return (EFBIG);
106 if (uio->uio_resid == 0)
107 return (0);
108
109 #ifndef LFS_READWRITE
110 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT)
111 return ffs_snapshot_read(vp, uio, ioflag);
112 #endif /* !LFS_READWRITE */
113
114 fstrans_start(vp->v_mount, FSTRANS_SHARED);
115
116 if (uio->uio_offset >= ip->i_size)
117 goto out;
118
119 #ifdef LFS_READWRITE
120 usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM);
121 #else /* !LFS_READWRITE */
122 usepc = vp->v_type == VREG;
123 #endif /* !LFS_READWRITE */
124 if (usepc) {
125 const int advice = IO_ADV_DECODE(ap->a_ioflag);
126
127 while (uio->uio_resid > 0) {
128 if (ioflag & IO_DIRECT) {
129 genfs_directio(vp, uio, ioflag);
130 }
131 bytelen = MIN(ip->i_size - uio->uio_offset,
132 uio->uio_resid);
133 if (bytelen == 0)
134 break;
135 error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
136 UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
137 if (error)
138 break;
139 }
140 goto out;
141 }
142
143 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
144 bytesinfile = ip->i_size - uio->uio_offset;
145 if (bytesinfile <= 0)
146 break;
147 lbn = lblkno(fs, uio->uio_offset);
148 nextlbn = lbn + 1;
149 size = blksize(fs, ip, lbn);
150 blkoffset = blkoff(fs, uio->uio_offset);
151 xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
152 bytesinfile);
153
154 if (lblktosize(fs, nextlbn) >= ip->i_size)
155 error = bread(vp, lbn, size, NOCRED, 0, &bp);
156 else {
157 int nextsize = blksize(fs, ip, nextlbn);
158 error = breadn(vp, lbn,
159 size, &nextlbn, &nextsize, 1, NOCRED, 0, &bp);
160 }
161 if (error)
162 break;
163
164 /*
165 * We should only get non-zero b_resid when an I/O error
166 * has occurred, which should cause us to break above.
167 * However, if the short read did not cause an error,
168 * then we want to ensure that we do not uiomove bad
169 * or uninitialized data.
170 */
171 size -= bp->b_resid;
172 if (size < xfersize) {
173 if (size == 0)
174 break;
175 xfersize = size;
176 }
177 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
178 if (error)
179 break;
180 brelse(bp, 0);
181 }
182 if (bp != NULL)
183 brelse(bp, 0);
184
185 out:
186 if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
187 ip->i_flag |= IN_ACCESS;
188 if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) {
189 error = UFS_WAPBL_BEGIN(vp->v_mount);
190 if (error) {
191 fstrans_done(vp->v_mount);
192 return error;
193 }
194 error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
195 UFS_WAPBL_END(vp->v_mount);
196 }
197 }
198
199 fstrans_done(vp->v_mount);
200 return (error);
201 }
202
203 /*
204 * Vnode op for writing.
205 */
206 int
207 WRITE(void *v)
208 {
209 struct vop_write_args /* {
210 struct vnode *a_vp;
211 struct uio *a_uio;
212 int a_ioflag;
213 kauth_cred_t a_cred;
214 } */ *ap = v;
215 struct vnode *vp;
216 struct uio *uio;
217 struct inode *ip;
218 FS *fs;
219 struct buf *bp;
220 kauth_cred_t cred;
221 daddr_t lbn;
222 off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
223 int blkoffset, error, flags, ioflag, resid, size, xfersize;
224 int aflag;
225 int extended=0;
226 vsize_t bytelen;
227 bool async;
228 bool usepc = false;
229 #ifdef LFS_READWRITE
230 bool need_unreserve = false;
231 #endif
232 struct ufsmount *ump;
233
234 cred = ap->a_cred;
235 ioflag = ap->a_ioflag;
236 uio = ap->a_uio;
237 vp = ap->a_vp;
238 ip = VTOI(vp);
239 ump = ip->i_ump;
240
241 KASSERT(vp->v_size == ip->i_size);
242 #ifdef DIAGNOSTIC
243 if (uio->uio_rw != UIO_WRITE)
244 panic("%s: mode", WRITE_S);
245 #endif
246
247 switch (vp->v_type) {
248 case VREG:
249 if (ioflag & IO_APPEND)
250 uio->uio_offset = ip->i_size;
251 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
252 return (EPERM);
253 /* FALLTHROUGH */
254 case VLNK:
255 break;
256 case VDIR:
257 if ((ioflag & IO_SYNC) == 0)
258 panic("%s: nonsync dir write", WRITE_S);
259 break;
260 default:
261 panic("%s: type", WRITE_S);
262 }
263
264 fs = ip->I_FS;
265 if (uio->uio_offset < 0 ||
266 (u_int64_t)uio->uio_offset + uio->uio_resid > ump->um_maxfilesize)
267 return (EFBIG);
268 #ifdef LFS_READWRITE
269 /* Disallow writes to the Ifile, even if noschg flag is removed */
270 /* XXX can this go away when the Ifile is no longer in the namespace? */
271 if (vp == fs->lfs_ivnode)
272 return (EPERM);
273 #endif
274 if (uio->uio_resid == 0)
275 return (0);
276
277 fstrans_start(vp->v_mount, FSTRANS_SHARED);
278
279 flags = ioflag & IO_SYNC ? B_SYNC : 0;
280 async = vp->v_mount->mnt_flag & MNT_ASYNC;
281 origoff = uio->uio_offset;
282 resid = uio->uio_resid;
283 osize = ip->i_size;
284 error = 0;
285
286 usepc = vp->v_type == VREG;
287
288 if ((ioflag & IO_JOURNALLOCKED) == 0) {
289 error = UFS_WAPBL_BEGIN(vp->v_mount);
290 if (error) {
291 fstrans_done(vp->v_mount);
292 return error;
293 }
294 }
295
296 #ifdef LFS_READWRITE
297 async = true;
298 lfs_availwait(fs, btofsb(fs, uio->uio_resid));
299 lfs_check(vp, LFS_UNUSED_LBN, 0);
300 #endif /* !LFS_READWRITE */
301 if (!usepc)
302 goto bcache;
303
304 preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
305 aflag = ioflag & IO_SYNC ? B_SYNC : 0;
306 nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
307 endallocoff = nsize - blkoff(fs, nsize);
308
309 /*
310 * if we're increasing the file size, deal with expanding
311 * the fragment if there is one.
312 */
313
314 if (nsize > osize && lblkno(fs, osize) < UFS_NDADDR &&
315 lblkno(fs, osize) != lblkno(fs, nsize) &&
316 blkroundup(fs, osize) != osize) {
317 off_t eob;
318
319 eob = blkroundup(fs, osize);
320 uvm_vnp_setwritesize(vp, eob);
321 error = ufs_balloc_range(vp, osize, eob - osize, cred, aflag);
322 if (error)
323 goto out;
324 if (flags & B_SYNC) {
325 mutex_enter(vp->v_interlock);
326 VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask),
327 round_page(eob),
328 PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
329 }
330 }
331
332 while (uio->uio_resid > 0) {
333 int ubc_flags = UBC_WRITE;
334 bool overwrite; /* if we're overwrite a whole block */
335 off_t newoff;
336
337 if (ioflag & IO_DIRECT) {
338 genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED);
339 }
340
341 oldoff = uio->uio_offset;
342 blkoffset = blkoff(fs, uio->uio_offset);
343 bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
344 if (bytelen == 0) {
345 break;
346 }
347
348 /*
349 * if we're filling in a hole, allocate the blocks now and
350 * initialize the pages first. if we're extending the file,
351 * we can safely allocate blocks without initializing pages
352 * since the new blocks will be inaccessible until the write
353 * is complete.
354 */
355 overwrite = uio->uio_offset >= preallocoff &&
356 uio->uio_offset < endallocoff;
357 if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 &&
358 blkoff(fs, uio->uio_offset) == 0 &&
359 (uio->uio_offset & PAGE_MASK) == 0) {
360 vsize_t len;
361
362 len = trunc_page(bytelen);
363 len -= blkoff(fs, len);
364 if (len > 0) {
365 overwrite = true;
366 bytelen = len;
367 }
368 }
369
370 newoff = oldoff + bytelen;
371 if (vp->v_size < newoff) {
372 uvm_vnp_setwritesize(vp, newoff);
373 }
374
375 if (!overwrite) {
376 error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
377 cred, aflag);
378 if (error)
379 break;
380 } else {
381 genfs_node_wrlock(vp);
382 error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
383 aflag, cred);
384 genfs_node_unlock(vp);
385 if (error)
386 break;
387 ubc_flags |= UBC_FAULTBUSY;
388 }
389
390 /*
391 * copy the data.
392 */
393
394 error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
395 IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp));
396
397 /*
398 * update UVM's notion of the size now that we've
399 * copied the data into the vnode's pages.
400 *
401 * we should update the size even when uiomove failed.
402 */
403
404 if (vp->v_size < newoff) {
405 uvm_vnp_setsize(vp, newoff);
406 extended = 1;
407 }
408
409 if (error)
410 break;
411
412 /*
413 * flush what we just wrote if necessary.
414 * XXXUBC simplistic async flushing.
415 */
416
417 #ifndef LFS_READWRITE
418 if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
419 mutex_enter(vp->v_interlock);
420 error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
421 (uio->uio_offset >> 16) << 16,
422 PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY);
423 if (error)
424 break;
425 }
426 #endif
427 }
428 if (error == 0 && ioflag & IO_SYNC) {
429 mutex_enter(vp->v_interlock);
430 error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask),
431 round_page(blkroundup(fs, uio->uio_offset)),
432 PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED);
433 }
434 goto out;
435
436 bcache:
437 mutex_enter(vp->v_interlock);
438 VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
439 PGO_CLEANIT | PGO_FREE | PGO_SYNCIO | PGO_JOURNALLOCKED);
440 while (uio->uio_resid > 0) {
441 lbn = lblkno(fs, uio->uio_offset);
442 blkoffset = blkoff(fs, uio->uio_offset);
443 xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
444 if (fs->fs_bsize > xfersize)
445 flags |= B_CLRBUF;
446 else
447 flags &= ~B_CLRBUF;
448
449 #ifdef LFS_READWRITE
450 error = lfs_reserve(fs, vp, NULL,
451 btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift));
452 if (error)
453 break;
454 need_unreserve = true;
455 #endif
456 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
457 ap->a_cred, flags, &bp);
458
459 if (error)
460 break;
461 if (uio->uio_offset + xfersize > ip->i_size) {
462 ip->i_size = uio->uio_offset + xfersize;
463 DIP_ASSIGN(ip, size, ip->i_size);
464 uvm_vnp_setsize(vp, ip->i_size);
465 extended = 1;
466 }
467 size = blksize(fs, ip, lbn) - bp->b_resid;
468 if (xfersize > size)
469 xfersize = size;
470
471 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
472
473 /*
474 * if we didn't clear the block and the uiomove failed,
475 * the buf will now contain part of some other file,
476 * so we need to invalidate it.
477 */
478 if (error && (flags & B_CLRBUF) == 0) {
479 brelse(bp, BC_INVAL);
480 break;
481 }
482 #ifdef LFS_READWRITE
483 (void)VOP_BWRITE(bp->b_vp, bp);
484 lfs_reserve(fs, vp, NULL,
485 -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift));
486 need_unreserve = false;
487 #else
488 if (ioflag & IO_SYNC)
489 (void)bwrite(bp);
490 else if (xfersize + blkoffset == fs->fs_bsize)
491 bawrite(bp);
492 else
493 bdwrite(bp);
494 #endif
495 if (error || xfersize == 0)
496 break;
497 }
498 #ifdef LFS_READWRITE
499 if (need_unreserve) {
500 lfs_reserve(fs, vp, NULL,
501 -btofsb(fs, (UFS_NIADDR + 1) << fs->lfs_bshift));
502 }
503 #endif
504
505 /*
506 * If we successfully wrote any data, and we are not the superuser
507 * we clear the setuid and setgid bits as a precaution against
508 * tampering.
509 */
510 out:
511 ip->i_flag |= IN_CHANGE | IN_UPDATE;
512 if (vp->v_mount->mnt_flag & MNT_RELATIME)
513 ip->i_flag |= IN_ACCESS;
514 if (resid > uio->uio_resid && ap->a_cred) {
515 if (ip->i_mode & ISUID) {
516 if (kauth_authorize_vnode(ap->a_cred,
517 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) {
518 ip->i_mode &= ~ISUID;
519 DIP_ASSIGN(ip, mode, ip->i_mode);
520 }
521 }
522
523 if (ip->i_mode & ISGID) {
524 if (kauth_authorize_vnode(ap->a_cred,
525 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) {
526 ip->i_mode &= ~ISGID;
527 DIP_ASSIGN(ip, mode, ip->i_mode);
528 }
529 }
530 }
531 if (resid > uio->uio_resid)
532 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
533 if (error) {
534 (void) UFS_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred);
535 uio->uio_offset -= resid - uio->uio_resid;
536 uio->uio_resid = resid;
537 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
538 error = UFS_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
539 else
540 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
541 KASSERT(vp->v_size == ip->i_size);
542 if ((ioflag & IO_JOURNALLOCKED) == 0)
543 UFS_WAPBL_END(vp->v_mount);
544 fstrans_done(vp->v_mount);
545
546 return (error);
547 }
548