fdfs.c revision 1.3 1 /* $NetBSD: fdfs.c,v 1.3 2006/05/12 19:33:02 perseant Exp $ */
2
3 /*-
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Buffer cache routines for a file-descriptor backed filesystem.
41 * This is part of lfs_cleanerd so there is also a "segment pointer" that
42 * we can make buffers out of without duplicating memory or reading the data
43 * again.
44 */
45
46 #include <err.h>
47 #include <fcntl.h>
48 #include <time.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53
54 #include <sys/syslog.h>
55 #include <sys/param.h>
56 #include <sys/mount.h>
57 #include <sys/stat.h>
58
59 #include "vnode.h"
60 #include "bufcache.h"
61 #include "fdfs.h"
62
63 /*
64 * Return a "vnode" interface to a given file descriptor.
65 */
66 struct uvnode *
67 fd_vget(int fd, int bsize, int segsize, int nseg)
68 {
69 struct fdfs *fs;
70 struct uvnode *vp;
71 int i;
72
73 fs = (struct fdfs *)malloc(sizeof(*fs));
74 if (fs == NULL)
75 return NULL;
76 if (segsize > 0) {
77 fs->fd_bufp = (struct fd_buf *)malloc(nseg *
78 sizeof(struct fd_buf));
79 if (fs->fd_bufp == NULL) {
80 free(fs);
81 return NULL;
82 }
83 for (i = 0; i < nseg; i++) {
84 fs->fd_bufp[i].start = 0x0;
85 fs->fd_bufp[i].end = 0x0;
86 fs->fd_bufp[i].buf = (char *)malloc(segsize);
87 if (fs->fd_bufp[i].buf == NULL) {
88 while (--i >= 0)
89 free(fs->fd_bufp[i].buf);
90 free(fs->fd_bufp);
91 free(fs);
92 return NULL;
93 }
94 }
95 } else
96 fs->fd_bufp = NULL;
97
98 fs->fd_fd = fd;
99 fs->fd_bufc = nseg;
100 fs->fd_bufi = 0;
101 fs->fd_bsize = bsize;
102 fs->fd_ssize = segsize;
103
104 vp = (struct uvnode *) malloc(sizeof(*vp));
105 if (vp == NULL) {
106 for (i = nseg - 1; i >= 0; i--)
107 free(fs->fd_bufp[i].buf);
108 free(fs->fd_bufp);
109 free(fs);
110 return NULL;
111 }
112 memset(vp, 0, sizeof(*vp));
113 vp->v_fd = fd;
114 vp->v_fs = fs;
115 vp->v_usecount = 0;
116 vp->v_strategy_op = fd_vop_strategy;
117 vp->v_bwrite_op = fd_vop_bwrite;
118 vp->v_bmap_op = fd_vop_bmap;
119 LIST_INIT(&vp->v_cleanblkhd);
120 LIST_INIT(&vp->v_dirtyblkhd);
121 vp->v_data = NULL;
122
123 return vp;
124 }
125
126 /*
127 * Deallocate a vnode.
128 */
129 void
130 fd_reclaim(struct uvnode *vp)
131 {
132 int i;
133 struct ubuf *bp;
134 struct fdfs *fs;
135
136 while ((bp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) {
137 bremfree(bp);
138 buf_destroy(bp);
139 }
140 while ((bp = LIST_FIRST(&vp->v_cleanblkhd)) != NULL) {
141 bremfree(bp);
142 buf_destroy(bp);
143 }
144
145 fs = (struct fdfs *)vp->v_fs;
146 for (i = 0; i < fs->fd_bufc; i++)
147 free(fs->fd_bufp[i].buf);
148 free(fs->fd_bufp);
149 free(fs);
150 memset(vp, 0, sizeof(vp));
151 }
152
153 /*
154 * We won't be using that last segment after all.
155 */
156 void
157 fd_release(struct uvnode *vp)
158 {
159 --((struct fdfs *)vp->v_fs)->fd_bufi;
160 }
161
162 /*
163 * Reset buffer pointer to first buffer.
164 */
165 void
166 fd_release_all(struct uvnode *vp)
167 {
168 ((struct fdfs *)vp->v_fs)->fd_bufi = 0;
169 }
170
171 /*
172 * Prepare a segment buffer which we will expect to read from.
173 * We never increment fd_bufi unless we have succeeded to allocate the space,
174 * if necessary, and have read the segment.
175 */
176 int
177 fd_preload(struct uvnode *vp, daddr_t start)
178 {
179 struct fdfs *fs = (struct fdfs *)vp->v_fs;
180 struct fd_buf *t;
181 int r;
182
183 /* We might need to allocate more buffers. */
184 if (fs->fd_bufi == fs->fd_bufc) {
185 ++fs->fd_bufc;
186 syslog(LOG_DEBUG, "increasing number of segment buffers to %d",
187 fs->fd_bufc);
188 t = realloc(fs->fd_bufp, fs->fd_bufc * sizeof(struct fd_buf));
189 if (t == NULL) {
190 syslog(LOG_NOTICE, "failed resizing table to %d\n",
191 fs->fd_bufc);
192 return -1;
193 }
194 fs->fd_bufp = t;
195 fs->fd_bufp[fs->fd_bufi].start = 0x0;
196 fs->fd_bufp[fs->fd_bufi].end = 0x0;
197 fs->fd_bufp[fs->fd_bufi].buf = (char *)malloc(fs->fd_ssize);
198 if (fs->fd_bufp[fs->fd_bufi].buf == NULL) {
199 syslog(LOG_NOTICE, "failed to allocate buffer #%d\n",
200 fs->fd_bufc);
201 --fs->fd_bufc;
202 return -1;
203 }
204 }
205
206 /* Read the current buffer. */
207 fs->fd_bufp[fs->fd_bufi].start = start;
208 fs->fd_bufp[fs->fd_bufi].end = start + fs->fd_ssize / fs->fd_bsize;
209
210 if ((r = pread(fs->fd_fd, fs->fd_bufp[fs->fd_bufi].buf,
211 (size_t)fs->fd_ssize, start * fs->fd_bsize)) < 0) {
212 syslog(LOG_ERR, "preload to segment buffer %d", fs->fd_bufi);
213 return r;
214 }
215
216 fs->fd_bufi = fs->fd_bufi + 1;
217 return 0;
218 }
219
220 /*
221 * Get a pointer to a block contained in one of the segment buffers,
222 * as if from bread() but avoiding the buffer cache.
223 */
224 char *
225 fd_ptrget(struct uvnode *vp, daddr_t bn)
226 {
227 int i;
228 struct fdfs *fs;
229
230 fs = (struct fdfs *)vp->v_fs;
231 for (i = 0; i < fs->fd_bufc; i++) {
232 if (bn >= fs->fd_bufp[i].start && bn < fs->fd_bufp[i].end) {
233 return fs->fd_bufp[i].buf +
234 (bn - fs->fd_bufp[i].start) * fs->fd_bsize;
235 }
236 }
237 return NULL;
238 }
239
240 /*
241 * Strategy routine. We can read from the segment buffer if requested.
242 */
243 int
244 fd_vop_strategy(struct ubuf * bp)
245 {
246 struct fdfs *fs;
247 char *cp;
248 int count;
249
250 fs = (struct fdfs *)bp->b_vp->v_fs;
251 if (bp->b_flags & B_READ) {
252 if ((cp = fd_ptrget(bp->b_vp, bp->b_blkno)) != NULL) {
253 free(bp->b_data);
254 bp->b_data = cp;
255 bp->b_flags |= (B_DONTFREE | B_DONE);
256 return 0;
257 }
258 count = pread(bp->b_vp->v_fd, bp->b_data, bp->b_bcount,
259 bp->b_blkno * fs->fd_bsize);
260 if (count == bp->b_bcount)
261 bp->b_flags |= B_DONE;
262 } else {
263 count = pwrite(bp->b_vp->v_fd, bp->b_data, bp->b_bcount,
264 bp->b_blkno * fs->fd_bsize);
265 if (count == 0) {
266 perror("pwrite");
267 return -1;
268 }
269 bp->b_flags &= ~B_DELWRI;
270 reassignbuf(bp, bp->b_vp);
271 }
272 return 0;
273 }
274
275 /*
276 * Delayed write.
277 */
278 int
279 fd_vop_bwrite(struct ubuf * bp)
280 {
281 bp->b_flags |= B_DELWRI;
282 reassignbuf(bp, bp->b_vp);
283 brelse(bp);
284 return 0;
285 }
286
287 /*
288 * Map lbn to disk address. Since we are using the file
289 * descriptor as the "disk", the disk address is meaningless
290 * and we just return the block address.
291 */
292 int
293 fd_vop_bmap(struct uvnode * vp, daddr_t lbn, daddr_t * daddrp)
294 {
295 *daddrp = lbn;
296 return 0;
297 }
298