tmpfs.h revision 1.8.2.2 1 /* $NetBSD: tmpfs.h,v 1.8.2.2 2005/10/29 17:28:19 yamt Exp $ */
2
3 /*
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
/*
 * Inclusion guard.  Unlike the usual idempotent guard, a second inclusion
 * of this header in the same translation unit is not silently skipped: it
 * is rejected with a compile-time error.
 */
#if !defined(_TMPFS_H_)
# define _TMPFS_H_
#else
# error "tmpfs.h cannot be included multiple times."
#endif
45
46 /* ---------------------------------------------------------------------
47 * KERNEL-SPECIFIC DEFINITIONS
48 * --------------------------------------------------------------------- */
49
50 #if defined(_KERNEL)
51
52 #include <sys/dirent.h>
53 #include <sys/mount.h>
54 #include <sys/queue.h>
55 #include <sys/vnode.h>
56
57 #include <fs/tmpfs/tmpfs_pool.h>
58
59 /* --------------------------------------------------------------------- */
60
/*
 * Internal representation of a tmpfs directory entry.
 *
 * An entry binds a name (td_name, td_namelen) to a node (td_node) and is
 * chained to the other entries of its directory through td_entries.
 */
struct tmpfs_dirent {
	/* Linkage for the tail queue (struct tmpfs_dir) that holds the
	 * entries of a directory. */
	TAILQ_ENTRY(tmpfs_dirent)	td_entries;

	/* Length of the name stored in this directory entry.  This avoids
	 * the need to recalculate it every time the name is used. */
	uint16_t			td_namelen;

	/* The name of the entry, allocated from a string pool.  This
	 * string is not required to be zero-terminated; therefore, the
	 * td_namelen field must always be used when accessing its value. */
	char *				td_name;

	/* Pointer to the node this entry refers to. */
	struct tmpfs_node *		td_node;
};
79
/* A directory in tmpfs holds a sorted list of directory entries, which in
 * turn point to other files (which can be directories themselves).
 *
 * In tmpfs, this list is managed by a tail queue, whose head is defined by
 * the struct tmpfs_dir type.
 *
 * It is important to notice that directories do not have entries for . and
 * .. as other file systems do.  These can be generated when requested
 * based on information available by other means, such as the pointer to
 * the node itself in the former case or the pointer to the parent directory
 * in the latter case.  This is done to simplify tmpfs's code and, more
 * importantly, to remove redundancy. */
TAILQ_HEAD(tmpfs_dir, tmpfs_dirent);
93
/* Directory cookies.
 *
 * TMPFS_DIRCOOKIE() turns a pointer to a directory entry into an off_t by
 * converting the pointer value itself (through uintptr_t).  The three
 * constants below are small fixed values; judging by their names they
 * stand for the . and .. entries, which are not stored in the directory,
 * and for the end of the directory -- confirm against the readdir code. */
#define	TMPFS_DIRCOOKIE(dirent)	((off_t)(uintptr_t)(dirent))
#define	TMPFS_DIRCOOKIE_DOT	0
#define	TMPFS_DIRCOOKIE_DOTDOT	1
#define	TMPFS_DIRCOOKIE_EOF	2
98
99 /* --------------------------------------------------------------------- */
100
/*
 * Internal representation of a tmpfs file system node.
 *
 * This structure is split in two parts: one holds attributes common
 * to all file types and the other holds data that is only applicable to
 * a particular type.  The code must be careful to only access those
 * attributes that are actually allowed by the node's type.
 *
 * The type-specific part is an anonymous union of anonymous structures,
 * so its members are accessed directly (e.g. node->tn_parent) and share
 * storage with the members of the other file types.
 */
struct tmpfs_node {
	/* Doubly-linked list entry which links all existing nodes for a
	 * single file system.  This is provided to ease the removal of
	 * all nodes during the unmount operation. */
	LIST_ENTRY(tmpfs_node)	tn_entries;

	/* The node's type.  Any of 'VBLK', 'VCHR', 'VDIR', 'VFIFO',
	 * 'VLNK', 'VREG' and 'VSOCK' is allowed.  The usage of vnode
	 * types instead of a custom enumeration is to make things simpler
	 * and faster, as we do not need to convert between two types. */
	enum vtype		tn_type;

	/* Node identifier. */
	ino_t			tn_id;

	/* Node's internal status.  This is used by several file system
	 * operations to do modifications to the node in a delayed
	 * fashion.
	 *
	 * The values below are the bits that can be set in tn_status.
	 * Note that the numbering starts at bit 1; bit 0 is not assigned
	 * by this header. */
	int			tn_status;
#define	TMPFS_NODE_ACCESSED	(1 << 1)
#define	TMPFS_NODE_MODIFIED	(1 << 2)
#define	TMPFS_NODE_CHANGED	(1 << 3)

	/* The node size.  It does not necessarily match the real amount
	 * of memory consumed by it. */
	off_t			tn_size;

	/* Generic node attributes.  tn_gen is the node's generation
	 * number; struct tmpfs_mount keeps released nodes around (instead
	 * of deleting them) in order to keep track of it. */
	uid_t			tn_uid;
	gid_t			tn_gid;
	mode_t			tn_mode;
	int			tn_flags;
	nlink_t			tn_links;
	struct timespec		tn_atime;
	struct timespec		tn_mtime;
	struct timespec		tn_ctime;
	struct timespec		tn_birthtime;
	unsigned long		tn_gen;

	/* Head of byte-level lock list (used by tmpfs_advlock). */
	struct lockf *		tn_lockf;

	/* As there is a single vnode for each active file within the
	 * system, care has to be taken to avoid allocating more than one
	 * vnode per file.  In order to do this, a bidirectional association
	 * is kept between vnodes and nodes.
	 *
	 * Whenever a vnode is allocated, its v_data field is updated to
	 * point to the node it references.  At the same time, the node's
	 * tn_vnode field is modified to point to the new vnode representing
	 * it.  Further attempts to allocate a vnode for this same node will
	 * result in returning a new reference to the value stored in
	 * tn_vnode.
	 *
	 * May be NULL when the node is unused (that is, no vnode has been
	 * allocated for it or it has been reclaimed). */
	struct vnode *		tn_vnode;

	/* Pointer to the directory entry found by tmpfs_lookup() after
	 * doing a delete or a rename lookup; its value is only valid in
	 * these two situations.  In case we were looking up . or .., it
	 * holds a null pointer. */
	struct tmpfs_dirent *	tn_lookup_dirent;

	union {
		/* Valid when tn_type == VBLK || tn_type == VCHR. */
		struct {
			dev_t			tn_rdev;
		};

		/* Valid when tn_type == VDIR. */
		struct {
			/* Pointer to the parent directory.  The root
			 * directory has a pointer to itself in this field;
			 * this property identifies the root node. */
			struct tmpfs_node *	tn_parent;

			/* Head of a tail-queue that links the contents of
			 * the directory together.  See above for a
			 * description of its contents. */
			struct tmpfs_dir	tn_dir;

			/* Number and pointer of the first directory entry
			 * returned by the readdir operation if it were
			 * called again to continue reading data from the
			 * same directory as before.  This is used to speed
			 * up reads of long directories, assuming that no
			 * more than one read is in progress at a given time.
			 * Otherwise, these values are discarded and a linear
			 * scan is performed from the beginning up to the
			 * point where readdir starts returning values. */
			off_t			tn_readdir_lastn;
			struct tmpfs_dirent *	tn_readdir_lastp;
		};

		/* Valid when tn_type == VLNK. */
		struct {
			/* The link's target, allocated from a string pool. */
			char *			tn_link;
		};

		/* Valid when tn_type == VREG. */
		struct {
			/* The contents of regular files stored in a tmpfs
			 * file system are represented by a single anonymous
			 * memory object (aobj, for short).  The aobj provides
			 * direct access to any position within the file,
			 * because its contents are always mapped in a
			 * contiguous region of virtual memory.  It is a task
			 * of the memory management subsystem (see uvm(9)) to
			 * issue the required page ins or page outs whenever
			 * a position within the file is accessed. */
			struct uvm_object *	tn_aobj;
			size_t			tn_aobj_pages;
		};
	};
};
/* Head type for lists of nodes chained through their tn_entries field. */
LIST_HEAD(tmpfs_node_list, tmpfs_node);
227
228 /* --------------------------------------------------------------------- */
229
/*
 * Internal representation of a tmpfs mount point.
 */
struct tmpfs_mount {
	/* Maximum number of memory pages available for use by the file
	 * system, set during mount time.  This variable must never be
	 * used directly as it may be bigger than the current amount of
	 * free memory; in the extreme case, it will hold the SIZE_MAX
	 * value.  Instead, use TMPFS_PAGES_MAX(). */
	size_t			tm_pages_max;

	/* Number of pages in use by the file system.  Cannot be bigger
	 * than the value returned by TMPFS_PAGES_MAX() in any case. */
	size_t			tm_pages_used;

	/* Pointer to the node representing the root directory of this
	 * file system. */
	struct tmpfs_node *	tm_root;

	/* Maximum number of possible nodes for this file system; set
	 * during mount time.  We need a hard limit on the maximum number
	 * of nodes to avoid allocating too many of them; their objects
	 * cannot be released until the file system is unmounted.
	 * Otherwise, we could easily run out of memory by creating lots
	 * of empty files and then simply removing them. */
	ino_t			tm_nodes_max;

	/* Number of nodes currently allocated.  This number only grows.
	 * When it reaches tm_nodes_max, no more new nodes can be allocated.
	 * Of course, the old, unused ones can be reused. */
	ino_t			tm_nodes_last;

	/* Nodes are organized in two different lists.  The used list
	 * contains all nodes that are currently used by the file system;
	 * i.e., they refer to existing files.  The available list contains
	 * all nodes that are currently available for use by new files.
	 * Nodes must be kept in this list (instead of deleting them)
	 * because we need to keep track of their generation number (tn_gen
	 * field).
	 *
	 * Note that nodes are lazily allocated: if the available list is
	 * empty and we have enough space to create more nodes, they will be
	 * created and inserted in the used list.  Once these are released,
	 * they will go into the available list, remaining alive until the
	 * file system is unmounted. */
	struct tmpfs_node_list	tm_nodes_used;
	struct tmpfs_node_list	tm_nodes_avail;

	/* Pools used to store file system meta data.  These are not shared
	 * across several instances of tmpfs for the reasons described in
	 * tmpfs_pool.c. */
	struct tmpfs_pool	tm_dirent_pool;
	struct tmpfs_pool	tm_node_pool;
	struct tmpfs_str_pool	tm_str_pool;
};
285
286 /* --------------------------------------------------------------------- */
287
/*
 * This structure maps a file identifier to a tmpfs node.  Used by the
 * NFS code.
 *
 * tf_id and tf_gen have the same types as the tn_id and tn_gen fields of
 * struct tmpfs_node; presumably they carry copies of those values so the
 * node can be found again and checked for reuse -- confirm against the
 * code that fills in and consumes this structure.
 */
struct tmpfs_fid {
	uint16_t		tf_len;
	uint16_t		tf_pad;
	ino_t			tf_id;
	unsigned long		tf_gen;
};
298
299 /* --------------------------------------------------------------------- */
300
/*
 * Prototypes for tmpfs_subr.c.
 *
 * Most parameters are left unnamed here; the contract of each function
 * (argument meaning, locking, error values) is not stated in this header
 * and has to be looked up next to the corresponding definition.
 */

int	tmpfs_alloc_node(struct tmpfs_mount *, enum vtype,
	    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *,
	    char *, dev_t, struct proc *, struct tmpfs_node **);
void	tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *);
int	tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *,
	    const char *, uint16_t, struct tmpfs_dirent **);
void	tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *,
	    boolean_t);
int	tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, struct vnode **);
void	tmpfs_free_vp(struct vnode *);
int	tmpfs_alloc_file(struct vnode *, struct vnode **, struct vattr *,
	    struct componentname *, char *);
void	tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *);
void	tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *);
struct tmpfs_dirent *	tmpfs_dir_lookup(struct tmpfs_node *node,
			    struct componentname *cnp);
int	tmpfs_dir_getdotdent(struct tmpfs_node *, struct uio *);
int	tmpfs_dir_getdotdotdent(struct tmpfs_node *, struct uio *);
/* NOTE(review): the off_t argument is presumably a directory cookie as
 * produced by TMPFS_DIRCOOKIE() -- confirm against the definition. */
struct tmpfs_dirent *	tmpfs_dir_lookupbycookie(struct tmpfs_node *, off_t);
int	tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, off_t *);
int	tmpfs_reg_resize(struct vnode *, off_t);
/* TMPFS_PAGES_MAX() calls this with FALSE and treats the result as a
 * number of free pages; the meaning of TRUE is not visible from here. */
size_t	tmpfs_mem_info(boolean_t);
int	tmpfs_chflags(struct vnode *, int, struct ucred *, struct proc *);
int	tmpfs_chmod(struct vnode *, mode_t, struct ucred *, struct proc *);
int	tmpfs_chown(struct vnode *, uid_t, gid_t, struct ucred *,
	    struct proc *);
int	tmpfs_chsize(struct vnode *, u_quad_t, struct ucred *, struct proc *);
int	tmpfs_chtimes(struct vnode *, struct timespec *, struct timespec *,
	    int, struct ucred *, struct proc *);
void	tmpfs_itimes(struct vnode *, const struct timespec *,
	    const struct timespec *);

void	tmpfs_update(struct vnode *, const struct timespec *,
	    const struct timespec *, int);
int	tmpfs_truncate(struct vnode *, off_t);
340
341 /* --------------------------------------------------------------------- */
342
/*
 * Convenience macros to simplify some logical expressions.
 *
 * IMPLIES(a, b) yields 1 unless 'a' is true and 'b' is false.  'a' is
 * evaluated once; 'b' is evaluated only when 'a' is true.
 *
 * IFF(a, b) yields 1 when 'a' and 'b' have the same truth value and 0
 * otherwise.  It compares the logical negations of both arguments, so
 * each argument is evaluated exactly once.  (Spelling it as two IMPLIES
 * would expand every argument twice and could therefore run an argument
 * with side effects two times.)
 */
#define	IMPLIES(a, b)	(!(a) || (b))
#define	IFF(a, b)	(!(a) == !(b))
348
349 /* --------------------------------------------------------------------- */
350
/*
 * Checks that the directory entry pointed by 'de' matches the name 'name'
 * with a length of 'len'.
 *
 * 'de' is any expression yielding a pointer to a directory entry; 'len' is
 * converted to uint16_t before being compared with the stored length, and
 * the name bytes are compared with memcmp() (the stored name need not be
 * zero-terminated).  Every argument is parenthesized in the expansion so
 * that compound expressions such as 'base + 1' or '&entry' can be passed
 * safely.  Note that 'de' is expanded three times, so it must not have
 * side effects.
 */
#define	TMPFS_DIRENT_MATCHES(de, name, len) \
    ((de)->td_namelen == (uint16_t)(len) && \
    memcmp((de)->td_name, (name), (de)->td_namelen) == 0)
358
359 /* --------------------------------------------------------------------- */
360
/*
 * Ensures that the node pointed by 'node' is a directory and that its
 * contents are consistent with respect to directories:
 *
 *  - its type is VDIR,
 *  - its size is a multiple of sizeof(struct tmpfs_dirent), and
 *  - if a readdir continuation pointer is recorded, the recorded
 *    continuation number is the cookie of that same entry.
 *
 * The checks are KASSERTs.  The expansion is a single statement (wrapped
 * in do/while) so the macro is safe as the body of an unbraced if/else;
 * it must be followed by a semicolon.  'node' is expanded several times
 * and must not have side effects.
 */
#define	TMPFS_VALIDATE_DIR(node) \
    do { \
	KASSERT((node)->tn_type == VDIR); \
	KASSERT((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
	KASSERT((node)->tn_readdir_lastp == NULL || \
	    TMPFS_DIRCOOKIE((node)->tn_readdir_lastp) == \
	    (node)->tn_readdir_lastn); \
    } while (/* CONSTCOND */ 0)
370
371 /* --------------------------------------------------------------------- */
372
373 /*
374 * Memory management stuff.
375 */
376
/* Number of memory pages to keep reserved for the rest of the system,
 * i.e., pages that tmpfs must not count as usable by itself.  The value
 * is however many pages make up 4 * 1024 * 1024 bytes (assuming PAGE_SIZE
 * is expressed in bytes).
 * XXX: Should this be tunable through sysctl, for instance? */
#define	TMPFS_PAGES_RESERVED	(4 * 1024 * 1024 / PAGE_SIZE)
381
382 /* Returns the maximum size allowed for a tmpfs file system. This macro
383 * must be used instead of directly retrieving the value from tm_pages_max.
384 * The reason is that the size of a tmpfs file system is dynamic: it lets
385 * the user store files as long as there is enough free memory (including
386 * physical memory and swap space). Therefore, the amount of memory to be
387 * used is either the limit imposed by the user during mount time or the
388 * amount of available memory, whichever is lower. To avoid consuming all
389 * the memory for a given mount point, the system will always reserve a
390 * minimum of TMPFS_PAGES_RESERVED pages, which is also taken into account
391 * by this macro (see above). */
392 static inline size_t
393 TMPFS_PAGES_MAX(struct tmpfs_mount *tmp)
394 {
395 size_t freepages;
396
397 freepages = tmpfs_mem_info(FALSE);
398 if (freepages < TMPFS_PAGES_RESERVED)
399 freepages = 0;
400 else
401 freepages -= TMPFS_PAGES_RESERVED;
402
403 return MIN(tmp->tm_pages_max, freepages + tmp->tm_pages_used);
404 }
405
/* Returns the available space, in pages, for the given file system: the
 * current value of TMPFS_PAGES_MAX() minus the pages already in use.  Both
 * operands are size_t, so the result is unsigned.  'tmp' is expanded
 * twice and must not have side effects. */
#define	TMPFS_PAGES_AVAIL(tmp)	(TMPFS_PAGES_MAX(tmp) - (tmp)->tm_pages_used)
408
409 /* --------------------------------------------------------------------- */
410
411 /*
412 * Macros/functions to convert from generic data structures to tmpfs
413 * specific ones.
414 *
415 * Macros are used when no sanity checks have to be done, as they provide
416 * the fastest conversion. On the other hand, inlined functions are used
417 * when expensive sanity checks are enabled, mostly because the checks
418 * have to be done separately from the return value.
419 */
420
421 #if defined(DIAGNOSTIC)
/*
 * Returns the tmpfs mount structure stored in the mnt_data field of the
 * generic mount point 'mp'.
 *
 * This is the checked (DIAGNOSTIC) variant: it asserts that 'mp' and its
 * mnt_data are not NULL before using them, and that the pages in use do
 * not exceed the current TMPFS_PAGES_MAX() of the mount.
 */
static inline
struct tmpfs_mount *
VFS_TO_TMPFS(struct mount *mp)
{
	struct tmpfs_mount *tmp;

	/* The pointers must be validated before mnt_data is read. */
	KASSERT((mp) != NULL && (mp)->mnt_data != NULL);
	tmp = (struct tmpfs_mount *)(mp)->mnt_data;
	KASSERT(TMPFS_PAGES_MAX(tmp) >= tmp->tm_pages_used);
	return tmp;
}
433
434 static inline
435 struct tmpfs_node *
436 VP_TO_TMPFS_NODE(struct vnode *vp)
437 {
438 struct tmpfs_node *node;
439
440 KASSERT((vp) != NULL && (vp)->v_data != NULL);
441 node = (struct tmpfs_node *)vp->v_data;
442 return node;
443 }
444
/*
 * Returns the tmpfs node behind the vnode 'vp', which must represent a
 * directory.
 *
 * Checked (DIAGNOSTIC) variant: the node is obtained with
 * VP_TO_TMPFS_NODE() and then run through TMPFS_VALIDATE_DIR() before it
 * is handed back to the caller.
 */
static inline struct tmpfs_node *
VP_TO_TMPFS_DIR(struct vnode *vp)
{
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);

	TMPFS_VALIDATE_DIR(node);

	return node;
}
455 #else
/* Unchecked variants: plain pointer conversions without any assertion.
 * The argument is parenthesized so that expressions such as '*vpp' or
 * '&mount' convert the intended object. */
# define VFS_TO_TMPFS(mp) ((struct tmpfs_mount *)(mp)->mnt_data)
# define VP_TO_TMPFS_NODE(vp) ((struct tmpfs_node *)(vp)->v_data)
# define VP_TO_TMPFS_DIR(vp) VP_TO_TMPFS_NODE(vp)
459 #endif
460
461 #endif /* _KERNEL */
462
463 /* ---------------------------------------------------------------------
464 * USER AND KERNEL DEFINITIONS
465 * --------------------------------------------------------------------- */
466
/*
 * This structure is used to communicate mount parameters between userland
 * and kernel space.
 *
 * TMPFS_ARGS_VERSION is the version number defined for this layout;
 * presumably ta_version has to carry it so that both sides agree on the
 * structure -- confirm against the mount code.
 */
#define	TMPFS_ARGS_VERSION	1
struct tmpfs_args {
	int			ta_version;

	/* Size counters.  NOTE(review): these look like the sources of the
	 * per-mount node and size limits (ta_nodes_max has the same type
	 * as tm_nodes_max); the unit of ta_size_max is not stated here. */
	ino_t			ta_nodes_max;
	off_t			ta_size_max;

	/* Root node attributes. */
	uid_t			ta_root_uid;
	gid_t			ta_root_gid;
	mode_t			ta_root_mode;
};
484