Home | History | Annotate | Line # | Download | only in sort
      1 /*	$NetBSD: files.c,v 1.43 2023/08/10 20:36:29 mrg Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Ben Harris and Jaromir Dolecek.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1993
     34  *	The Regents of the University of California.  All rights reserved.
     35  *
     36  * This code is derived from software contributed to Berkeley by
     37  * Peter McIlroy.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  */
     63 
     64 #include "sort.h"
     65 #include "fsort.h"
     66 
     67 __RCSID("$NetBSD: files.c,v 1.43 2023/08/10 20:36:29 mrg Exp $");
     68 
     69 #include <string.h>
     70 
     71 /* Align records in temporary files to avoid misaligned copies */
     72 #define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
     73 
     74 static ssize_t	seq(FILE *, u_char **);
     75 
     76 /*
     77  * this is called when there is no special key. It's only called
     78  * in the first fsort pass.
     79  */
     80 
     81 static u_char *opos;
     82 static size_t osz;
     83 
     84 void
     85 makeline_copydown(RECHEADER *recbuf)
     86 {
     87 	memmove(recbuf->data, opos, osz);
     88 }
     89 
     90 int
     91 makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
     92 {
     93 	u_char *pos;
     94 	int c;
     95 
     96 	pos = recbuf->data;
     97 	if (osz != 0) {
     98 		/*
     99 		 * Buffer shortage is solved by either of two ways:
    100 		 * o flush previous buffered data and start using the
    101 		 *   buffer from start.
    102 		 *   makeline_copydown() above must be called.
    103 		 * o realloc buffer
    104 		 *
    105 		 * This code has relied on realloc changing 'bufend',
    106 		 * but that isn't necessarily true.
    107 		 */
    108 		pos += osz;
    109 		osz = 0;
    110 	}
    111 
    112 	while (pos < bufend) {
    113 		c = getc(fp);
    114 		if (c == EOF) {
    115 			if (pos == recbuf->data) {
    116 				FCLOSE(fp);
    117 				return EOF;
    118 			}
    119 			/* Add terminator to partial line */
    120 			c = REC_D;
    121 		}
    122 		*pos++ = c;
    123 		if (c == REC_D) {
    124 			recbuf->offset = 0;
    125 			recbuf->length = pos - recbuf->data;
    126 			recbuf->keylen = recbuf->length - 1;
    127 			return (0);
    128 		}
    129 	}
    130 
    131 	/* Ran out of buffer space... */
    132 	if (recbuf->data < bufend) {
    133 		/* Remember where the partial record is */
    134 		osz = pos - recbuf->data;
    135 		opos = recbuf->data;
    136 	}
    137 	return (BUFFEND);
    138 }
    139 
    140 /*
    141  * This generates keys. It's only called in the first fsort pass
    142  */
    143 int
    144 makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
    145 {
    146 	static u_char *line_data;
    147 	static ssize_t line_size;
    148 	static int overflow = 0;
    149 
    150 	/* We get re-entered after returning BUFFEND - save old data */
    151 	if (overflow) {
    152 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
    153 		return overflow ? BUFFEND : 0;
    154 	}
    155 
    156 	line_size = seq(fp, &line_data);
    157 	if (line_size == 0) {
    158 		FCLOSE(fp);
    159 		return EOF;
    160 	}
    161 
    162 	if (line_size > bufend - recbuf->data) {
    163 		overflow = 1;
    164 	} else {
    165 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
    166 	}
    167 	return overflow ? BUFFEND : 0;
    168 }
    169 
    170 /*
    171  * get a line of input from fp
    172  */
    173 static ssize_t
    174 seq(FILE *fp, u_char **line)
    175 {
    176 	static u_char *buf;
    177 	static size_t buf_size = DEFLLEN;
    178 	u_char *end, *pos;
    179 	int c;
    180 	u_char *new_buf;
    181 
    182 	if (!buf) {
    183 		/* one-time initialization */
    184 		buf = malloc(buf_size);
    185 		if (!buf)
    186 		    err(2, "malloc of linebuf for %zu bytes failed",
    187 			    buf_size);
    188 	}
    189 
    190 	end = buf + buf_size;
    191 	pos = buf;
    192 	while ((c = getc(fp)) != EOF) {
    193 		*pos++ = c;
    194 		if (c == REC_D) {
    195 			*line = buf;
    196 			return pos - buf;
    197 		}
    198 		if (pos == end) {
    199 			/* Long line - double size of buffer */
    200 			/* XXX: Check here for stupidly long lines */
    201 			buf_size *= 2;
    202 			ptrdiff_t off = pos - buf;
    203 			new_buf = realloc(buf, buf_size);
    204 			if (!new_buf)
    205 				err(2, "realloc of linebuf to %zu bytes failed",
    206 					buf_size);
    207 
    208 			end = new_buf + buf_size;
    209 			pos = new_buf + off;
    210 			buf = new_buf;
    211 		}
    212 	}
    213 
    214 	if (pos != buf) {
    215 		/* EOF part way through line - add line terminator */
    216 		*pos++ = REC_D;
    217 		*line = buf;
    218 		return pos - buf;
    219 	}
    220 
    221 	return 0;
    222 }
    223 
    224 /*
    225  * write a key/line pair to a temporary file
    226  */
    227 void
    228 putrec(const RECHEADER *rec, FILE *fp)
    229 {
    230 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp,
    231 	       "failed to write temp file");
    232 }
    233 
    234 /*
    235  * write a line to output
    236  */
    237 void
    238 putline(const RECHEADER *rec, FILE *fp)
    239 {
    240 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp,
    241 	       "failed to write");
    242 }
    243 
    244 /*
    245  * write dump of key to output (for -Dk)
    246  */
    247 void
    248 putkeydump(const RECHEADER *rec, FILE *fp)
    249 {
    250 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp,
    251 	       "failed to write debug key");
    252 }
    253 
    254 /*
    255  * get a record from a temporary file. (Used by merge sort.)
    256  */
    257 int
    258 geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
    259 {
    260 	length_t file_len;
    261 	int i;
    262 
    263 	(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
    264 
    265 	if ((u_char *)(rec + 1) > end)
    266 		return (BUFFEND);
    267 	if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
    268 		fclose(fp);
    269 		return (EOF);
    270 	}
    271 	file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
    272 	if (end - rec->data < (ptrdiff_t)file_len) {
    273 		for (i = sizeof rec->length - 1; i >= 0;  i--)
    274 			ungetc(*((char *) rec + i), fp);
    275 		return (BUFFEND);
    276 	}
    277 
    278 	fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
    279 	return (0);
    280 }
    281