linux_sysfs.c revision 5ad99bdf
1/*
2 * (C) Copyright IBM Corporation 2006
3 * All Rights Reserved.
4 * Copyright 2012 Red Hat, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
20 * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26/**
27 * \file linux_sysfs.c
28 * Access PCI subsystem using Linux's sysfs interface.  This interface is
29 * available starting somewhere in the late 2.5.x kernel phase, and is the
30 * preferred method on all 2.6.x kernels.
31 *
32 * \author Ian Romanick <idr@us.ibm.com>
33 */
34
35#define _GNU_SOURCE
36
37#ifdef HAVE_CONFIG_H
38#include "config.h"
39#endif
40
41#include <stdlib.h>
42#include <string.h>
43#include <stdio.h>
44#include <unistd.h>
45#include <sys/types.h>
46#include <sys/stat.h>
47#include <fcntl.h>
48#include <limits.h>
49#include <sys/mman.h>
50#include <dirent.h>
51#include <errno.h>
52
53#if defined(__i386__) || defined(__x86_64__)
54#include <sys/io.h>
55#else
56#define inb(x) -1
57#define inw(x) -1
58#define inl(x) -1
59#define outb(x,y) do {} while (0)
60#define outw(x,y) do {} while (0)
61#define outl(x,y) do {} while (0)
62#define iopl(x) -1
63#endif
64
65#ifdef HAVE_MTRR
66#include <asm/mtrr.h>
67#include <sys/ioctl.h>
68#endif
69
70#include "pciaccess.h"
71#include "pciaccess_private.h"
72#include "linux_devmem.h"
73
74static const struct pci_system_methods linux_sysfs_methods;
75
76#define SYS_BUS_PCI "/sys/bus/pci/devices"
77
78static int
79pci_device_linux_sysfs_read( struct pci_device * dev, void * data,
80			     pciaddr_t offset, pciaddr_t size,
81			     pciaddr_t * bytes_read );
82
83static int populate_entries(struct pci_system * pci_sys);
84
85/**
86 * Attempt to access PCI subsystem using Linux's sysfs interface.
87 */
88_pci_hidden int
89pci_system_linux_sysfs_create( void )
90{
91    int err = 0;
92    struct stat st;
93
94
95    /* If the directory "/sys/bus/pci/devices" exists, then the PCI subsystem
96     * can be accessed using this interface.
97     */
98
99    if ( stat( SYS_BUS_PCI, & st ) == 0 ) {
100	pci_sys = calloc( 1, sizeof( struct pci_system ) );
101	if ( pci_sys != NULL ) {
102	    pci_sys->methods = & linux_sysfs_methods;
103#ifdef HAVE_MTRR
104	    pci_sys->mtrr_fd = open("/proc/mtrr", O_WRONLY | O_CLOEXEC);
105#endif
106	    err = populate_entries(pci_sys);
107	}
108	else {
109	    err = ENOMEM;
110	}
111    }
112    else {
113	err = errno;
114    }
115
116    return err;
117}
118
119
/**
 * \c scandir selector that rejects the "." and ".." directory entries.
 *
 * \param d  Directory entry being processed by \c scandir.
 *
 * \return
 * Zero if the entry name is exactly "." or "..", one for any other name.
 *
 * \sa scandir, populate_entries
 */
static int
scan_sys_pci_filter( const struct dirent * d )
{
    const char *entry_name = d->d_name;

    if (strcmp(entry_name, ".") == 0) {
        return 0;
    }

    if (strcmp(entry_name, "..") == 0) {
        return 0;
    }

    return 1;
}
136
137
138static int
139parse_separate_sysfs_files(struct pci_device * dev)
140{
141    static const char *attrs[] = {
142      "vendor",
143      "device",
144      "class",
145      "revision",
146      "subsystem_vendor",
147      "subsystem_device",
148    };
149    char name[256];
150    char resource[512];
151    uint64_t data[6];
152    int fd;
153    int i;
154
155    for (i = 0; i < 6; i++) {
156	snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/%s",
157		 SYS_BUS_PCI,
158		 dev->domain,
159		 dev->bus,
160		 dev->dev,
161		 dev->func,
162		 attrs[i]);
163
164	fd = open(name, O_RDONLY | O_CLOEXEC);
165	if (fd == -1) {
166	    return errno;
167	}
168
169	read(fd, resource, 512);
170	resource[511] = '\0';
171
172	close(fd);
173
174	data[i] = strtoull(resource, NULL, 16);
175    }
176
177    dev->vendor_id = data[0] & 0xffff;
178    dev->device_id = data[1] & 0xffff;
179    dev->device_class = data[2] & 0xffffff;
180    dev->revision = data[3] & 0xff;
181    dev->subvendor_id = data[4] & 0xffff;
182    dev->subdevice_id = data[5] & 0xffff;
183
184    return 0;
185}
186
187
188int
189populate_entries( struct pci_system * p )
190{
191    struct dirent ** devices = NULL;
192    int n;
193    int i;
194    int err = 0;
195
196
197    n = scandir( SYS_BUS_PCI, & devices, scan_sys_pci_filter, alphasort );
198    if ( n > 0 ) {
199	p->num_devices = n;
200	p->devices = calloc( n, sizeof( struct pci_device_private ) );
201
202	if (p->devices != NULL) {
203	    for (i = 0 ; i < n ; i++) {
204		uint8_t config[48];
205		pciaddr_t bytes;
206		unsigned dom, bus, dev, func;
207		struct pci_device_private *device =
208			(struct pci_device_private *) &p->devices[i];
209
210
211		sscanf(devices[i]->d_name, "%x:%02x:%02x.%1u",
212		       & dom, & bus, & dev, & func);
213
214		device->base.domain = dom;
215		/*
216		 * Applications compiled with older versions  do not expect
217		 * 32-bit domain numbers. To keep them working, we keep a 16-bit
218		 * version of the domain number at the previous location.
219		 */
220		if (dom > 0xffff)
221		     device->base.domain_16 = 0xffff;
222		else
223		     device->base.domain_16 = dom;
224		device->base.bus = bus;
225		device->base.dev = dev;
226		device->base.func = func;
227
228
229		err = parse_separate_sysfs_files(& device->base);
230		if (!err)
231		    continue;
232
233		err = pci_device_linux_sysfs_read(& device->base, config, 0,
234						  48, & bytes);
235		if ((bytes == 48) && !err) {
236		    device->base.vendor_id = (uint16_t)config[0]
237			+ ((uint16_t)config[1] << 8);
238		    device->base.device_id = (uint16_t)config[2]
239			+ ((uint16_t)config[3] << 8);
240		    device->base.device_class = (uint32_t)config[9]
241			+ ((uint32_t)config[10] << 8)
242			+ ((uint32_t)config[11] << 16);
243		    device->base.revision = config[8];
244		    device->base.subvendor_id = (uint16_t)config[44]
245			+ ((uint16_t)config[45] << 8);
246		    device->base.subdevice_id = (uint16_t)config[46]
247			+ ((uint16_t)config[47] << 8);
248		}
249
250		if (err) {
251		    break;
252		}
253	    }
254	}
255	else {
256	    err = ENOMEM;
257	}
258    }
259
260    for (i = 0; i < n; i++)
261	free(devices[i]);
262    free(devices);
263
264    if (err) {
265	free(p->devices);
266	p->devices = NULL;
267	p->num_devices = 0;
268    }
269
270    return err;
271}
272
273
274static int
275pci_device_linux_sysfs_probe( struct pci_device * dev )
276{
277    char     name[256];
278    uint8_t  config[256];
279    char     resource[512];
280    int fd;
281    pciaddr_t bytes;
282    unsigned i;
283    int err;
284
285
286    err = pci_device_linux_sysfs_read( dev, config, 0, 256, & bytes );
287    if ( bytes >= 64 ) {
288	struct pci_device_private *priv = (struct pci_device_private *) dev;
289
290	dev->irq = config[60];
291	priv->header_type = config[14];
292
293
294	/* The PCI config registers can be used to obtain information
295	 * about the memory and I/O regions for the device.  However,
296	 * doing so requires some tricky parsing (to correctly handle
297	 * 64-bit memory regions) and requires writing to the config
298	 * registers.  Since we'd like to avoid having to deal with the
299	 * parsing issues and non-root users can write to PCI config
300	 * registers, we use a different file in the device's sysfs
301	 * directory called "resource".
302	 *
303	 * The resource file contains all of the needed information in
304	 * a format that is consistent across all platforms.  Each BAR
305	 * and the expansion ROM have a single line of data containing
306	 * 3, 64-bit hex values:  the first address in the region,
307	 * the last address in the region, and the region's flags.
308	 */
309	snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/resource",
310		  SYS_BUS_PCI,
311		  dev->domain,
312		  dev->bus,
313		  dev->dev,
314		  dev->func );
315	fd = open( name, O_RDONLY | O_CLOEXEC);
316	if ( fd != -1 ) {
317	    char * next;
318	    pciaddr_t  low_addr;
319	    pciaddr_t  high_addr;
320	    pciaddr_t  flags;
321
322
323	    bytes = read( fd, resource, 512 );
324	    resource[511] = '\0';
325
326	    close( fd );
327
328	    next = resource;
329	    for ( i = 0 ; i < 6 ; i++ ) {
330
331		dev->regions[i].base_addr = strtoull( next, & next, 16 );
332		high_addr = strtoull( next, & next, 16 );
333		flags = strtoull( next, & next, 16 );
334
335		if ( dev->regions[i].base_addr != 0 ) {
336		    dev->regions[i].size = (high_addr
337					    - dev->regions[i].base_addr) + 1;
338
339		    dev->regions[i].is_IO = (flags & 0x01) != 0;
340		    dev->regions[i].is_64 = (flags & 0x04) != 0;
341		    dev->regions[i].is_prefetchable = (flags & 0x08) != 0;
342		}
343	    }
344
345	    low_addr = strtoull( next, & next, 16 );
346	    high_addr = strtoull( next, & next, 16 );
347	    flags = strtoull( next, & next, 16 );
348	    if ( low_addr != 0 ) {
349		priv->rom_base = low_addr;
350		dev->rom_size = (high_addr - low_addr) + 1;
351	    }
352	}
353    }
354
355    return err;
356}
357
358
359static int
360pci_device_linux_sysfs_read_rom( struct pci_device * dev, void * buffer )
361{
362    char name[256];
363    int fd;
364    struct stat  st;
365    int err = 0;
366    size_t rom_size;
367    size_t total_bytes;
368
369
370    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/rom",
371	      SYS_BUS_PCI,
372	      dev->domain,
373	      dev->bus,
374	      dev->dev,
375	      dev->func );
376
377    fd = open( name, O_RDWR | O_CLOEXEC);
378    if ( fd == -1 ) {
379#ifdef LINUX_ROM
380	/* If reading the ROM using sysfs fails, fall back to the old
381	 * /dev/mem based interface.
382	 * disable this for newer kernels using configure
383	 */
384	return pci_device_linux_devmem_read_rom(dev, buffer);
385#else
386	return errno;
387#endif
388    }
389
390
391    if ( fstat( fd, & st ) == -1 ) {
392	close( fd );
393	return errno;
394    }
395
396    rom_size = st.st_size;
397    if ( rom_size == 0 )
398	rom_size = 0x10000;
399
400    /* This is a quirky thing on Linux.  Even though the ROM and the file
401     * for the ROM in sysfs are read-only, the string "1" must be written to
402     * the file to enable the ROM.  After the data has been read, "0" must be
403     * written to the file to disable the ROM.
404     */
405    write( fd, "1", 1 );
406    lseek( fd, 0, SEEK_SET );
407
408    for ( total_bytes = 0 ; total_bytes < rom_size ; /* empty */ ) {
409	const int bytes = read( fd, (char *) buffer + total_bytes,
410				rom_size - total_bytes );
411	if ( bytes == -1 ) {
412	    err = errno;
413	    break;
414	}
415	else if ( bytes == 0 ) {
416	    break;
417	}
418
419	total_bytes += bytes;
420    }
421
422
423    lseek( fd, 0, SEEK_SET );
424    write( fd, "0", 1 );
425
426    close( fd );
427    return err;
428}
429
430
431static int
432pci_device_linux_sysfs_read( struct pci_device * dev, void * data,
433			     pciaddr_t offset, pciaddr_t size,
434			     pciaddr_t * bytes_read )
435{
436    char name[256];
437    pciaddr_t temp_size = size;
438    int err = 0;
439    int fd;
440    char *data_bytes = data;
441
442    if ( bytes_read != NULL ) {
443	*bytes_read = 0;
444    }
445
446    /* Each device has a directory under sysfs.  Within that directory there
447     * is a file named "config".  This file used to access the PCI config
448     * space.  It is used here to obtain most of the information about the
449     * device.
450     */
451    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
452	      SYS_BUS_PCI,
453	      dev->domain,
454	      dev->bus,
455	      dev->dev,
456	      dev->func );
457
458    fd = open( name, O_RDONLY | O_CLOEXEC);
459    if ( fd == -1 ) {
460	return errno;
461    }
462
463
464    while ( temp_size > 0 ) {
465	const ssize_t bytes = pread( fd, data_bytes, temp_size, offset );
466
467	/* If zero bytes were read, then we assume it's the end of the
468	 * config file.
469	 */
470	if (bytes == 0)
471	    break;
472	if ( bytes < 0 ) {
473	    err = errno;
474	    break;
475	}
476
477	temp_size -= bytes;
478	offset += bytes;
479	data_bytes += bytes;
480    }
481
482    if ( bytes_read != NULL ) {
483	*bytes_read = size - temp_size;
484    }
485
486    close( fd );
487    return err;
488}
489
490
491static int
492pci_device_linux_sysfs_write( struct pci_device * dev, const void * data,
493			     pciaddr_t offset, pciaddr_t size,
494			     pciaddr_t * bytes_written )
495{
496    char name[256];
497    pciaddr_t temp_size = size;
498    int err = 0;
499    int fd;
500    const char *data_bytes = data;
501
502    if ( bytes_written != NULL ) {
503	*bytes_written = 0;
504    }
505
506    /* Each device has a directory under sysfs.  Within that directory there
507     * is a file named "config".  This file used to access the PCI config
508     * space.  It is used here to obtain most of the information about the
509     * device.
510     */
511    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
512	      SYS_BUS_PCI,
513	      dev->domain,
514	      dev->bus,
515	      dev->dev,
516	      dev->func );
517
518    fd = open( name, O_WRONLY | O_CLOEXEC);
519    if ( fd == -1 ) {
520	return errno;
521    }
522
523
524    while ( temp_size > 0 ) {
525	const ssize_t bytes = pwrite( fd, data_bytes, temp_size, offset );
526
527	/* If zero bytes were written, then we assume it's the end of the
528	 * config file.
529	 */
530	if ( bytes == 0 )
531	    break;
532	if ( bytes < 0 ) {
533	    err = errno;
534	    break;
535	}
536
537	temp_size -= bytes;
538	offset += bytes;
539	data_bytes += bytes;
540    }
541
542    if ( bytes_written != NULL ) {
543	*bytes_written = size - temp_size;
544    }
545
546    close( fd );
547    return err;
548}
549
550static int
551pci_device_linux_sysfs_map_range_wc(struct pci_device *dev,
552				    struct pci_device_mapping *map)
553{
554    char name[256];
555    int fd;
556    const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
557        ? (PROT_READ | PROT_WRITE) : PROT_READ;
558    const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
559        ? O_RDWR : O_RDONLY;
560    const off_t offset = map->base - dev->regions[map->region].base_addr;
561
562    snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u_wc",
563	     SYS_BUS_PCI,
564	     dev->domain,
565	     dev->bus,
566	     dev->dev,
567	     dev->func,
568	     map->region);
569    fd = open(name, open_flags | O_CLOEXEC);
570    if (fd == -1)
571	    return errno;
572
573    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
574    if (map->memory == MAP_FAILED) {
575        map->memory = NULL;
576	close(fd);
577	return errno;
578    }
579
580    close(fd);
581
582    return 0;
583}
584
585/**
586 * Map a memory region for a device using the Linux sysfs interface.
587 *
588 * \param dev   Device whose memory region is to be mapped.
589 * \param map   Parameters of the mapping that is to be created.
590 *
591 * \return
592 * Zero on success or an \c errno value on failure.
593 *
594 * \sa pci_device_map_rrange, pci_device_linux_sysfs_unmap_range
595 *
596 * \todo
597 * Some older 2.6.x kernels don't implement the resourceN files.  On those
598 * systems /dev/mem must be used.  On these systems it is also possible that
599 * \c mmap64 may need to be used.
600 */
601static int
602pci_device_linux_sysfs_map_range(struct pci_device *dev,
603                                 struct pci_device_mapping *map)
604{
605    char name[256];
606    int fd;
607    int err = 0;
608    const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
609        ? (PROT_READ | PROT_WRITE) : PROT_READ;
610    const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
611        ? O_RDWR : O_RDONLY;
612    const off_t offset = map->base - dev->regions[map->region].base_addr;
613#ifdef HAVE_MTRR
614    struct mtrr_sentry sentry = {
615	.base = map->base,
616        .size = map->size,
617	.type = MTRR_TYPE_UNCACHABLE
618    };
619#endif
620
621    /* For WC mappings, try sysfs resourceN_wc file first */
622    if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) &&
623	!pci_device_linux_sysfs_map_range_wc(dev, map))
624	    return 0;
625
626    snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u",
627             SYS_BUS_PCI,
628             dev->domain,
629             dev->bus,
630             dev->dev,
631             dev->func,
632             map->region);
633
634    fd = open(name, open_flags | O_CLOEXEC);
635    if (fd == -1) {
636        return errno;
637    }
638
639
640    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
641    if (map->memory == MAP_FAILED) {
642        map->memory = NULL;
643	close(fd);
644	return errno;
645    }
646
647#ifdef HAVE_MTRR
648    if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
649        sentry.type = MTRR_TYPE_WRBACK;
650    } else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
651        sentry.type = MTRR_TYPE_WRCOMB;
652    }
653
654    if (pci_sys->mtrr_fd != -1 && sentry.type != MTRR_TYPE_UNCACHABLE) {
655	if (ioctl(pci_sys->mtrr_fd, MTRRIOC_ADD_ENTRY, &sentry) < 0) {
656	    /* FIXME: Should we report an error in this case?
657	     */
658	    fprintf(stderr, "error setting MTRR "
659		    "(base = 0x%016" PRIx64 ", size = 0x%08x, type = %u) %s (%d)\n",
660		    (pciaddr_t)sentry.base, sentry.size, sentry.type,
661		    strerror(errno), errno);
662/*            err = errno;*/
663	}
664	/* KLUDGE ALERT -- rewrite the PTEs to turn off the CD and WT bits */
665	mprotect (map->memory, map->size, PROT_NONE);
666	err = mprotect (map->memory, map->size, PROT_READ|PROT_WRITE);
667
668	if (err != 0) {
669	    fprintf(stderr, "mprotect(PROT_READ | PROT_WRITE) failed: %s\n",
670		    strerror(errno));
671	    fprintf(stderr, "remapping without mprotect performance kludge.\n");
672
673	    munmap(map->memory, map->size);
674	    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
675	    if (map->memory == MAP_FAILED) {
676		map->memory = NULL;
677		close(fd);
678		return errno;
679	    }
680	}
681    }
682#endif
683
684    close(fd);
685
686    return 0;
687}
688
/**
 * Unmap a memory region for a device using the Linux sysfs interface.
 *
 * The region is released with \c pci_device_generic_unmap_range.  When built
 * with \c HAVE_MTRR and /proc/mtrr was opened, the MTRR entry matching a
 * cachable or write-combining mapping is then deleted; a failure to delete
 * it is only logged to stderr.
 *
 * \param dev   Device whose memory region is to be unmapped.
 * \param map   Parameters of the mapping that is to be destroyed.
 *
 * \return
 * Zero on success or the error reported by the generic unmap routine.
 *
 * \sa pci_device_linux_sysfs_map_range
 *
 * \todo
 * Some older 2.6.x kernels don't implement the resourceN files.  On those
 * systems /dev/mem must be used.  On these systems it is also possible that
 * \c mmap64 may need to be used.
 */
static int
pci_device_linux_sysfs_unmap_range(struct pci_device *dev,
				   struct pci_device_mapping *map)
{
#ifdef HAVE_MTRR
    /* Captured before the unmap call, exactly as the range was mapped. */
    struct mtrr_sentry sentry = {
        .base = map->base,
        .size = map->size,
        .type = MTRR_TYPE_UNCACHABLE
    };
#endif
    const int err = pci_device_generic_unmap_range(dev, map);

    if (err != 0) {
        return err;
    }

#ifdef HAVE_MTRR
    if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
        sentry.type = MTRR_TYPE_WRBACK;
    } else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
        sentry.type = MTRR_TYPE_WRCOMB;
    }

    /* Nothing to delete without /proc/mtrr or for an uncached mapping. */
    if (pci_sys->mtrr_fd == -1 || sentry.type == MTRR_TYPE_UNCACHABLE) {
        return err;
    }

    if (ioctl(pci_sys->mtrr_fd, MTRRIOC_DEL_ENTRY, &sentry) < 0) {
        /* FIXME: Should we report an error in this case? */
        fprintf(stderr, "error setting MTRR "
                "(base = 0x%016" PRIx64 ", size = 0x%08x, type = %u) %s (%d)\n",
                (pciaddr_t)sentry.base, sentry.size, sentry.type,
                strerror(errno), errno);
    }
#endif

    return err;
}
744
745static void pci_device_linux_sysfs_set_enable(struct pci_device *dev, int enable)
746{
747    char name[256];
748    int fd;
749
750    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/enable",
751	      SYS_BUS_PCI,
752	      dev->domain,
753	      dev->bus,
754	      dev->dev,
755	      dev->func );
756
757    fd = open( name, O_RDWR | O_CLOEXEC);
758    if (fd == -1)
759       return;
760
761    write( fd, enable ? "1" : "0" , 1 );
762    close(fd);
763}
764
/**
 * Enable a device by writing "1" to its sysfs "enable" file.
 *
 * \param dev  Device to enable.
 */
static void pci_device_linux_sysfs_enable(struct pci_device *dev)
{
    /* Plain call: ISO C does not allow "return expr;" in a void function. */
    pci_device_linux_sysfs_set_enable(dev, 1);
}
769
/**
 * Disable a device by writing "0" to its sysfs "enable" file.
 *
 * \param dev  Device to disable.
 */
static void pci_device_linux_sysfs_disable(struct pci_device *dev)
{
    /* Plain call: ISO C does not allow "return expr;" in a void function. */
    pci_device_linux_sysfs_set_enable(dev, 0);
}
774
775static int pci_device_linux_sysfs_boot_vga(struct pci_device *dev)
776{
777    char name[256];
778    char reply[3];
779    int fd, bytes_read;
780    int ret = 0;
781
782    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/boot_vga",
783	      SYS_BUS_PCI,
784	      dev->domain,
785	      dev->bus,
786	      dev->dev,
787	      dev->func );
788
789    fd = open( name, O_RDONLY | O_CLOEXEC);
790    if (fd == -1)
791       return 0;
792
793    bytes_read = read(fd, reply, 1);
794    if (bytes_read != 1)
795	goto out;
796    if (reply[0] == '1')
797	ret = 1;
798out:
799    close(fd);
800    return ret;
801}
802
803static int pci_device_linux_sysfs_has_kernel_driver(struct pci_device *dev)
804{
805    char name[256];
806    struct stat dummy;
807    int ret;
808
809    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/driver",
810	      SYS_BUS_PCI,
811	      dev->domain,
812	      dev->bus,
813	      dev->dev,
814	      dev->func );
815
816    ret = stat(name, &dummy);
817    if (ret < 0)
818	return 0;
819    return 1;
820}
821
822static struct pci_io_handle *
823pci_device_linux_sysfs_open_device_io(struct pci_io_handle *ret,
824				      struct pci_device *dev, int bar,
825				      pciaddr_t base, pciaddr_t size)
826{
827    char name[PATH_MAX];
828
829    snprintf(name, PATH_MAX, "%s/%04x:%02x:%02x.%1u/resource%d",
830	     SYS_BUS_PCI, dev->domain, dev->bus, dev->dev, dev->func, bar);
831
832    ret->fd = open(name, O_RDWR | O_CLOEXEC);
833
834    if (ret->fd < 0)
835	return NULL;
836
837    ret->base = base;
838    ret->size = size;
839    ret->is_legacy = 0;
840
841    return ret;
842}
843
844static struct pci_io_handle *
845pci_device_linux_sysfs_open_legacy_io(struct pci_io_handle *ret,
846				      struct pci_device *dev, pciaddr_t base,
847				      pciaddr_t size)
848{
849    char name[PATH_MAX];
850
851    /* First check if there's a legacy io method for the device */
852    while (dev) {
853	snprintf(name, PATH_MAX, "/sys/class/pci_bus/%04x:%02x/legacy_io",
854		 dev->domain, dev->bus);
855
856	ret->fd = open(name, O_RDWR | O_CLOEXEC);
857	if (ret->fd >= 0)
858	    break;
859
860	dev = pci_device_get_parent_bridge(dev);
861    }
862
863    /*
864     * You would think you'd want to use /dev/port here.  Don't make that
865     * mistake, /dev/port only does byte-wide i/o cycles which means it
866     * doesn't work.  If you think this is stupid, well, you're right.
867     */
868
869    /* If we've no other choice, iopl */
870    if (ret->fd < 0) {
871	if (iopl(3))
872	    return NULL;
873    }
874
875    ret->base = base;
876    ret->size = size;
877    ret->is_legacy = 1;
878
879    return ret;
880}
881
882static void
883pci_device_linux_sysfs_close_io(struct pci_device *dev,
884				struct pci_io_handle *handle)
885{
886    if (handle->fd > -1)
887	close(handle->fd);
888}
889
890static uint32_t
891pci_device_linux_sysfs_read32(struct pci_io_handle *handle, uint32_t port)
892{
893    uint32_t ret;
894
895    if (handle->fd > -1) {
896	if (handle->is_legacy)
897	    pread(handle->fd, &ret, 4, port + handle->base);
898	else
899	    pread(handle->fd, &ret, 4, port);
900    } else {
901	ret = inl(port + handle->base);
902    }
903
904    return ret;
905}
906
907static uint16_t
908pci_device_linux_sysfs_read16(struct pci_io_handle *handle, uint32_t port)
909{
910    uint16_t ret;
911
912    if (handle->fd > -1) {
913	if (handle->is_legacy)
914	    pread(handle->fd, &ret, 2, port + handle->base);
915	else
916	    pread(handle->fd, &ret, 2, port);
917    } else {
918	ret = inw(port + handle->base);
919    }
920
921    return ret;
922}
923
924static uint8_t
925pci_device_linux_sysfs_read8(struct pci_io_handle *handle, uint32_t port)
926{
927    uint8_t ret;
928
929    if (handle->fd > -1) {
930	if (handle->is_legacy)
931	    pread(handle->fd, &ret, 1, port + handle->base);
932	else
933	    pread(handle->fd, &ret, 1, port);
934    } else {
935	ret = inb(port + handle->base);
936    }
937
938    return ret;
939}
940
941static void
942pci_device_linux_sysfs_write32(struct pci_io_handle *handle, uint32_t port,
943			       uint32_t data)
944{
945    if (handle->fd > -1) {
946	if (handle->is_legacy)
947	    pwrite(handle->fd, &data, 4, port + handle->base);
948	else
949	    pwrite(handle->fd, &data, 4, port);
950    } else {
951	outl(data, port + handle->base);
952    }
953}
954
955static void
956pci_device_linux_sysfs_write16(struct pci_io_handle *handle, uint32_t port,
957			       uint16_t data)
958{
959    if (handle->fd > -1) {
960	if (handle->is_legacy)
961	    pwrite(handle->fd, &data, 2, port + handle->base);
962	else
963	    pwrite(handle->fd, &data, 2, port);
964    } else {
965	outw(data, port + handle->base);
966    }
967}
968
969static void
970pci_device_linux_sysfs_write8(struct pci_io_handle *handle, uint32_t port,
971			      uint8_t data)
972{
973    if (handle->fd > -1) {
974	if (handle->is_legacy)
975	    pwrite(handle->fd, &data, 1, port + handle->base);
976	else
977	    pwrite(handle->fd, &data, 1, port);
978    } else {
979	outb(data, port + handle->base);
980    }
981}
982
983static int
984pci_device_linux_sysfs_map_legacy(struct pci_device *dev, pciaddr_t base,
985				  pciaddr_t size, unsigned map_flags, void **addr)
986{
987    char name[PATH_MAX];
988    int flags = O_RDONLY;
989    int prot = PROT_READ;
990    int fd;
991    int ret=0;
992
993    if (map_flags & PCI_DEV_MAP_FLAG_WRITABLE) {
994	flags = O_RDWR; /* O_RDWR != O_WRONLY | O_RDONLY */
995	prot |= PROT_WRITE;
996    }
997
998    /* First check if there's a legacy memory method for the device */
999    while (dev) {
1000	snprintf(name, PATH_MAX, "/sys/class/pci_bus/%04x:%02x/legacy_mem",
1001		 dev->domain, dev->bus);
1002
1003	fd = open(name, flags | O_CLOEXEC);
1004	if (fd >= 0)
1005	    break;
1006
1007	dev = pci_device_get_parent_bridge(dev);
1008    }
1009
1010    /* If not, /dev/mem is the best we can do */
1011    if (!dev)
1012	fd = open("/dev/mem", flags | O_CLOEXEC);
1013
1014    if (fd < 0)
1015	return errno;
1016
1017    *addr = mmap(NULL, size, prot, MAP_SHARED, fd, base);
1018    if (*addr == MAP_FAILED) {
1019	ret = errno;
1020    }
1021
1022    close(fd);
1023    return ret;
1024}
1025
1026static int
1027pci_device_linux_sysfs_unmap_legacy(struct pci_device *dev, void *addr, pciaddr_t size)
1028{
1029    return munmap(addr, size);
1030}
1031
1032
/**
 * Release the resources held by the sysfs back end.
 *
 * With \c HAVE_MTRR the /proc/mtrr descriptor is closed if it is not -1;
 * without it the function does nothing.
 */
static void
pci_system_linux_destroy(void)
{
#ifdef HAVE_MTRR
    if (pci_sys->mtrr_fd == -1) {
        return;
    }

    close(pci_sys->mtrr_fd);
#endif
}
1041
1042static const struct pci_system_methods linux_sysfs_methods = {
1043    .destroy = pci_system_linux_destroy,
1044    .destroy_device = NULL,
1045    .read_rom = pci_device_linux_sysfs_read_rom,
1046    .probe = pci_device_linux_sysfs_probe,
1047    .map_range = pci_device_linux_sysfs_map_range,
1048    .unmap_range = pci_device_linux_sysfs_unmap_range,
1049
1050    .read = pci_device_linux_sysfs_read,
1051    .write = pci_device_linux_sysfs_write,
1052
1053    .fill_capabilities = pci_fill_capabilities_generic,
1054    .enable = pci_device_linux_sysfs_enable,
1055    .disable = pci_device_linux_sysfs_disable,
1056    .boot_vga = pci_device_linux_sysfs_boot_vga,
1057    .has_kernel_driver = pci_device_linux_sysfs_has_kernel_driver,
1058
1059    .open_device_io = pci_device_linux_sysfs_open_device_io,
1060    .open_legacy_io = pci_device_linux_sysfs_open_legacy_io,
1061    .close_io = pci_device_linux_sysfs_close_io,
1062    .read32 = pci_device_linux_sysfs_read32,
1063    .read16 = pci_device_linux_sysfs_read16,
1064    .read8 = pci_device_linux_sysfs_read8,
1065    .write32 = pci_device_linux_sysfs_write32,
1066    .write16 = pci_device_linux_sysfs_write16,
1067    .write8 = pci_device_linux_sysfs_write8,
1068
1069    .map_legacy = pci_device_linux_sysfs_map_legacy,
1070    .unmap_legacy = pci_device_linux_sysfs_unmap_legacy,
1071};
1072