linux_sysfs.c revision 49f872b5
14f5e7dd7Smrg/*
24f5e7dd7Smrg * (C) Copyright IBM Corporation 2006
34f5e7dd7Smrg * All Rights Reserved.
44f5e7dd7Smrg *
54f5e7dd7Smrg * Permission is hereby granted, free of charge, to any person obtaining a
64f5e7dd7Smrg * copy of this software and associated documentation files (the "Software"),
74f5e7dd7Smrg * to deal in the Software without restriction, including without limitation
84f5e7dd7Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
94f5e7dd7Smrg * license, and/or sell copies of the Software, and to permit persons to whom
104f5e7dd7Smrg * the Software is furnished to do so, subject to the following conditions:
114f5e7dd7Smrg *
124f5e7dd7Smrg * The above copyright notice and this permission notice (including the next
134f5e7dd7Smrg * paragraph) shall be included in all copies or substantial portions of the
144f5e7dd7Smrg * Software.
154f5e7dd7Smrg *
164f5e7dd7Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
174f5e7dd7Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
184f5e7dd7Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
194f5e7dd7Smrg * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
204f5e7dd7Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
214f5e7dd7Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
224f5e7dd7Smrg * DEALINGS IN THE SOFTWARE.
234f5e7dd7Smrg */
244f5e7dd7Smrg
254f5e7dd7Smrg/**
264f5e7dd7Smrg * \file linux_sysfs.c
274f5e7dd7Smrg * Access PCI subsystem using Linux's sysfs interface.  This interface is
284f5e7dd7Smrg * available starting somewhere in the late 2.5.x kernel phase, and is the
294f5e7dd7Smrg * preferred method on all 2.6.x kernels.
304f5e7dd7Smrg *
314f5e7dd7Smrg * \author Ian Romanick <idr@us.ibm.com>
324f5e7dd7Smrg */
334f5e7dd7Smrg
344f5e7dd7Smrg#define _GNU_SOURCE
354f5e7dd7Smrg
364f5e7dd7Smrg#include <stdlib.h>
374f5e7dd7Smrg#include <string.h>
384f5e7dd7Smrg#include <stdio.h>
394f5e7dd7Smrg#include <unistd.h>
404f5e7dd7Smrg#include <sys/types.h>
414f5e7dd7Smrg#include <sys/stat.h>
424f5e7dd7Smrg#include <fcntl.h>
434f5e7dd7Smrg#include <sys/mman.h>
444f5e7dd7Smrg#include <dirent.h>
454f5e7dd7Smrg#include <errno.h>
464f5e7dd7Smrg
474f5e7dd7Smrg#include "config.h"
484f5e7dd7Smrg
494f5e7dd7Smrg#ifdef HAVE_MTRR
504f5e7dd7Smrg#include <asm/mtrr.h>
514f5e7dd7Smrg#include <sys/ioctl.h>
524f5e7dd7Smrg#endif
534f5e7dd7Smrg
544f5e7dd7Smrg#include "pciaccess.h"
554f5e7dd7Smrg#include "pciaccess_private.h"
564f5e7dd7Smrg#include "linux_devmem.h"
574f5e7dd7Smrg
584f5e7dd7Smrgstatic void pci_device_linux_sysfs_enable(struct pci_device *dev);
594f5e7dd7Smrg
604f5e7dd7Smrgstatic int pci_device_linux_sysfs_read_rom( struct pci_device * dev,
614f5e7dd7Smrg    void * buffer );
624f5e7dd7Smrg
634f5e7dd7Smrgstatic int pci_device_linux_sysfs_probe( struct pci_device * dev );
644f5e7dd7Smrg
654f5e7dd7Smrgstatic int pci_device_linux_sysfs_map_range(struct pci_device *dev,
664f5e7dd7Smrg    struct pci_device_mapping *map);
674f5e7dd7Smrg
684f5e7dd7Smrgstatic int pci_device_linux_sysfs_unmap_range(struct pci_device *dev,
694f5e7dd7Smrg    struct pci_device_mapping *map);
704f5e7dd7Smrg
714f5e7dd7Smrgstatic int pci_device_linux_sysfs_read( struct pci_device * dev, void * data,
724f5e7dd7Smrg    pciaddr_t offset, pciaddr_t size, pciaddr_t * bytes_read );
734f5e7dd7Smrg
744f5e7dd7Smrgstatic int pci_device_linux_sysfs_write( struct pci_device * dev,
754f5e7dd7Smrg    const void * data, pciaddr_t offset, pciaddr_t size,
764f5e7dd7Smrg    pciaddr_t * bytes_written );
774f5e7dd7Smrg
7849f872b5Smrgstatic int pci_device_linux_sysfs_boot_vga( struct pci_device * dev );
7949f872b5Smrgstatic int pci_device_linux_sysfs_has_kernel_driver(struct pci_device *dev);
8049f872b5Smrg
814f5e7dd7Smrgstatic const struct pci_system_methods linux_sysfs_methods = {
824f5e7dd7Smrg    .destroy = NULL,
834f5e7dd7Smrg    .destroy_device = NULL,
844f5e7dd7Smrg    .read_rom = pci_device_linux_sysfs_read_rom,
854f5e7dd7Smrg    .probe = pci_device_linux_sysfs_probe,
864f5e7dd7Smrg    .map_range = pci_device_linux_sysfs_map_range,
874f5e7dd7Smrg    .unmap_range = pci_device_linux_sysfs_unmap_range,
884f5e7dd7Smrg
894f5e7dd7Smrg    .read = pci_device_linux_sysfs_read,
904f5e7dd7Smrg    .write = pci_device_linux_sysfs_write,
914f5e7dd7Smrg
924f5e7dd7Smrg    .fill_capabilities = pci_fill_capabilities_generic,
934f5e7dd7Smrg    .enable = pci_device_linux_sysfs_enable,
9449f872b5Smrg    .boot_vga = pci_device_linux_sysfs_boot_vga,
9549f872b5Smrg    .has_kernel_driver = pci_device_linux_sysfs_has_kernel_driver,
964f5e7dd7Smrg};
974f5e7dd7Smrg
984f5e7dd7Smrg#define SYS_BUS_PCI "/sys/bus/pci/devices"
994f5e7dd7Smrg
1004f5e7dd7Smrg
1014f5e7dd7Smrgstatic int populate_entries(struct pci_system * pci_sys);
1024f5e7dd7Smrg
1034f5e7dd7Smrg
1044f5e7dd7Smrg/**
1054f5e7dd7Smrg * Attempt to access PCI subsystem using Linux's sysfs interface.
1064f5e7dd7Smrg */
1074f5e7dd7Smrg_pci_hidden int
1084f5e7dd7Smrgpci_system_linux_sysfs_create( void )
1094f5e7dd7Smrg{
1104f5e7dd7Smrg    int err = 0;
1114f5e7dd7Smrg    struct stat st;
1124f5e7dd7Smrg
1134f5e7dd7Smrg
1144f5e7dd7Smrg    /* If the directory "/sys/bus/pci/devices" exists, then the PCI subsystem
1154f5e7dd7Smrg     * can be accessed using this interface.
1164f5e7dd7Smrg     */
1174f5e7dd7Smrg
1184f5e7dd7Smrg    if ( stat( SYS_BUS_PCI, & st ) == 0 ) {
1194f5e7dd7Smrg	pci_sys = calloc( 1, sizeof( struct pci_system ) );
1204f5e7dd7Smrg	if ( pci_sys != NULL ) {
1214f5e7dd7Smrg	    pci_sys->methods = & linux_sysfs_methods;
1224f5e7dd7Smrg#ifdef HAVE_MTRR
1234f5e7dd7Smrg	    pci_sys->mtrr_fd = open("/proc/mtrr", O_WRONLY);
1244f5e7dd7Smrg#endif
1254f5e7dd7Smrg	    err = populate_entries(pci_sys);
1264f5e7dd7Smrg	}
1274f5e7dd7Smrg	else {
1284f5e7dd7Smrg	    err = ENOMEM;
1294f5e7dd7Smrg	}
1304f5e7dd7Smrg    }
1314f5e7dd7Smrg    else {
1324f5e7dd7Smrg	err = errno;
1334f5e7dd7Smrg    }
1344f5e7dd7Smrg
1354f5e7dd7Smrg    return err;
1364f5e7dd7Smrg}
1374f5e7dd7Smrg
1384f5e7dd7Smrg
1394f5e7dd7Smrg/**
1404f5e7dd7Smrg * Filter out the names "." and ".." from the scanned sysfs entries.
1414f5e7dd7Smrg *
1424f5e7dd7Smrg * \param d  Directory entry being processed by \c scandir.
1434f5e7dd7Smrg *
1444f5e7dd7Smrg * \return
1454f5e7dd7Smrg * Zero if the entry name matches either "." or "..", non-zero otherwise.
1464f5e7dd7Smrg *
1474f5e7dd7Smrg * \sa scandir, populate_entries
1484f5e7dd7Smrg */
1494f5e7dd7Smrgstatic int
1504f5e7dd7Smrgscan_sys_pci_filter( const struct dirent * d )
1514f5e7dd7Smrg{
1524f5e7dd7Smrg    return !((strcmp( d->d_name, "." ) == 0)
1534f5e7dd7Smrg	     || (strcmp( d->d_name, ".." ) == 0));
1544f5e7dd7Smrg}
1554f5e7dd7Smrg
1564f5e7dd7Smrg
1574f5e7dd7Smrgint
1584f5e7dd7Smrgpopulate_entries( struct pci_system * p )
1594f5e7dd7Smrg{
1604f5e7dd7Smrg    struct dirent ** devices;
1614f5e7dd7Smrg    int n;
1624f5e7dd7Smrg    int i;
1634f5e7dd7Smrg    int err = 0;
1644f5e7dd7Smrg
1654f5e7dd7Smrg
1664f5e7dd7Smrg    n = scandir( SYS_BUS_PCI, & devices, scan_sys_pci_filter, alphasort );
1674f5e7dd7Smrg    if ( n > 0 ) {
1684f5e7dd7Smrg	p->num_devices = n;
1694f5e7dd7Smrg	p->devices = calloc( n, sizeof( struct pci_device_private ) );
1704f5e7dd7Smrg
1714f5e7dd7Smrg	if (p->devices != NULL) {
1724f5e7dd7Smrg	    for (i = 0 ; i < n ; i++) {
1734f5e7dd7Smrg		uint8_t config[48];
1744f5e7dd7Smrg		pciaddr_t bytes;
1754f5e7dd7Smrg		unsigned dom, bus, dev, func;
1764f5e7dd7Smrg		struct pci_device_private *device =
1774f5e7dd7Smrg			(struct pci_device_private *) &p->devices[i];
1784f5e7dd7Smrg
1794f5e7dd7Smrg
1804f5e7dd7Smrg		sscanf(devices[i]->d_name, "%04x:%02x:%02x.%1u",
1814f5e7dd7Smrg		       & dom, & bus, & dev, & func);
1824f5e7dd7Smrg
1834f5e7dd7Smrg		device->base.domain = dom;
1844f5e7dd7Smrg		device->base.bus = bus;
1854f5e7dd7Smrg		device->base.dev = dev;
1864f5e7dd7Smrg		device->base.func = func;
1874f5e7dd7Smrg
1884f5e7dd7Smrg
1894f5e7dd7Smrg		err = pci_device_linux_sysfs_read(& device->base, config, 0,
1904f5e7dd7Smrg						  48, & bytes);
1914f5e7dd7Smrg		if ((bytes == 48) && !err) {
1924f5e7dd7Smrg		    device->base.vendor_id = (uint16_t)config[0]
1934f5e7dd7Smrg			+ ((uint16_t)config[1] << 8);
1944f5e7dd7Smrg		    device->base.device_id = (uint16_t)config[2]
1954f5e7dd7Smrg			+ ((uint16_t)config[3] << 8);
1964f5e7dd7Smrg		    device->base.device_class = (uint32_t)config[9]
1974f5e7dd7Smrg			+ ((uint32_t)config[10] << 8)
1984f5e7dd7Smrg			+ ((uint32_t)config[11] << 16);
1994f5e7dd7Smrg		    device->base.revision = config[8];
2004f5e7dd7Smrg		    device->base.subvendor_id = (uint16_t)config[44]
2014f5e7dd7Smrg			+ ((uint16_t)config[45] << 8);
2024f5e7dd7Smrg		    device->base.subdevice_id = (uint16_t)config[46]
2034f5e7dd7Smrg			+ ((uint16_t)config[47] << 8);
2044f5e7dd7Smrg		}
2054f5e7dd7Smrg
2064f5e7dd7Smrg		if (err) {
2074f5e7dd7Smrg		    break;
2084f5e7dd7Smrg		}
2094f5e7dd7Smrg	    }
2104f5e7dd7Smrg	}
2114f5e7dd7Smrg	else {
2124f5e7dd7Smrg	    err = ENOMEM;
2134f5e7dd7Smrg	}
2144f5e7dd7Smrg    }
2154f5e7dd7Smrg
21649f872b5Smrg    for (i = 0; i < n; i++)
21749f872b5Smrg	free(devices[i]);
21849f872b5Smrg    free(devices);
21949f872b5Smrg
2204f5e7dd7Smrg    if (err) {
2214f5e7dd7Smrg	free(p->devices);
2224f5e7dd7Smrg	p->devices = NULL;
2234f5e7dd7Smrg    }
2244f5e7dd7Smrg
2254f5e7dd7Smrg    return err;
2264f5e7dd7Smrg}
2274f5e7dd7Smrg
2284f5e7dd7Smrg
2294f5e7dd7Smrgstatic int
2304f5e7dd7Smrgpci_device_linux_sysfs_probe( struct pci_device * dev )
2314f5e7dd7Smrg{
2324f5e7dd7Smrg    char     name[256];
2334f5e7dd7Smrg    uint8_t  config[256];
2344f5e7dd7Smrg    char     resource[512];
2354f5e7dd7Smrg    int fd;
2364f5e7dd7Smrg    pciaddr_t bytes;
2374f5e7dd7Smrg    unsigned i;
2384f5e7dd7Smrg    int err;
2394f5e7dd7Smrg
2404f5e7dd7Smrg
2414f5e7dd7Smrg    err = pci_device_linux_sysfs_read( dev, config, 0, 256, & bytes );
2424f5e7dd7Smrg    if ( bytes >= 64 ) {
2434f5e7dd7Smrg	struct pci_device_private *priv = (struct pci_device_private *) dev;
2444f5e7dd7Smrg
2454f5e7dd7Smrg	dev->irq = config[60];
2464f5e7dd7Smrg	priv->header_type = config[14];
2474f5e7dd7Smrg
2484f5e7dd7Smrg
2494f5e7dd7Smrg	/* The PCI config registers can be used to obtain information
2504f5e7dd7Smrg	 * about the memory and I/O regions for the device.  However,
2514f5e7dd7Smrg	 * doing so requires some tricky parsing (to correctly handle
2524f5e7dd7Smrg	 * 64-bit memory regions) and requires writing to the config
2534f5e7dd7Smrg	 * registers.  Since we'd like to avoid having to deal with the
2544f5e7dd7Smrg	 * parsing issues and non-root users can write to PCI config
2554f5e7dd7Smrg	 * registers, we use a different file in the device's sysfs
2564f5e7dd7Smrg	 * directory called "resource".
2574f5e7dd7Smrg	 *
2584f5e7dd7Smrg	 * The resource file contains all of the needed information in
2594f5e7dd7Smrg	 * a format that is consistent across all platforms.  Each BAR
2604f5e7dd7Smrg	 * and the expansion ROM have a single line of data containing
2614f5e7dd7Smrg	 * 3, 64-bit hex values:  the first address in the region,
2624f5e7dd7Smrg	 * the last address in the region, and the region's flags.
2634f5e7dd7Smrg	 */
2644f5e7dd7Smrg	snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/resource",
2654f5e7dd7Smrg		  SYS_BUS_PCI,
2664f5e7dd7Smrg		  dev->domain,
2674f5e7dd7Smrg		  dev->bus,
2684f5e7dd7Smrg		  dev->dev,
2694f5e7dd7Smrg		  dev->func );
2704f5e7dd7Smrg	fd = open( name, O_RDONLY );
2714f5e7dd7Smrg	if ( fd != -1 ) {
2724f5e7dd7Smrg	    char * next;
2734f5e7dd7Smrg	    pciaddr_t  low_addr;
2744f5e7dd7Smrg	    pciaddr_t  high_addr;
2754f5e7dd7Smrg	    pciaddr_t  flags;
2764f5e7dd7Smrg
2774f5e7dd7Smrg
2784f5e7dd7Smrg	    bytes = read( fd, resource, 512 );
2794f5e7dd7Smrg	    resource[511] = '\0';
2804f5e7dd7Smrg
2814f5e7dd7Smrg	    close( fd );
2824f5e7dd7Smrg
2834f5e7dd7Smrg	    next = resource;
2844f5e7dd7Smrg	    for ( i = 0 ; i < 6 ; i++ ) {
2854f5e7dd7Smrg
2864f5e7dd7Smrg		dev->regions[i].base_addr = strtoull( next, & next, 16 );
2874f5e7dd7Smrg		high_addr = strtoull( next, & next, 16 );
2884f5e7dd7Smrg		flags = strtoull( next, & next, 16 );
2894f5e7dd7Smrg
2904f5e7dd7Smrg		if ( dev->regions[i].base_addr != 0 ) {
2914f5e7dd7Smrg		    dev->regions[i].size = (high_addr
2924f5e7dd7Smrg					    - dev->regions[i].base_addr) + 1;
2934f5e7dd7Smrg
2944f5e7dd7Smrg		    dev->regions[i].is_IO = (flags & 0x01);
2954f5e7dd7Smrg		    dev->regions[i].is_64 = (flags & 0x04);
2964f5e7dd7Smrg		    dev->regions[i].is_prefetchable = (flags & 0x08);
2974f5e7dd7Smrg		}
2984f5e7dd7Smrg	    }
2994f5e7dd7Smrg
3004f5e7dd7Smrg	    low_addr = strtoull( next, & next, 16 );
3014f5e7dd7Smrg	    high_addr = strtoull( next, & next, 16 );
3024f5e7dd7Smrg	    flags = strtoull( next, & next, 16 );
3034f5e7dd7Smrg	    if ( low_addr != 0 ) {
3044f5e7dd7Smrg		priv->rom_base = low_addr;
3054f5e7dd7Smrg		dev->rom_size = (high_addr - low_addr) + 1;
3064f5e7dd7Smrg	    }
3074f5e7dd7Smrg	}
3084f5e7dd7Smrg    }
3094f5e7dd7Smrg
3104f5e7dd7Smrg    return err;
3114f5e7dd7Smrg}
3124f5e7dd7Smrg
3134f5e7dd7Smrg
3144f5e7dd7Smrgstatic int
3154f5e7dd7Smrgpci_device_linux_sysfs_read_rom( struct pci_device * dev, void * buffer )
3164f5e7dd7Smrg{
3174f5e7dd7Smrg    char name[256];
3184f5e7dd7Smrg    int fd;
3194f5e7dd7Smrg    struct stat  st;
3204f5e7dd7Smrg    int err = 0;
3214f5e7dd7Smrg    size_t rom_size;
3224f5e7dd7Smrg    size_t total_bytes;
3234f5e7dd7Smrg
3244f5e7dd7Smrg
3254f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/rom",
3264f5e7dd7Smrg	      SYS_BUS_PCI,
3274f5e7dd7Smrg	      dev->domain,
3284f5e7dd7Smrg	      dev->bus,
3294f5e7dd7Smrg	      dev->dev,
3304f5e7dd7Smrg	      dev->func );
3314f5e7dd7Smrg
3324f5e7dd7Smrg    fd = open( name, O_RDWR );
3334f5e7dd7Smrg    if ( fd == -1 ) {
33449f872b5Smrg#ifdef LINUX_ROM
3354f5e7dd7Smrg	/* If reading the ROM using sysfs fails, fall back to the old
3364f5e7dd7Smrg	 * /dev/mem based interface.
33749f872b5Smrg	 * disable this for newer kernels using configure
3384f5e7dd7Smrg	 */
3394f5e7dd7Smrg	return pci_device_linux_devmem_read_rom(dev, buffer);
34049f872b5Smrg#else
34149f872b5Smrg	return errno;
34249f872b5Smrg#endif
3434f5e7dd7Smrg    }
3444f5e7dd7Smrg
3454f5e7dd7Smrg
3464f5e7dd7Smrg    if ( fstat( fd, & st ) == -1 ) {
3474f5e7dd7Smrg	close( fd );
3484f5e7dd7Smrg	return errno;
3494f5e7dd7Smrg    }
3504f5e7dd7Smrg
3514f5e7dd7Smrg    rom_size = st.st_size;
3524f5e7dd7Smrg    if ( rom_size == 0 )
3534f5e7dd7Smrg	rom_size = 0x10000;
3544f5e7dd7Smrg
3554f5e7dd7Smrg    /* This is a quirky thing on Linux.  Even though the ROM and the file
3564f5e7dd7Smrg     * for the ROM in sysfs are read-only, the string "1" must be written to
3574f5e7dd7Smrg     * the file to enable the ROM.  After the data has been read, "0" must be
3584f5e7dd7Smrg     * written to the file to disable the ROM.
3594f5e7dd7Smrg     */
3604f5e7dd7Smrg    write( fd, "1", 1 );
3614f5e7dd7Smrg    lseek( fd, 0, SEEK_SET );
3624f5e7dd7Smrg
3634f5e7dd7Smrg    for ( total_bytes = 0 ; total_bytes < rom_size ; /* empty */ ) {
3644f5e7dd7Smrg	const int bytes = read( fd, (char *) buffer + total_bytes,
3654f5e7dd7Smrg				rom_size - total_bytes );
3664f5e7dd7Smrg	if ( bytes == -1 ) {
3674f5e7dd7Smrg	    err = errno;
3684f5e7dd7Smrg	    break;
3694f5e7dd7Smrg	}
3704f5e7dd7Smrg	else if ( bytes == 0 ) {
3714f5e7dd7Smrg	    break;
3724f5e7dd7Smrg	}
3734f5e7dd7Smrg
3744f5e7dd7Smrg	total_bytes += bytes;
3754f5e7dd7Smrg    }
3764f5e7dd7Smrg
3774f5e7dd7Smrg
3784f5e7dd7Smrg    lseek( fd, 0, SEEK_SET );
3794f5e7dd7Smrg    write( fd, "0", 1 );
3804f5e7dd7Smrg
3814f5e7dd7Smrg    close( fd );
3824f5e7dd7Smrg    return err;
3834f5e7dd7Smrg}
3844f5e7dd7Smrg
3854f5e7dd7Smrg
3864f5e7dd7Smrgstatic int
3874f5e7dd7Smrgpci_device_linux_sysfs_read( struct pci_device * dev, void * data,
3884f5e7dd7Smrg			     pciaddr_t offset, pciaddr_t size,
3894f5e7dd7Smrg			     pciaddr_t * bytes_read )
3904f5e7dd7Smrg{
3914f5e7dd7Smrg    char name[256];
3924f5e7dd7Smrg    pciaddr_t temp_size = size;
3934f5e7dd7Smrg    int err = 0;
3944f5e7dd7Smrg    int fd;
3954f5e7dd7Smrg    char *data_bytes = data;
3964f5e7dd7Smrg
3974f5e7dd7Smrg    if ( bytes_read != NULL ) {
3984f5e7dd7Smrg	*bytes_read = 0;
3994f5e7dd7Smrg    }
4004f5e7dd7Smrg
4014f5e7dd7Smrg    /* Each device has a directory under sysfs.  Within that directory there
4024f5e7dd7Smrg     * is a file named "config".  This file used to access the PCI config
4034f5e7dd7Smrg     * space.  It is used here to obtain most of the information about the
4044f5e7dd7Smrg     * device.
4054f5e7dd7Smrg     */
4064f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
4074f5e7dd7Smrg	      SYS_BUS_PCI,
4084f5e7dd7Smrg	      dev->domain,
4094f5e7dd7Smrg	      dev->bus,
4104f5e7dd7Smrg	      dev->dev,
4114f5e7dd7Smrg	      dev->func );
4124f5e7dd7Smrg
4134f5e7dd7Smrg    fd = open( name, O_RDONLY );
4144f5e7dd7Smrg    if ( fd == -1 ) {
4154f5e7dd7Smrg	return errno;
4164f5e7dd7Smrg    }
4174f5e7dd7Smrg
4184f5e7dd7Smrg
4194f5e7dd7Smrg    while ( temp_size > 0 ) {
4204f5e7dd7Smrg	const ssize_t bytes = pread64( fd, data_bytes, temp_size, offset );
4214f5e7dd7Smrg
4224f5e7dd7Smrg	/* If zero bytes were read, then we assume it's the end of the
4234f5e7dd7Smrg	 * config file.
4244f5e7dd7Smrg	 */
4254f5e7dd7Smrg	if ( bytes <= 0 ) {
4264f5e7dd7Smrg	    err = errno;
4274f5e7dd7Smrg	    break;
4284f5e7dd7Smrg	}
4294f5e7dd7Smrg
4304f5e7dd7Smrg	temp_size -= bytes;
4314f5e7dd7Smrg	offset += bytes;
4324f5e7dd7Smrg	data_bytes += bytes;
4334f5e7dd7Smrg    }
4344f5e7dd7Smrg
4354f5e7dd7Smrg    if ( bytes_read != NULL ) {
4364f5e7dd7Smrg	*bytes_read = size - temp_size;
4374f5e7dd7Smrg    }
4384f5e7dd7Smrg
4394f5e7dd7Smrg    close( fd );
4404f5e7dd7Smrg    return err;
4414f5e7dd7Smrg}
4424f5e7dd7Smrg
4434f5e7dd7Smrg
4444f5e7dd7Smrgstatic int
4454f5e7dd7Smrgpci_device_linux_sysfs_write( struct pci_device * dev, const void * data,
4464f5e7dd7Smrg			     pciaddr_t offset, pciaddr_t size,
4474f5e7dd7Smrg			     pciaddr_t * bytes_written )
4484f5e7dd7Smrg{
4494f5e7dd7Smrg    char name[256];
4504f5e7dd7Smrg    pciaddr_t temp_size = size;
4514f5e7dd7Smrg    int err = 0;
4524f5e7dd7Smrg    int fd;
4534f5e7dd7Smrg    const char *data_bytes = data;
4544f5e7dd7Smrg
4554f5e7dd7Smrg    if ( bytes_written != NULL ) {
4564f5e7dd7Smrg	*bytes_written = 0;
4574f5e7dd7Smrg    }
4584f5e7dd7Smrg
4594f5e7dd7Smrg    /* Each device has a directory under sysfs.  Within that directory there
4604f5e7dd7Smrg     * is a file named "config".  This file used to access the PCI config
4614f5e7dd7Smrg     * space.  It is used here to obtain most of the information about the
4624f5e7dd7Smrg     * device.
4634f5e7dd7Smrg     */
4644f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
4654f5e7dd7Smrg	      SYS_BUS_PCI,
4664f5e7dd7Smrg	      dev->domain,
4674f5e7dd7Smrg	      dev->bus,
4684f5e7dd7Smrg	      dev->dev,
4694f5e7dd7Smrg	      dev->func );
4704f5e7dd7Smrg
4714f5e7dd7Smrg    fd = open( name, O_WRONLY );
4724f5e7dd7Smrg    if ( fd == -1 ) {
4734f5e7dd7Smrg	return errno;
4744f5e7dd7Smrg    }
4754f5e7dd7Smrg
4764f5e7dd7Smrg
4774f5e7dd7Smrg    while ( temp_size > 0 ) {
4784f5e7dd7Smrg	const ssize_t bytes = pwrite64( fd, data_bytes, temp_size, offset );
4794f5e7dd7Smrg
4804f5e7dd7Smrg	/* If zero bytes were written, then we assume it's the end of the
4814f5e7dd7Smrg	 * config file.
4824f5e7dd7Smrg	 */
4834f5e7dd7Smrg	if ( bytes <= 0 ) {
4844f5e7dd7Smrg	    err = errno;
4854f5e7dd7Smrg	    break;
4864f5e7dd7Smrg	}
4874f5e7dd7Smrg
4884f5e7dd7Smrg	temp_size -= bytes;
4894f5e7dd7Smrg	offset += bytes;
4904f5e7dd7Smrg	data_bytes += bytes;
4914f5e7dd7Smrg    }
4924f5e7dd7Smrg
4934f5e7dd7Smrg    if ( bytes_written != NULL ) {
4944f5e7dd7Smrg	*bytes_written = size - temp_size;
4954f5e7dd7Smrg    }
4964f5e7dd7Smrg
4974f5e7dd7Smrg    close( fd );
4984f5e7dd7Smrg    return err;
4994f5e7dd7Smrg}
5004f5e7dd7Smrg
5014f5e7dd7Smrgstatic int
5024f5e7dd7Smrgpci_device_linux_sysfs_map_range_wc(struct pci_device *dev,
5034f5e7dd7Smrg				    struct pci_device_mapping *map)
5044f5e7dd7Smrg{
5054f5e7dd7Smrg    char name[256];
5064f5e7dd7Smrg    int fd;
5074f5e7dd7Smrg    const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
5084f5e7dd7Smrg        ? (PROT_READ | PROT_WRITE) : PROT_READ;
5094f5e7dd7Smrg    const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
5104f5e7dd7Smrg        ? O_RDWR : O_RDONLY;
5114f5e7dd7Smrg    const off_t offset = map->base - dev->regions[map->region].base_addr;
5124f5e7dd7Smrg
5134f5e7dd7Smrg    snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u_wc",
5144f5e7dd7Smrg	     SYS_BUS_PCI,
5154f5e7dd7Smrg	     dev->domain,
5164f5e7dd7Smrg	     dev->bus,
5174f5e7dd7Smrg	     dev->dev,
5184f5e7dd7Smrg	     dev->func,
5194f5e7dd7Smrg	     map->region);
5204f5e7dd7Smrg    fd = open(name, open_flags);
5214f5e7dd7Smrg    if (fd == -1)
5224f5e7dd7Smrg	    return errno;
5234f5e7dd7Smrg
5244f5e7dd7Smrg    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
5254f5e7dd7Smrg    if (map->memory == MAP_FAILED) {
5264f5e7dd7Smrg        map->memory = NULL;
5274f5e7dd7Smrg	close(fd);
5284f5e7dd7Smrg	return errno;
5294f5e7dd7Smrg    }
5304f5e7dd7Smrg
5314f5e7dd7Smrg    close(fd);
5324f5e7dd7Smrg
5334f5e7dd7Smrg    return 0;
5344f5e7dd7Smrg}
5354f5e7dd7Smrg
5364f5e7dd7Smrg/**
5374f5e7dd7Smrg * Map a memory region for a device using the Linux sysfs interface.
5384f5e7dd7Smrg *
5394f5e7dd7Smrg * \param dev   Device whose memory region is to be mapped.
5404f5e7dd7Smrg * \param map   Parameters of the mapping that is to be created.
5414f5e7dd7Smrg *
5424f5e7dd7Smrg * \return
5434f5e7dd7Smrg * Zero on success or an \c errno value on failure.
5444f5e7dd7Smrg *
5454f5e7dd7Smrg * \sa pci_device_map_rrange, pci_device_linux_sysfs_unmap_range
5464f5e7dd7Smrg *
5474f5e7dd7Smrg * \todo
5484f5e7dd7Smrg * Some older 2.6.x kernels don't implement the resourceN files.  On those
5494f5e7dd7Smrg * systems /dev/mem must be used.  On these systems it is also possible that
5504f5e7dd7Smrg * \c mmap64 may need to be used.
5514f5e7dd7Smrg */
5524f5e7dd7Smrgstatic int
5534f5e7dd7Smrgpci_device_linux_sysfs_map_range(struct pci_device *dev,
5544f5e7dd7Smrg                                 struct pci_device_mapping *map)
5554f5e7dd7Smrg{
5564f5e7dd7Smrg    char name[256];
5574f5e7dd7Smrg    int fd;
5584f5e7dd7Smrg    int err = 0;
5594f5e7dd7Smrg    const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
5604f5e7dd7Smrg        ? (PROT_READ | PROT_WRITE) : PROT_READ;
5614f5e7dd7Smrg    const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
5624f5e7dd7Smrg        ? O_RDWR : O_RDONLY;
5634f5e7dd7Smrg    const off_t offset = map->base - dev->regions[map->region].base_addr;
5644f5e7dd7Smrg#ifdef HAVE_MTRR
5654f5e7dd7Smrg    struct mtrr_sentry sentry = {
5664f5e7dd7Smrg	.base = map->base,
5674f5e7dd7Smrg        .size = map->size,
5684f5e7dd7Smrg	.type = MTRR_TYPE_UNCACHABLE
5694f5e7dd7Smrg    };
5704f5e7dd7Smrg#endif
5714f5e7dd7Smrg
5724f5e7dd7Smrg    /* For WC mappings, try sysfs resourceN_wc file first */
5734f5e7dd7Smrg    if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) &&
5744f5e7dd7Smrg	!pci_device_linux_sysfs_map_range_wc(dev, map))
5754f5e7dd7Smrg	    return 0;
5764f5e7dd7Smrg
5774f5e7dd7Smrg    snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u",
5784f5e7dd7Smrg             SYS_BUS_PCI,
5794f5e7dd7Smrg             dev->domain,
5804f5e7dd7Smrg             dev->bus,
5814f5e7dd7Smrg             dev->dev,
5824f5e7dd7Smrg             dev->func,
5834f5e7dd7Smrg             map->region);
5844f5e7dd7Smrg
5854f5e7dd7Smrg    fd = open(name, open_flags);
5864f5e7dd7Smrg    if (fd == -1) {
5874f5e7dd7Smrg        return errno;
5884f5e7dd7Smrg    }
5894f5e7dd7Smrg
5904f5e7dd7Smrg
5914f5e7dd7Smrg    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
5924f5e7dd7Smrg    if (map->memory == MAP_FAILED) {
5934f5e7dd7Smrg        map->memory = NULL;
5944f5e7dd7Smrg	close(fd);
5954f5e7dd7Smrg	return errno;
5964f5e7dd7Smrg    }
5974f5e7dd7Smrg
5984f5e7dd7Smrg#ifdef HAVE_MTRR
5994f5e7dd7Smrg    if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
6004f5e7dd7Smrg        sentry.type = MTRR_TYPE_WRBACK;
6014f5e7dd7Smrg    } else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
6024f5e7dd7Smrg        sentry.type = MTRR_TYPE_WRCOMB;
6034f5e7dd7Smrg    }
6044f5e7dd7Smrg
6054f5e7dd7Smrg    if (pci_sys->mtrr_fd != -1 && sentry.type != MTRR_TYPE_UNCACHABLE) {
6064f5e7dd7Smrg	if (ioctl(pci_sys->mtrr_fd, MTRRIOC_ADD_ENTRY, &sentry) < 0) {
6074f5e7dd7Smrg	    /* FIXME: Should we report an error in this case?
6084f5e7dd7Smrg	     */
6094f5e7dd7Smrg	    fprintf(stderr, "error setting MTRR "
6104f5e7dd7Smrg		    "(base = 0x%08lx, size = 0x%08x, type = %u) %s (%d)\n",
6114f5e7dd7Smrg		    sentry.base, sentry.size, sentry.type,
6124f5e7dd7Smrg		    strerror(errno), errno);
6134f5e7dd7Smrg/*            err = errno;*/
6144f5e7dd7Smrg	}
6154f5e7dd7Smrg	/* KLUDGE ALERT -- rewrite the PTEs to turn off the CD and WT bits */
6164f5e7dd7Smrg	mprotect (map->memory, map->size, PROT_NONE);
6174f5e7dd7Smrg	err = mprotect (map->memory, map->size, PROT_READ|PROT_WRITE);
6184f5e7dd7Smrg
6194f5e7dd7Smrg	if (err != 0) {
6204f5e7dd7Smrg	    fprintf(stderr, "mprotect(PROT_READ | PROT_WRITE) failed: %s\n",
6214f5e7dd7Smrg		    strerror(errno));
6224f5e7dd7Smrg	    fprintf(stderr, "remapping without mprotect performance kludge.\n");
6234f5e7dd7Smrg
6244f5e7dd7Smrg	    munmap(map->memory, map->size);
6254f5e7dd7Smrg	    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
6264f5e7dd7Smrg	    if (map->memory == MAP_FAILED) {
6274f5e7dd7Smrg		map->memory = NULL;
6284f5e7dd7Smrg		close(fd);
6294f5e7dd7Smrg		return errno;
6304f5e7dd7Smrg	    }
6314f5e7dd7Smrg	}
6324f5e7dd7Smrg    }
6334f5e7dd7Smrg#endif
6344f5e7dd7Smrg
6354f5e7dd7Smrg    close(fd);
6364f5e7dd7Smrg
6374f5e7dd7Smrg    return 0;
6384f5e7dd7Smrg}
6394f5e7dd7Smrg
6404f5e7dd7Smrg/**
6414f5e7dd7Smrg * Unmap a memory region for a device using the Linux sysfs interface.
6424f5e7dd7Smrg *
6434f5e7dd7Smrg * \param dev   Device whose memory region is to be unmapped.
6444f5e7dd7Smrg * \param map   Parameters of the mapping that is to be destroyed.
6454f5e7dd7Smrg *
6464f5e7dd7Smrg * \return
6474f5e7dd7Smrg * Zero on success or an \c errno value on failure.
6484f5e7dd7Smrg *
6494f5e7dd7Smrg * \sa pci_device_map_rrange, pci_device_linux_sysfs_map_range
6504f5e7dd7Smrg *
6514f5e7dd7Smrg * \todo
6524f5e7dd7Smrg * Some older 2.6.x kernels don't implement the resourceN files.  On those
6534f5e7dd7Smrg * systems /dev/mem must be used.  On these systems it is also possible that
6544f5e7dd7Smrg * \c mmap64 may need to be used.
6554f5e7dd7Smrg */
6564f5e7dd7Smrgstatic int
6574f5e7dd7Smrgpci_device_linux_sysfs_unmap_range(struct pci_device *dev,
6584f5e7dd7Smrg				   struct pci_device_mapping *map)
6594f5e7dd7Smrg{
6604f5e7dd7Smrg    int err = 0;
6614f5e7dd7Smrg#ifdef HAVE_MTRR
6624f5e7dd7Smrg    struct mtrr_sentry sentry = {
6634f5e7dd7Smrg	.base = map->base,
6644f5e7dd7Smrg        .size = map->size,
6654f5e7dd7Smrg	.type = MTRR_TYPE_UNCACHABLE
6664f5e7dd7Smrg    };
6674f5e7dd7Smrg#endif
6684f5e7dd7Smrg
6694f5e7dd7Smrg    err = pci_device_generic_unmap_range (dev, map);
6704f5e7dd7Smrg    if (err)
6714f5e7dd7Smrg	return err;
6724f5e7dd7Smrg
6734f5e7dd7Smrg#ifdef HAVE_MTRR
6744f5e7dd7Smrg    if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
6754f5e7dd7Smrg        sentry.type = MTRR_TYPE_WRBACK;
6764f5e7dd7Smrg    } else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
6774f5e7dd7Smrg        sentry.type = MTRR_TYPE_WRCOMB;
6784f5e7dd7Smrg    }
6794f5e7dd7Smrg
6804f5e7dd7Smrg    if (pci_sys->mtrr_fd != -1 && sentry.type != MTRR_TYPE_UNCACHABLE) {
6814f5e7dd7Smrg	if (ioctl(pci_sys->mtrr_fd, MTRRIOC_DEL_ENTRY, &sentry) < 0) {
6824f5e7dd7Smrg	    /* FIXME: Should we report an error in this case?
6834f5e7dd7Smrg	     */
6844f5e7dd7Smrg	    fprintf(stderr, "error setting MTRR "
6854f5e7dd7Smrg		    "(base = 0x%08lx, size = 0x%08x, type = %u) %s (%d)\n",
6864f5e7dd7Smrg		    sentry.base, sentry.size, sentry.type,
6874f5e7dd7Smrg		    strerror(errno), errno);
6884f5e7dd7Smrg/*            err = errno;*/
6894f5e7dd7Smrg	}
6904f5e7dd7Smrg    }
6914f5e7dd7Smrg#endif
6924f5e7dd7Smrg
6934f5e7dd7Smrg    return err;
6944f5e7dd7Smrg}
6954f5e7dd7Smrg
6964f5e7dd7Smrgstatic void pci_device_linux_sysfs_enable(struct pci_device *dev)
6974f5e7dd7Smrg{
6984f5e7dd7Smrg    char name[256];
6994f5e7dd7Smrg    int fd;
7004f5e7dd7Smrg
7014f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/enable",
7024f5e7dd7Smrg	      SYS_BUS_PCI,
7034f5e7dd7Smrg	      dev->domain,
7044f5e7dd7Smrg	      dev->bus,
7054f5e7dd7Smrg	      dev->dev,
7064f5e7dd7Smrg	      dev->func );
7074f5e7dd7Smrg
7084f5e7dd7Smrg    fd = open( name, O_RDWR );
7094f5e7dd7Smrg    if (fd == -1)
7104f5e7dd7Smrg       return;
7114f5e7dd7Smrg
7124f5e7dd7Smrg    write( fd, "1", 1 );
7134f5e7dd7Smrg    close(fd);
7144f5e7dd7Smrg}
71549f872b5Smrg
71649f872b5Smrgstatic int pci_device_linux_sysfs_boot_vga(struct pci_device *dev)
71749f872b5Smrg{
71849f872b5Smrg    char name[256];
71949f872b5Smrg    char reply[3];
72049f872b5Smrg    int fd, bytes_read;
72149f872b5Smrg    int ret = 0;
72249f872b5Smrg
72349f872b5Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/boot_vga",
72449f872b5Smrg	      SYS_BUS_PCI,
72549f872b5Smrg	      dev->domain,
72649f872b5Smrg	      dev->bus,
72749f872b5Smrg	      dev->dev,
72849f872b5Smrg	      dev->func );
72949f872b5Smrg
73049f872b5Smrg    fd = open( name, O_RDONLY );
73149f872b5Smrg    if (fd == -1)
73249f872b5Smrg       return 0;
73349f872b5Smrg
73449f872b5Smrg    bytes_read = read(fd, reply, 1);
73549f872b5Smrg    if (bytes_read != 1)
73649f872b5Smrg	goto out;
73749f872b5Smrg    if (reply[0] == '1')
73849f872b5Smrg	ret = 1;
73949f872b5Smrgout:
74049f872b5Smrg    close(fd);
74149f872b5Smrg    return ret;
74249f872b5Smrg}
74349f872b5Smrg
74449f872b5Smrgstatic int pci_device_linux_sysfs_has_kernel_driver(struct pci_device *dev)
74549f872b5Smrg{
74649f872b5Smrg    char name[256];
74749f872b5Smrg    struct stat dummy;
74849f872b5Smrg    int ret;
74949f872b5Smrg
75049f872b5Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/driver",
75149f872b5Smrg	      SYS_BUS_PCI,
75249f872b5Smrg	      dev->domain,
75349f872b5Smrg	      dev->bus,
75449f872b5Smrg	      dev->dev,
75549f872b5Smrg	      dev->func );
75649f872b5Smrg
75749f872b5Smrg    ret = stat(name, &dummy);
75849f872b5Smrg    if (ret < 0)
75949f872b5Smrg	return 0;
76049f872b5Smrg    return 1;
76149f872b5Smrg}
762