14f5e7dd7Smrg/*
24f5e7dd7Smrg * (C) Copyright IBM Corporation 2006
34f5e7dd7Smrg * All Rights Reserved.
4cad31331Smrg * Copyright 2012 Red Hat, Inc.
54f5e7dd7Smrg *
64f5e7dd7Smrg * Permission is hereby granted, free of charge, to any person obtaining a
74f5e7dd7Smrg * copy of this software and associated documentation files (the "Software"),
84f5e7dd7Smrg * to deal in the Software without restriction, including without limitation
94f5e7dd7Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
104f5e7dd7Smrg * license, and/or sell copies of the Software, and to permit persons to whom
114f5e7dd7Smrg * the Software is furnished to do so, subject to the following conditions:
124f5e7dd7Smrg *
134f5e7dd7Smrg * The above copyright notice and this permission notice (including the next
144f5e7dd7Smrg * paragraph) shall be included in all copies or substantial portions of the
154f5e7dd7Smrg * Software.
164f5e7dd7Smrg *
174f5e7dd7Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
184f5e7dd7Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
194f5e7dd7Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
204f5e7dd7Smrg * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
214f5e7dd7Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
224f5e7dd7Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
234f5e7dd7Smrg * DEALINGS IN THE SOFTWARE.
244f5e7dd7Smrg */
254f5e7dd7Smrg
264f5e7dd7Smrg/**
274f5e7dd7Smrg * \file linux_sysfs.c
284f5e7dd7Smrg * Access PCI subsystem using Linux's sysfs interface.  This interface is
294f5e7dd7Smrg * available starting somewhere in the late 2.5.x kernel phase, and is the
304f5e7dd7Smrg * preferred method on all 2.6.x kernels.
314f5e7dd7Smrg *
324f5e7dd7Smrg * \author Ian Romanick <idr@us.ibm.com>
334f5e7dd7Smrg */
344f5e7dd7Smrg
354f5e7dd7Smrg#define _GNU_SOURCE
364f5e7dd7Smrg
376a94483fSmrg#ifdef HAVE_CONFIG_H
386a94483fSmrg#include "config.h"
396a94483fSmrg#endif
406a94483fSmrg
414f5e7dd7Smrg#include <stdlib.h>
424f5e7dd7Smrg#include <string.h>
434f5e7dd7Smrg#include <stdio.h>
444f5e7dd7Smrg#include <unistd.h>
454f5e7dd7Smrg#include <sys/types.h>
464f5e7dd7Smrg#include <sys/stat.h>
474f5e7dd7Smrg#include <fcntl.h>
4849310723Smrg#include <limits.h>
494f5e7dd7Smrg#include <sys/mman.h>
504f5e7dd7Smrg#include <dirent.h>
514f5e7dd7Smrg#include <errno.h>
524f5e7dd7Smrg
53dc7d6647Smrg#if defined(__i386__) || defined(__x86_64__)
54cad31331Smrg#include <sys/io.h>
55cad31331Smrg#else
56cad31331Smrg#define inb(x) -1
57cad31331Smrg#define inw(x) -1
58cad31331Smrg#define inl(x) -1
59cad31331Smrg#define outb(x,y) do {} while (0)
60cad31331Smrg#define outw(x,y) do {} while (0)
61cad31331Smrg#define outl(x,y) do {} while (0)
62cad31331Smrg#define iopl(x) -1
63cad31331Smrg#endif
64cad31331Smrg
654f5e7dd7Smrg#ifdef HAVE_MTRR
664f5e7dd7Smrg#include <asm/mtrr.h>
674f5e7dd7Smrg#include <sys/ioctl.h>
684f5e7dd7Smrg#endif
694f5e7dd7Smrg
704f5e7dd7Smrg#include "pciaccess.h"
714f5e7dd7Smrg#include "pciaccess_private.h"
724f5e7dd7Smrg#include "linux_devmem.h"
734f5e7dd7Smrg
74e432255dSmrgstatic const struct pci_system_methods linux_sysfs_methods;
754f5e7dd7Smrg
764f5e7dd7Smrg#define SYS_BUS_PCI "/sys/bus/pci/devices"
774f5e7dd7Smrg
78e432255dSmrgstatic int
79e432255dSmrgpci_device_linux_sysfs_read( struct pci_device * dev, void * data,
80e432255dSmrg			     pciaddr_t offset, pciaddr_t size,
81e432255dSmrg			     pciaddr_t * bytes_read );
824f5e7dd7Smrg
834f5e7dd7Smrgstatic int populate_entries(struct pci_system * pci_sys);
844f5e7dd7Smrg
854f5e7dd7Smrg/**
864f5e7dd7Smrg * Attempt to access PCI subsystem using Linux's sysfs interface.
874f5e7dd7Smrg */
884f5e7dd7Smrg_pci_hidden int
894f5e7dd7Smrgpci_system_linux_sysfs_create( void )
904f5e7dd7Smrg{
914f5e7dd7Smrg    int err = 0;
924f5e7dd7Smrg    struct stat st;
934f5e7dd7Smrg
944f5e7dd7Smrg
954f5e7dd7Smrg    /* If the directory "/sys/bus/pci/devices" exists, then the PCI subsystem
964f5e7dd7Smrg     * can be accessed using this interface.
974f5e7dd7Smrg     */
98cad31331Smrg
994f5e7dd7Smrg    if ( stat( SYS_BUS_PCI, & st ) == 0 ) {
1004f5e7dd7Smrg	pci_sys = calloc( 1, sizeof( struct pci_system ) );
1014f5e7dd7Smrg	if ( pci_sys != NULL ) {
1024f5e7dd7Smrg	    pci_sys->methods = & linux_sysfs_methods;
1034f5e7dd7Smrg#ifdef HAVE_MTRR
104cad31331Smrg	    pci_sys->mtrr_fd = open("/proc/mtrr", O_WRONLY | O_CLOEXEC);
1054f5e7dd7Smrg#endif
1064f5e7dd7Smrg	    err = populate_entries(pci_sys);
1074f5e7dd7Smrg	}
1084f5e7dd7Smrg	else {
1094f5e7dd7Smrg	    err = ENOMEM;
1104f5e7dd7Smrg	}
1114f5e7dd7Smrg    }
1124f5e7dd7Smrg    else {
1134f5e7dd7Smrg	err = errno;
1144f5e7dd7Smrg    }
1154f5e7dd7Smrg
1164f5e7dd7Smrg    return err;
1174f5e7dd7Smrg}
1184f5e7dd7Smrg
1194f5e7dd7Smrg
/**
 * Directory-entry filter that drops the "." and ".." pseudo entries.
 *
 * \param d  Directory entry being processed by \c scandir.
 *
 * \return
 * Zero if the entry name is exactly "." or "..", one for every other name.
 *
 * \sa scandir, populate_entries
 */
static int
scan_sys_pci_filter( const struct dirent * d )
{
    const char * const entry = d->d_name;

    if ( strcmp( entry, "." ) == 0 ) {
        return 0;
    }

    if ( strcmp( entry, ".." ) == 0 ) {
        return 0;
    }

    return 1;
}
1364f5e7dd7Smrg
1374f5e7dd7Smrg
13849310723Smrgstatic int
13949310723Smrgparse_separate_sysfs_files(struct pci_device * dev)
14049310723Smrg{
14149310723Smrg    static const char *attrs[] = {
14249310723Smrg      "vendor",
14349310723Smrg      "device",
14449310723Smrg      "class",
14549310723Smrg      "revision",
14649310723Smrg      "subsystem_vendor",
14749310723Smrg      "subsystem_device",
14849310723Smrg    };
14949310723Smrg    char name[256];
15049310723Smrg    char resource[512];
15149310723Smrg    uint64_t data[6];
15249310723Smrg    int fd;
15349310723Smrg    int i;
15449310723Smrg
15549310723Smrg    for (i = 0; i < 6; i++) {
15649310723Smrg	snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/%s",
15749310723Smrg		 SYS_BUS_PCI,
15849310723Smrg		 dev->domain,
15949310723Smrg		 dev->bus,
16049310723Smrg		 dev->dev,
16149310723Smrg		 dev->func,
16249310723Smrg		 attrs[i]);
16349310723Smrg
16449310723Smrg	fd = open(name, O_RDONLY | O_CLOEXEC);
16549310723Smrg	if (fd == -1) {
16649310723Smrg	    return errno;
16749310723Smrg	}
16849310723Smrg
16949310723Smrg	read(fd, resource, 512);
17049310723Smrg	resource[511] = '\0';
17149310723Smrg
17249310723Smrg	close(fd);
17349310723Smrg
17449310723Smrg	data[i] = strtoull(resource, NULL, 16);
17549310723Smrg    }
17649310723Smrg
17749310723Smrg    dev->vendor_id = data[0] & 0xffff;
17849310723Smrg    dev->device_id = data[1] & 0xffff;
17949310723Smrg    dev->device_class = data[2] & 0xffffff;
18049310723Smrg    dev->revision = data[3] & 0xff;
18149310723Smrg    dev->subvendor_id = data[4] & 0xffff;
18249310723Smrg    dev->subdevice_id = data[5] & 0xffff;
18349310723Smrg
18449310723Smrg    return 0;
18549310723Smrg}
18649310723Smrg
18749310723Smrg
1884f5e7dd7Smrgint
1894f5e7dd7Smrgpopulate_entries( struct pci_system * p )
1904f5e7dd7Smrg{
191cad31331Smrg    struct dirent ** devices = NULL;
1924f5e7dd7Smrg    int n;
1934f5e7dd7Smrg    int i;
1944f5e7dd7Smrg    int err = 0;
1954f5e7dd7Smrg
1964f5e7dd7Smrg
1974f5e7dd7Smrg    n = scandir( SYS_BUS_PCI, & devices, scan_sys_pci_filter, alphasort );
1984f5e7dd7Smrg    if ( n > 0 ) {
1994f5e7dd7Smrg	p->num_devices = n;
2004f5e7dd7Smrg	p->devices = calloc( n, sizeof( struct pci_device_private ) );
2014f5e7dd7Smrg
2024f5e7dd7Smrg	if (p->devices != NULL) {
2034f5e7dd7Smrg	    for (i = 0 ; i < n ; i++) {
2044f5e7dd7Smrg		uint8_t config[48];
2054f5e7dd7Smrg		pciaddr_t bytes;
2064f5e7dd7Smrg		unsigned dom, bus, dev, func;
2074f5e7dd7Smrg		struct pci_device_private *device =
2084f5e7dd7Smrg			(struct pci_device_private *) &p->devices[i];
2094f5e7dd7Smrg
2104f5e7dd7Smrg
21166337f63Smrg		sscanf(devices[i]->d_name, "%x:%02x:%02x.%1u",
2124f5e7dd7Smrg		       & dom, & bus, & dev, & func);
2134f5e7dd7Smrg
2144f5e7dd7Smrg		device->base.domain = dom;
21566337f63Smrg		/*
21666337f63Smrg		 * Applications compiled with older versions  do not expect
21766337f63Smrg		 * 32-bit domain numbers. To keep them working, we keep a 16-bit
21866337f63Smrg		 * version of the domain number at the previous location.
21966337f63Smrg		 */
22066337f63Smrg		if (dom > 0xffff)
22166337f63Smrg		     device->base.domain_16 = 0xffff;
22266337f63Smrg		else
22366337f63Smrg		     device->base.domain_16 = dom;
2244f5e7dd7Smrg		device->base.bus = bus;
2254f5e7dd7Smrg		device->base.dev = dev;
2264f5e7dd7Smrg		device->base.func = func;
2274f5e7dd7Smrg
2284f5e7dd7Smrg
22949310723Smrg		err = parse_separate_sysfs_files(& device->base);
23049310723Smrg		if (!err)
23149310723Smrg		    continue;
23249310723Smrg
2334f5e7dd7Smrg		err = pci_device_linux_sysfs_read(& device->base, config, 0,
2344f5e7dd7Smrg						  48, & bytes);
2354f5e7dd7Smrg		if ((bytes == 48) && !err) {
2364f5e7dd7Smrg		    device->base.vendor_id = (uint16_t)config[0]
2374f5e7dd7Smrg			+ ((uint16_t)config[1] << 8);
2384f5e7dd7Smrg		    device->base.device_id = (uint16_t)config[2]
2394f5e7dd7Smrg			+ ((uint16_t)config[3] << 8);
2404f5e7dd7Smrg		    device->base.device_class = (uint32_t)config[9]
2414f5e7dd7Smrg			+ ((uint32_t)config[10] << 8)
2424f5e7dd7Smrg			+ ((uint32_t)config[11] << 16);
2434f5e7dd7Smrg		    device->base.revision = config[8];
2444f5e7dd7Smrg		    device->base.subvendor_id = (uint16_t)config[44]
2454f5e7dd7Smrg			+ ((uint16_t)config[45] << 8);
2464f5e7dd7Smrg		    device->base.subdevice_id = (uint16_t)config[46]
2474f5e7dd7Smrg			+ ((uint16_t)config[47] << 8);
2484f5e7dd7Smrg		}
2494f5e7dd7Smrg
2504f5e7dd7Smrg		if (err) {
2514f5e7dd7Smrg		    break;
2524f5e7dd7Smrg		}
2534f5e7dd7Smrg	    }
2544f5e7dd7Smrg	}
2554f5e7dd7Smrg	else {
2564f5e7dd7Smrg	    err = ENOMEM;
2574f5e7dd7Smrg	}
2584f5e7dd7Smrg    }
2594f5e7dd7Smrg
26049f872b5Smrg    for (i = 0; i < n; i++)
26149f872b5Smrg	free(devices[i]);
26249f872b5Smrg    free(devices);
26349f872b5Smrg
2644f5e7dd7Smrg    if (err) {
2654f5e7dd7Smrg	free(p->devices);
2664f5e7dd7Smrg	p->devices = NULL;
2672029f493Smrg	p->num_devices = 0;
2684f5e7dd7Smrg    }
2694f5e7dd7Smrg
2704f5e7dd7Smrg    return err;
2714f5e7dd7Smrg}
2724f5e7dd7Smrg
2734f5e7dd7Smrg
2744f5e7dd7Smrgstatic int
2754f5e7dd7Smrgpci_device_linux_sysfs_probe( struct pci_device * dev )
2764f5e7dd7Smrg{
2774f5e7dd7Smrg    char     name[256];
2784f5e7dd7Smrg    uint8_t  config[256];
2794f5e7dd7Smrg    char     resource[512];
2804f5e7dd7Smrg    int fd;
2814f5e7dd7Smrg    pciaddr_t bytes;
2824f5e7dd7Smrg    unsigned i;
2834f5e7dd7Smrg    int err;
2844f5e7dd7Smrg
2854f5e7dd7Smrg
2864f5e7dd7Smrg    err = pci_device_linux_sysfs_read( dev, config, 0, 256, & bytes );
2874f5e7dd7Smrg    if ( bytes >= 64 ) {
2884f5e7dd7Smrg	struct pci_device_private *priv = (struct pci_device_private *) dev;
2894f5e7dd7Smrg
2904f5e7dd7Smrg	dev->irq = config[60];
2914f5e7dd7Smrg	priv->header_type = config[14];
2924f5e7dd7Smrg
2934f5e7dd7Smrg
2944f5e7dd7Smrg	/* The PCI config registers can be used to obtain information
2954f5e7dd7Smrg	 * about the memory and I/O regions for the device.  However,
2964f5e7dd7Smrg	 * doing so requires some tricky parsing (to correctly handle
2974f5e7dd7Smrg	 * 64-bit memory regions) and requires writing to the config
2984f5e7dd7Smrg	 * registers.  Since we'd like to avoid having to deal with the
2994f5e7dd7Smrg	 * parsing issues and non-root users can write to PCI config
3004f5e7dd7Smrg	 * registers, we use a different file in the device's sysfs
3014f5e7dd7Smrg	 * directory called "resource".
302cad31331Smrg	 *
3034f5e7dd7Smrg	 * The resource file contains all of the needed information in
3044f5e7dd7Smrg	 * a format that is consistent across all platforms.  Each BAR
3054f5e7dd7Smrg	 * and the expansion ROM have a single line of data containing
3064f5e7dd7Smrg	 * 3, 64-bit hex values:  the first address in the region,
3074f5e7dd7Smrg	 * the last address in the region, and the region's flags.
3084f5e7dd7Smrg	 */
3094f5e7dd7Smrg	snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/resource",
3104f5e7dd7Smrg		  SYS_BUS_PCI,
3114f5e7dd7Smrg		  dev->domain,
3124f5e7dd7Smrg		  dev->bus,
3134f5e7dd7Smrg		  dev->dev,
3144f5e7dd7Smrg		  dev->func );
315cad31331Smrg	fd = open( name, O_RDONLY | O_CLOEXEC);
3164f5e7dd7Smrg	if ( fd != -1 ) {
3174f5e7dd7Smrg	    char * next;
3184f5e7dd7Smrg	    pciaddr_t  low_addr;
3194f5e7dd7Smrg	    pciaddr_t  high_addr;
3204f5e7dd7Smrg	    pciaddr_t  flags;
3214f5e7dd7Smrg
3224f5e7dd7Smrg
3234f5e7dd7Smrg	    bytes = read( fd, resource, 512 );
3244f5e7dd7Smrg	    resource[511] = '\0';
3254f5e7dd7Smrg
3264f5e7dd7Smrg	    close( fd );
3274f5e7dd7Smrg
3284f5e7dd7Smrg	    next = resource;
3294f5e7dd7Smrg	    for ( i = 0 ; i < 6 ; i++ ) {
3304f5e7dd7Smrg
3314f5e7dd7Smrg		dev->regions[i].base_addr = strtoull( next, & next, 16 );
3324f5e7dd7Smrg		high_addr = strtoull( next, & next, 16 );
3334f5e7dd7Smrg		flags = strtoull( next, & next, 16 );
334cad31331Smrg
3354f5e7dd7Smrg		if ( dev->regions[i].base_addr != 0 ) {
336cad31331Smrg		    dev->regions[i].size = (high_addr
3374f5e7dd7Smrg					    - dev->regions[i].base_addr) + 1;
3384f5e7dd7Smrg
33948becaf0Smrg		    dev->regions[i].is_IO = (flags & 0x01) != 0;
34048becaf0Smrg		    dev->regions[i].is_64 = (flags & 0x04) != 0;
34148becaf0Smrg		    dev->regions[i].is_prefetchable = (flags & 0x08) != 0;
3424f5e7dd7Smrg		}
3434f5e7dd7Smrg	    }
3444f5e7dd7Smrg
3454f5e7dd7Smrg	    low_addr = strtoull( next, & next, 16 );
3464f5e7dd7Smrg	    high_addr = strtoull( next, & next, 16 );
3474f5e7dd7Smrg	    flags = strtoull( next, & next, 16 );
3484f5e7dd7Smrg	    if ( low_addr != 0 ) {
3494f5e7dd7Smrg		priv->rom_base = low_addr;
3504f5e7dd7Smrg		dev->rom_size = (high_addr - low_addr) + 1;
3514f5e7dd7Smrg	    }
3524f5e7dd7Smrg	}
3534f5e7dd7Smrg    }
3544f5e7dd7Smrg
3554f5e7dd7Smrg    return err;
3564f5e7dd7Smrg}
3574f5e7dd7Smrg
3584f5e7dd7Smrg
3594f5e7dd7Smrgstatic int
3604f5e7dd7Smrgpci_device_linux_sysfs_read_rom( struct pci_device * dev, void * buffer )
3614f5e7dd7Smrg{
3624f5e7dd7Smrg    char name[256];
3634f5e7dd7Smrg    int fd;
3644f5e7dd7Smrg    struct stat  st;
3654f5e7dd7Smrg    int err = 0;
3664f5e7dd7Smrg    size_t rom_size;
3674f5e7dd7Smrg    size_t total_bytes;
3684f5e7dd7Smrg
3694f5e7dd7Smrg
3704f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/rom",
3714f5e7dd7Smrg	      SYS_BUS_PCI,
3724f5e7dd7Smrg	      dev->domain,
3734f5e7dd7Smrg	      dev->bus,
3744f5e7dd7Smrg	      dev->dev,
3754f5e7dd7Smrg	      dev->func );
376cad31331Smrg
377cad31331Smrg    fd = open( name, O_RDWR | O_CLOEXEC);
3784f5e7dd7Smrg    if ( fd == -1 ) {
37949f872b5Smrg#ifdef LINUX_ROM
3804f5e7dd7Smrg	/* If reading the ROM using sysfs fails, fall back to the old
3814f5e7dd7Smrg	 * /dev/mem based interface.
38249f872b5Smrg	 * disable this for newer kernels using configure
3834f5e7dd7Smrg	 */
3844f5e7dd7Smrg	return pci_device_linux_devmem_read_rom(dev, buffer);
38549f872b5Smrg#else
38649f872b5Smrg	return errno;
38749f872b5Smrg#endif
3884f5e7dd7Smrg    }
3894f5e7dd7Smrg
3904f5e7dd7Smrg
3914f5e7dd7Smrg    if ( fstat( fd, & st ) == -1 ) {
3924f5e7dd7Smrg	close( fd );
3934f5e7dd7Smrg	return errno;
3944f5e7dd7Smrg    }
3954f5e7dd7Smrg
3964f5e7dd7Smrg    rom_size = st.st_size;
3974f5e7dd7Smrg    if ( rom_size == 0 )
3984f5e7dd7Smrg	rom_size = 0x10000;
3994f5e7dd7Smrg
4004f5e7dd7Smrg    /* This is a quirky thing on Linux.  Even though the ROM and the file
4014f5e7dd7Smrg     * for the ROM in sysfs are read-only, the string "1" must be written to
4024f5e7dd7Smrg     * the file to enable the ROM.  After the data has been read, "0" must be
4034f5e7dd7Smrg     * written to the file to disable the ROM.
4044f5e7dd7Smrg     */
4054f5e7dd7Smrg    write( fd, "1", 1 );
4064f5e7dd7Smrg    lseek( fd, 0, SEEK_SET );
4074f5e7dd7Smrg
4084f5e7dd7Smrg    for ( total_bytes = 0 ; total_bytes < rom_size ; /* empty */ ) {
4094f5e7dd7Smrg	const int bytes = read( fd, (char *) buffer + total_bytes,
4104f5e7dd7Smrg				rom_size - total_bytes );
4114f5e7dd7Smrg	if ( bytes == -1 ) {
4124f5e7dd7Smrg	    err = errno;
4134f5e7dd7Smrg	    break;
4144f5e7dd7Smrg	}
4154f5e7dd7Smrg	else if ( bytes == 0 ) {
4164f5e7dd7Smrg	    break;
4174f5e7dd7Smrg	}
4184f5e7dd7Smrg
4194f5e7dd7Smrg	total_bytes += bytes;
4204f5e7dd7Smrg    }
421cad31331Smrg
4224f5e7dd7Smrg
4234f5e7dd7Smrg    lseek( fd, 0, SEEK_SET );
4244f5e7dd7Smrg    write( fd, "0", 1 );
4254f5e7dd7Smrg
4264f5e7dd7Smrg    close( fd );
4274f5e7dd7Smrg    return err;
4284f5e7dd7Smrg}
4294f5e7dd7Smrg
4304f5e7dd7Smrg
4314f5e7dd7Smrgstatic int
4324f5e7dd7Smrgpci_device_linux_sysfs_read( struct pci_device * dev, void * data,
4334f5e7dd7Smrg			     pciaddr_t offset, pciaddr_t size,
4344f5e7dd7Smrg			     pciaddr_t * bytes_read )
4354f5e7dd7Smrg{
4364f5e7dd7Smrg    char name[256];
4374f5e7dd7Smrg    pciaddr_t temp_size = size;
4384f5e7dd7Smrg    int err = 0;
4394f5e7dd7Smrg    int fd;
4404f5e7dd7Smrg    char *data_bytes = data;
4414f5e7dd7Smrg
4424f5e7dd7Smrg    if ( bytes_read != NULL ) {
4434f5e7dd7Smrg	*bytes_read = 0;
4444f5e7dd7Smrg    }
4454f5e7dd7Smrg
4464f5e7dd7Smrg    /* Each device has a directory under sysfs.  Within that directory there
4474f5e7dd7Smrg     * is a file named "config".  This file used to access the PCI config
4484f5e7dd7Smrg     * space.  It is used here to obtain most of the information about the
4494f5e7dd7Smrg     * device.
4504f5e7dd7Smrg     */
4514f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
4524f5e7dd7Smrg	      SYS_BUS_PCI,
4534f5e7dd7Smrg	      dev->domain,
4544f5e7dd7Smrg	      dev->bus,
4554f5e7dd7Smrg	      dev->dev,
4564f5e7dd7Smrg	      dev->func );
4574f5e7dd7Smrg
458cad31331Smrg    fd = open( name, O_RDONLY | O_CLOEXEC);
4594f5e7dd7Smrg    if ( fd == -1 ) {
4604f5e7dd7Smrg	return errno;
4614f5e7dd7Smrg    }
4624f5e7dd7Smrg
4634f5e7dd7Smrg
4644f5e7dd7Smrg    while ( temp_size > 0 ) {
4655ad99bdfSmrg	const ssize_t bytes = pread( fd, data_bytes, temp_size, offset );
4664f5e7dd7Smrg
4674f5e7dd7Smrg	/* If zero bytes were read, then we assume it's the end of the
4684f5e7dd7Smrg	 * config file.
4694f5e7dd7Smrg	 */
470cad31331Smrg	if (bytes == 0)
471cad31331Smrg	    break;
472cad31331Smrg	if ( bytes < 0 ) {
4734f5e7dd7Smrg	    err = errno;
4744f5e7dd7Smrg	    break;
4754f5e7dd7Smrg	}
4764f5e7dd7Smrg
4774f5e7dd7Smrg	temp_size -= bytes;
4784f5e7dd7Smrg	offset += bytes;
4794f5e7dd7Smrg	data_bytes += bytes;
4804f5e7dd7Smrg    }
481cad31331Smrg
4824f5e7dd7Smrg    if ( bytes_read != NULL ) {
4834f5e7dd7Smrg	*bytes_read = size - temp_size;
4844f5e7dd7Smrg    }
4854f5e7dd7Smrg
4864f5e7dd7Smrg    close( fd );
4874f5e7dd7Smrg    return err;
4884f5e7dd7Smrg}
4894f5e7dd7Smrg
4904f5e7dd7Smrg
4914f5e7dd7Smrgstatic int
4924f5e7dd7Smrgpci_device_linux_sysfs_write( struct pci_device * dev, const void * data,
4934f5e7dd7Smrg			     pciaddr_t offset, pciaddr_t size,
4944f5e7dd7Smrg			     pciaddr_t * bytes_written )
4954f5e7dd7Smrg{
4964f5e7dd7Smrg    char name[256];
4974f5e7dd7Smrg    pciaddr_t temp_size = size;
4984f5e7dd7Smrg    int err = 0;
4994f5e7dd7Smrg    int fd;
5004f5e7dd7Smrg    const char *data_bytes = data;
5014f5e7dd7Smrg
5024f5e7dd7Smrg    if ( bytes_written != NULL ) {
5034f5e7dd7Smrg	*bytes_written = 0;
5044f5e7dd7Smrg    }
5054f5e7dd7Smrg
5064f5e7dd7Smrg    /* Each device has a directory under sysfs.  Within that directory there
5074f5e7dd7Smrg     * is a file named "config".  This file used to access the PCI config
5084f5e7dd7Smrg     * space.  It is used here to obtain most of the information about the
5094f5e7dd7Smrg     * device.
5104f5e7dd7Smrg     */
5114f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
5124f5e7dd7Smrg	      SYS_BUS_PCI,
5134f5e7dd7Smrg	      dev->domain,
5144f5e7dd7Smrg	      dev->bus,
5154f5e7dd7Smrg	      dev->dev,
5164f5e7dd7Smrg	      dev->func );
5174f5e7dd7Smrg
518cad31331Smrg    fd = open( name, O_WRONLY | O_CLOEXEC);
5194f5e7dd7Smrg    if ( fd == -1 ) {
5204f5e7dd7Smrg	return errno;
5214f5e7dd7Smrg    }
5224f5e7dd7Smrg
5234f5e7dd7Smrg
5244f5e7dd7Smrg    while ( temp_size > 0 ) {
5255ad99bdfSmrg	const ssize_t bytes = pwrite( fd, data_bytes, temp_size, offset );
5264f5e7dd7Smrg
5274f5e7dd7Smrg	/* If zero bytes were written, then we assume it's the end of the
5284f5e7dd7Smrg	 * config file.
5294f5e7dd7Smrg	 */
530cad31331Smrg	if ( bytes == 0 )
531cad31331Smrg	    break;
532cad31331Smrg	if ( bytes < 0 ) {
5334f5e7dd7Smrg	    err = errno;
5344f5e7dd7Smrg	    break;
5354f5e7dd7Smrg	}
5364f5e7dd7Smrg
5374f5e7dd7Smrg	temp_size -= bytes;
5384f5e7dd7Smrg	offset += bytes;
5394f5e7dd7Smrg	data_bytes += bytes;
5404f5e7dd7Smrg    }
541cad31331Smrg
5424f5e7dd7Smrg    if ( bytes_written != NULL ) {
5434f5e7dd7Smrg	*bytes_written = size - temp_size;
5444f5e7dd7Smrg    }
5454f5e7dd7Smrg
5464f5e7dd7Smrg    close( fd );
5474f5e7dd7Smrg    return err;
5484f5e7dd7Smrg}
5494f5e7dd7Smrg
5504f5e7dd7Smrgstatic int
5514f5e7dd7Smrgpci_device_linux_sysfs_map_range_wc(struct pci_device *dev,
5524f5e7dd7Smrg				    struct pci_device_mapping *map)
5534f5e7dd7Smrg{
5544f5e7dd7Smrg    char name[256];
5554f5e7dd7Smrg    int fd;
556cad31331Smrg    const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
5574f5e7dd7Smrg        ? (PROT_READ | PROT_WRITE) : PROT_READ;
558cad31331Smrg    const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
5594f5e7dd7Smrg        ? O_RDWR : O_RDONLY;
5604f5e7dd7Smrg    const off_t offset = map->base - dev->regions[map->region].base_addr;
5614f5e7dd7Smrg
5624f5e7dd7Smrg    snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u_wc",
5634f5e7dd7Smrg	     SYS_BUS_PCI,
5644f5e7dd7Smrg	     dev->domain,
5654f5e7dd7Smrg	     dev->bus,
5664f5e7dd7Smrg	     dev->dev,
5674f5e7dd7Smrg	     dev->func,
5684f5e7dd7Smrg	     map->region);
569cad31331Smrg    fd = open(name, open_flags | O_CLOEXEC);
5704f5e7dd7Smrg    if (fd == -1)
5714f5e7dd7Smrg	    return errno;
5724f5e7dd7Smrg
5734f5e7dd7Smrg    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
5744f5e7dd7Smrg    if (map->memory == MAP_FAILED) {
5754f5e7dd7Smrg        map->memory = NULL;
5764f5e7dd7Smrg	close(fd);
5774f5e7dd7Smrg	return errno;
5784f5e7dd7Smrg    }
5794f5e7dd7Smrg
5804f5e7dd7Smrg    close(fd);
5814f5e7dd7Smrg
5824f5e7dd7Smrg    return 0;
5834f5e7dd7Smrg}
5844f5e7dd7Smrg
5854f5e7dd7Smrg/**
5864f5e7dd7Smrg * Map a memory region for a device using the Linux sysfs interface.
587cad31331Smrg *
5884f5e7dd7Smrg * \param dev   Device whose memory region is to be mapped.
5894f5e7dd7Smrg * \param map   Parameters of the mapping that is to be created.
590cad31331Smrg *
5914f5e7dd7Smrg * \return
5924f5e7dd7Smrg * Zero on success or an \c errno value on failure.
5934f5e7dd7Smrg *
5944f5e7dd7Smrg * \sa pci_device_map_rrange, pci_device_linux_sysfs_unmap_range
5954f5e7dd7Smrg *
5964f5e7dd7Smrg * \todo
5974f5e7dd7Smrg * Some older 2.6.x kernels don't implement the resourceN files.  On those
5984f5e7dd7Smrg * systems /dev/mem must be used.  On these systems it is also possible that
5994f5e7dd7Smrg * \c mmap64 may need to be used.
6004f5e7dd7Smrg */
6014f5e7dd7Smrgstatic int
6024f5e7dd7Smrgpci_device_linux_sysfs_map_range(struct pci_device *dev,
6034f5e7dd7Smrg                                 struct pci_device_mapping *map)
6044f5e7dd7Smrg{
6054f5e7dd7Smrg    char name[256];
6064f5e7dd7Smrg    int fd;
6074f5e7dd7Smrg    int err = 0;
608cad31331Smrg    const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
6094f5e7dd7Smrg        ? (PROT_READ | PROT_WRITE) : PROT_READ;
610cad31331Smrg    const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
6114f5e7dd7Smrg        ? O_RDWR : O_RDONLY;
6124f5e7dd7Smrg    const off_t offset = map->base - dev->regions[map->region].base_addr;
6134f5e7dd7Smrg#ifdef HAVE_MTRR
6144f5e7dd7Smrg    struct mtrr_sentry sentry = {
6154f5e7dd7Smrg	.base = map->base,
6164f5e7dd7Smrg        .size = map->size,
6174f5e7dd7Smrg	.type = MTRR_TYPE_UNCACHABLE
6184f5e7dd7Smrg    };
6194f5e7dd7Smrg#endif
6204f5e7dd7Smrg
6214f5e7dd7Smrg    /* For WC mappings, try sysfs resourceN_wc file first */
6224f5e7dd7Smrg    if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) &&
6234f5e7dd7Smrg	!pci_device_linux_sysfs_map_range_wc(dev, map))
6244f5e7dd7Smrg	    return 0;
6254f5e7dd7Smrg
6264f5e7dd7Smrg    snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u",
6274f5e7dd7Smrg             SYS_BUS_PCI,
6284f5e7dd7Smrg             dev->domain,
6294f5e7dd7Smrg             dev->bus,
6304f5e7dd7Smrg             dev->dev,
6314f5e7dd7Smrg             dev->func,
6324f5e7dd7Smrg             map->region);
6334f5e7dd7Smrg
634cad31331Smrg    fd = open(name, open_flags | O_CLOEXEC);
6354f5e7dd7Smrg    if (fd == -1) {
6364f5e7dd7Smrg        return errno;
6374f5e7dd7Smrg    }
6384f5e7dd7Smrg
6394f5e7dd7Smrg
6404f5e7dd7Smrg    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
6414f5e7dd7Smrg    if (map->memory == MAP_FAILED) {
6424f5e7dd7Smrg        map->memory = NULL;
6434f5e7dd7Smrg	close(fd);
6444f5e7dd7Smrg	return errno;
6454f5e7dd7Smrg    }
6464f5e7dd7Smrg
6474f5e7dd7Smrg#ifdef HAVE_MTRR
6484f5e7dd7Smrg    if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
6494f5e7dd7Smrg        sentry.type = MTRR_TYPE_WRBACK;
6504f5e7dd7Smrg    } else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
6514f5e7dd7Smrg        sentry.type = MTRR_TYPE_WRCOMB;
6524f5e7dd7Smrg    }
6534f5e7dd7Smrg
6544f5e7dd7Smrg    if (pci_sys->mtrr_fd != -1 && sentry.type != MTRR_TYPE_UNCACHABLE) {
6554f5e7dd7Smrg	if (ioctl(pci_sys->mtrr_fd, MTRRIOC_ADD_ENTRY, &sentry) < 0) {
6564f5e7dd7Smrg	    /* FIXME: Should we report an error in this case?
6574f5e7dd7Smrg	     */
6584f5e7dd7Smrg	    fprintf(stderr, "error setting MTRR "
65949310723Smrg		    "(base = 0x%016" PRIx64 ", size = 0x%08x, type = %u) %s (%d)\n",
66049310723Smrg		    (pciaddr_t)sentry.base, sentry.size, sentry.type,
6614f5e7dd7Smrg		    strerror(errno), errno);
6624f5e7dd7Smrg/*            err = errno;*/
6634f5e7dd7Smrg	}
6644f5e7dd7Smrg	/* KLUDGE ALERT -- rewrite the PTEs to turn off the CD and WT bits */
6654f5e7dd7Smrg	mprotect (map->memory, map->size, PROT_NONE);
6664f5e7dd7Smrg	err = mprotect (map->memory, map->size, PROT_READ|PROT_WRITE);
6674f5e7dd7Smrg
6684f5e7dd7Smrg	if (err != 0) {
6694f5e7dd7Smrg	    fprintf(stderr, "mprotect(PROT_READ | PROT_WRITE) failed: %s\n",
6704f5e7dd7Smrg		    strerror(errno));
6714f5e7dd7Smrg	    fprintf(stderr, "remapping without mprotect performance kludge.\n");
6724f5e7dd7Smrg
6734f5e7dd7Smrg	    munmap(map->memory, map->size);
6744f5e7dd7Smrg	    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
6754f5e7dd7Smrg	    if (map->memory == MAP_FAILED) {
6764f5e7dd7Smrg		map->memory = NULL;
6774f5e7dd7Smrg		close(fd);
6784f5e7dd7Smrg		return errno;
6794f5e7dd7Smrg	    }
6804f5e7dd7Smrg	}
6814f5e7dd7Smrg    }
6824f5e7dd7Smrg#endif
6834f5e7dd7Smrg
6844f5e7dd7Smrg    close(fd);
6854f5e7dd7Smrg
6864f5e7dd7Smrg    return 0;
6874f5e7dd7Smrg}
6884f5e7dd7Smrg
/**
 * Unmap a memory region for a device using the Linux sysfs interface.
 *
 * The mapping itself is released by \c pci_device_generic_unmap_range.  When
 * built with \c HAVE_MTRR and the MTRR device is open, the MTRR entry that
 * corresponds to a cachable or write-combine mapping is deleted afterwards.
 *
 * \param dev   Device whose memory region is to be unmapped.
 * \param map   Parameters of the mapping that is to be destroyed.
 *
 * \return
 * Zero on success or the error returned by
 * \c pci_device_generic_unmap_range.  A failure to delete the MTRR entry is
 * reported on \c stderr only.
 *
 * \sa pci_device_linux_sysfs_map_range
 */
static int
pci_device_linux_sysfs_unmap_range(struct pci_device *dev,
                                   struct pci_device_mapping *map)
{
#ifdef HAVE_MTRR
    /* Base and size are captured before the generic unmap runs. */
    struct mtrr_sentry sentry = {
        .base = map->base,
        .size = map->size,
        .type = MTRR_TYPE_UNCACHABLE
    };
#endif
    const int unmap_err = pci_device_generic_unmap_range (dev, map);

    if (unmap_err != 0) {
        return unmap_err;
    }

#ifdef HAVE_MTRR
    /* Pick the MTRR type the same way the mapping code does. */
    if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
        sentry.type = MTRR_TYPE_WRBACK;
    } else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
        sentry.type = MTRR_TYPE_WRCOMB;
    }

    /* Nothing to delete without an MTRR device or for uncached mappings. */
    if (pci_sys->mtrr_fd == -1 || sentry.type == MTRR_TYPE_UNCACHABLE) {
        return unmap_err;
    }

    if (ioctl(pci_sys->mtrr_fd, MTRRIOC_DEL_ENTRY, &sentry) < 0) {
        /* FIXME: Should we report an error in this case?
         * NOTE(review): the text says "setting" although this is the
         * delete path; the message is left unchanged here.
         */
        fprintf(stderr, "error setting MTRR "
                "(base = 0x%016" PRIx64 ", size = 0x%08x, type = %u) %s (%d)\n",
                (pciaddr_t)sentry.base, sentry.size, sentry.type,
                strerror(errno), errno);
    }
#endif

    return unmap_err;
}
7444f5e7dd7Smrg
74548becaf0Smrgstatic void pci_device_linux_sysfs_set_enable(struct pci_device *dev, int enable)
7464f5e7dd7Smrg{
7474f5e7dd7Smrg    char name[256];
7484f5e7dd7Smrg    int fd;
7494f5e7dd7Smrg
7504f5e7dd7Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/enable",
7514f5e7dd7Smrg	      SYS_BUS_PCI,
7524f5e7dd7Smrg	      dev->domain,
7534f5e7dd7Smrg	      dev->bus,
7544f5e7dd7Smrg	      dev->dev,
7554f5e7dd7Smrg	      dev->func );
756cad31331Smrg
757cad31331Smrg    fd = open( name, O_RDWR | O_CLOEXEC);
7584f5e7dd7Smrg    if (fd == -1)
7594f5e7dd7Smrg       return;
7604f5e7dd7Smrg
76148becaf0Smrg    write( fd, enable ? "1" : "0" , 1 );
7624f5e7dd7Smrg    close(fd);
7634f5e7dd7Smrg}
76449f872b5Smrg
/**
 * Enable a device by writing "1" to its sysfs "enable" file.
 *
 * \param dev  Device to enable.
 *
 * \sa pci_device_linux_sysfs_set_enable
 */
static void pci_device_linux_sysfs_enable(struct pci_device *dev)
{
    /* Plain call: a void function must not "return" an expression. */
    pci_device_linux_sysfs_set_enable(dev, 1);
}
76948becaf0Smrg
/**
 * Disable a device by writing "0" to its sysfs "enable" file.
 *
 * \param dev  Device to disable.
 *
 * \sa pci_device_linux_sysfs_set_enable
 */
static void pci_device_linux_sysfs_disable(struct pci_device *dev)
{
    /* Plain call: a void function must not "return" an expression. */
    pci_device_linux_sysfs_set_enable(dev, 0);
}
77448becaf0Smrg
77549f872b5Smrgstatic int pci_device_linux_sysfs_boot_vga(struct pci_device *dev)
77649f872b5Smrg{
77749f872b5Smrg    char name[256];
77849f872b5Smrg    char reply[3];
77949f872b5Smrg    int fd, bytes_read;
78049f872b5Smrg    int ret = 0;
78149f872b5Smrg
78249f872b5Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/boot_vga",
78349f872b5Smrg	      SYS_BUS_PCI,
78449f872b5Smrg	      dev->domain,
78549f872b5Smrg	      dev->bus,
78649f872b5Smrg	      dev->dev,
78749f872b5Smrg	      dev->func );
788cad31331Smrg
789cad31331Smrg    fd = open( name, O_RDONLY | O_CLOEXEC);
79049f872b5Smrg    if (fd == -1)
79149f872b5Smrg       return 0;
79249f872b5Smrg
79349f872b5Smrg    bytes_read = read(fd, reply, 1);
79449f872b5Smrg    if (bytes_read != 1)
79549f872b5Smrg	goto out;
79649f872b5Smrg    if (reply[0] == '1')
79749f872b5Smrg	ret = 1;
79849f872b5Smrgout:
79949f872b5Smrg    close(fd);
80049f872b5Smrg    return ret;
80149f872b5Smrg}
80249f872b5Smrg
80349f872b5Smrgstatic int pci_device_linux_sysfs_has_kernel_driver(struct pci_device *dev)
80449f872b5Smrg{
80549f872b5Smrg    char name[256];
80649f872b5Smrg    struct stat dummy;
80749f872b5Smrg    int ret;
80849f872b5Smrg
80949f872b5Smrg    snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/driver",
81049f872b5Smrg	      SYS_BUS_PCI,
81149f872b5Smrg	      dev->domain,
81249f872b5Smrg	      dev->bus,
81349f872b5Smrg	      dev->dev,
81449f872b5Smrg	      dev->func );
815cad31331Smrg
81649f872b5Smrg    ret = stat(name, &dummy);
81749f872b5Smrg    if (ret < 0)
81849f872b5Smrg	return 0;
81949f872b5Smrg    return 1;
82049f872b5Smrg}
821e432255dSmrg
822e432255dSmrgstatic struct pci_io_handle *
823e432255dSmrgpci_device_linux_sysfs_open_device_io(struct pci_io_handle *ret,
824e432255dSmrg				      struct pci_device *dev, int bar,
825e432255dSmrg				      pciaddr_t base, pciaddr_t size)
826e432255dSmrg{
827e432255dSmrg    char name[PATH_MAX];
828e432255dSmrg
829e432255dSmrg    snprintf(name, PATH_MAX, "%s/%04x:%02x:%02x.%1u/resource%d",
830e432255dSmrg	     SYS_BUS_PCI, dev->domain, dev->bus, dev->dev, dev->func, bar);
831e432255dSmrg
832cad31331Smrg    ret->fd = open(name, O_RDWR | O_CLOEXEC);
833e432255dSmrg
834e432255dSmrg    if (ret->fd < 0)
835e432255dSmrg	return NULL;
836e432255dSmrg
837e432255dSmrg    ret->base = base;
838e432255dSmrg    ret->size = size;
8396a94483fSmrg    ret->is_legacy = 0;
840e432255dSmrg
841e432255dSmrg    return ret;
842e432255dSmrg}
843e432255dSmrg
844e432255dSmrgstatic struct pci_io_handle *
845e432255dSmrgpci_device_linux_sysfs_open_legacy_io(struct pci_io_handle *ret,
846e432255dSmrg				      struct pci_device *dev, pciaddr_t base,
847e432255dSmrg				      pciaddr_t size)
848e432255dSmrg{
849e432255dSmrg    char name[PATH_MAX];
850e432255dSmrg
851e432255dSmrg    /* First check if there's a legacy io method for the device */
852e432255dSmrg    while (dev) {
853e432255dSmrg	snprintf(name, PATH_MAX, "/sys/class/pci_bus/%04x:%02x/legacy_io",
854e432255dSmrg		 dev->domain, dev->bus);
855e432255dSmrg
856cad31331Smrg	ret->fd = open(name, O_RDWR | O_CLOEXEC);
857e432255dSmrg	if (ret->fd >= 0)
858e432255dSmrg	    break;
859e432255dSmrg
860e432255dSmrg	dev = pci_device_get_parent_bridge(dev);
861e432255dSmrg    }
862e432255dSmrg
863cad31331Smrg    /*
864cad31331Smrg     * You would think you'd want to use /dev/port here.  Don't make that
865cad31331Smrg     * mistake, /dev/port only does byte-wide i/o cycles which means it
866cad31331Smrg     * doesn't work.  If you think this is stupid, well, you're right.
867cad31331Smrg     */
868e432255dSmrg
869cad31331Smrg    /* If we've no other choice, iopl */
870cad31331Smrg    if (ret->fd < 0) {
871cad31331Smrg	if (iopl(3))
872cad31331Smrg	    return NULL;
873cad31331Smrg    }
874e432255dSmrg
875e432255dSmrg    ret->base = base;
876e432255dSmrg    ret->size = size;
8776a94483fSmrg    ret->is_legacy = 1;
878e432255dSmrg
879e432255dSmrg    return ret;
880e432255dSmrg}
881e432255dSmrg
882e432255dSmrgstatic void
883e432255dSmrgpci_device_linux_sysfs_close_io(struct pci_device *dev,
884e432255dSmrg				struct pci_io_handle *handle)
885e432255dSmrg{
886cad31331Smrg    if (handle->fd > -1)
887cad31331Smrg	close(handle->fd);
888e432255dSmrg}
889e432255dSmrg
890e432255dSmrgstatic uint32_t
891e432255dSmrgpci_device_linux_sysfs_read32(struct pci_io_handle *handle, uint32_t port)
892e432255dSmrg{
893e432255dSmrg    uint32_t ret;
894e432255dSmrg
8956a94483fSmrg    if (handle->fd > -1) {
8966a94483fSmrg	if (handle->is_legacy)
8976a94483fSmrg	    pread(handle->fd, &ret, 4, port + handle->base);
8986a94483fSmrg	else
8996a94483fSmrg	    pread(handle->fd, &ret, 4, port);
9006a94483fSmrg    } else {
901cad31331Smrg	ret = inl(port + handle->base);
9026a94483fSmrg    }
903cad31331Smrg
904e432255dSmrg    return ret;
905e432255dSmrg}
906e432255dSmrg
907e432255dSmrgstatic uint16_t
908e432255dSmrgpci_device_linux_sysfs_read16(struct pci_io_handle *handle, uint32_t port)
909e432255dSmrg{
910e432255dSmrg    uint16_t ret;
911e432255dSmrg
9126a94483fSmrg    if (handle->fd > -1) {
9136a94483fSmrg	if (handle->is_legacy)
9146a94483fSmrg	    pread(handle->fd, &ret, 2, port + handle->base);
9156a94483fSmrg	else
9166a94483fSmrg	    pread(handle->fd, &ret, 2, port);
9176a94483fSmrg    } else {
918cad31331Smrg	ret = inw(port + handle->base);
9196a94483fSmrg    }
920e432255dSmrg
921e432255dSmrg    return ret;
922e432255dSmrg}
923e432255dSmrg
924e432255dSmrgstatic uint8_t
925e432255dSmrgpci_device_linux_sysfs_read8(struct pci_io_handle *handle, uint32_t port)
926e432255dSmrg{
927e432255dSmrg    uint8_t ret;
928e432255dSmrg
9296a94483fSmrg    if (handle->fd > -1) {
9306a94483fSmrg	if (handle->is_legacy)
9316a94483fSmrg	    pread(handle->fd, &ret, 1, port + handle->base);
9326a94483fSmrg	else
9336a94483fSmrg	    pread(handle->fd, &ret, 1, port);
9346a94483fSmrg    } else {
935cad31331Smrg	ret = inb(port + handle->base);
9366a94483fSmrg    }
937e432255dSmrg
938e432255dSmrg    return ret;
939e432255dSmrg}
940e432255dSmrg
941e432255dSmrgstatic void
942e432255dSmrgpci_device_linux_sysfs_write32(struct pci_io_handle *handle, uint32_t port,
943e432255dSmrg			       uint32_t data)
944e432255dSmrg{
9456a94483fSmrg    if (handle->fd > -1) {
9466a94483fSmrg	if (handle->is_legacy)
9476a94483fSmrg	    pwrite(handle->fd, &data, 4, port + handle->base);
9486a94483fSmrg	else
9496a94483fSmrg	    pwrite(handle->fd, &data, 4, port);
9506a94483fSmrg    } else {
951cad31331Smrg	outl(data, port + handle->base);
9526a94483fSmrg    }
953e432255dSmrg}
954e432255dSmrg
955e432255dSmrgstatic void
956e432255dSmrgpci_device_linux_sysfs_write16(struct pci_io_handle *handle, uint32_t port,
957e432255dSmrg			       uint16_t data)
958e432255dSmrg{
9596a94483fSmrg    if (handle->fd > -1) {
9606a94483fSmrg	if (handle->is_legacy)
9616a94483fSmrg	    pwrite(handle->fd, &data, 2, port + handle->base);
9626a94483fSmrg	else
9636a94483fSmrg	    pwrite(handle->fd, &data, 2, port);
9646a94483fSmrg    } else {
965cad31331Smrg	outw(data, port + handle->base);
9666a94483fSmrg    }
967e432255dSmrg}
968e432255dSmrg
969e432255dSmrgstatic void
970e432255dSmrgpci_device_linux_sysfs_write8(struct pci_io_handle *handle, uint32_t port,
971e432255dSmrg			      uint8_t data)
972e432255dSmrg{
9736a94483fSmrg    if (handle->fd > -1) {
9746a94483fSmrg	if (handle->is_legacy)
9756a94483fSmrg	    pwrite(handle->fd, &data, 1, port + handle->base);
9766a94483fSmrg	else
9776a94483fSmrg	    pwrite(handle->fd, &data, 1, port);
9786a94483fSmrg    } else {
979cad31331Smrg	outb(data, port + handle->base);
9806a94483fSmrg    }
981cad31331Smrg}
982cad31331Smrg
983cad31331Smrgstatic int
984cad31331Smrgpci_device_linux_sysfs_map_legacy(struct pci_device *dev, pciaddr_t base,
985cad31331Smrg				  pciaddr_t size, unsigned map_flags, void **addr)
986cad31331Smrg{
987cad31331Smrg    char name[PATH_MAX];
988cad31331Smrg    int flags = O_RDONLY;
989cad31331Smrg    int prot = PROT_READ;
990cad31331Smrg    int fd;
991cad31331Smrg    int ret=0;
992cad31331Smrg
993cad31331Smrg    if (map_flags & PCI_DEV_MAP_FLAG_WRITABLE) {
99448becaf0Smrg	flags = O_RDWR; /* O_RDWR != O_WRONLY | O_RDONLY */
995cad31331Smrg	prot |= PROT_WRITE;
996cad31331Smrg    }
997cad31331Smrg
998cad31331Smrg    /* First check if there's a legacy memory method for the device */
999cad31331Smrg    while (dev) {
1000cad31331Smrg	snprintf(name, PATH_MAX, "/sys/class/pci_bus/%04x:%02x/legacy_mem",
1001cad31331Smrg		 dev->domain, dev->bus);
1002cad31331Smrg
1003cad31331Smrg	fd = open(name, flags | O_CLOEXEC);
1004cad31331Smrg	if (fd >= 0)
1005cad31331Smrg	    break;
1006cad31331Smrg
1007cad31331Smrg	dev = pci_device_get_parent_bridge(dev);
1008cad31331Smrg    }
1009cad31331Smrg
1010cad31331Smrg    /* If not, /dev/mem is the best we can do */
1011cad31331Smrg    if (!dev)
1012cad31331Smrg	fd = open("/dev/mem", flags | O_CLOEXEC);
1013cad31331Smrg
1014cad31331Smrg    if (fd < 0)
1015cad31331Smrg	return errno;
1016cad31331Smrg
1017cad31331Smrg    *addr = mmap(NULL, size, prot, MAP_SHARED, fd, base);
1018cad31331Smrg    if (*addr == MAP_FAILED) {
1019cad31331Smrg	ret = errno;
1020cad31331Smrg    }
1021cad31331Smrg
1022cad31331Smrg    close(fd);
1023cad31331Smrg    return ret;
1024cad31331Smrg}
1025cad31331Smrg
1026cad31331Smrgstatic int
1027cad31331Smrgpci_device_linux_sysfs_unmap_legacy(struct pci_device *dev, void *addr, pciaddr_t size)
1028cad31331Smrg{
1029cad31331Smrg    return munmap(addr, size);
1030cad31331Smrg}
1031cad31331Smrg
1032cad31331Smrg
/**
 * System-level teardown hook.
 *
 * When built with HAVE_MTRR, closes pci_sys->mtrr_fd unless it is -1.
 * Without HAVE_MTRR this function does nothing.
 */
static void
pci_system_linux_destroy(void)
{
#ifdef HAVE_MTRR
    const int mtrr_fd = pci_sys->mtrr_fd;

    /* -1 marks "no MTRR descriptor open". */
    if (mtrr_fd != -1)
        close(mtrr_fd);
#endif
}
1041e432255dSmrg
/**
 * Method table that plugs the sysfs-backed implementations from this file
 * into a struct pci_system_methods.
 */
static const struct pci_system_methods linux_sysfs_methods = {
    .destroy = pci_system_linux_destroy,
    .destroy_device = NULL,	/* no per-device destroy hook is installed */
    .read_rom = pci_device_linux_sysfs_read_rom,
    .probe = pci_device_linux_sysfs_probe,
    .map_range = pci_device_linux_sysfs_map_range,
    .unmap_range = pci_device_linux_sysfs_unmap_range,

    .read = pci_device_linux_sysfs_read,
    .write = pci_device_linux_sysfs_write,

    /* The only entry here that is not a linux_sysfs-prefixed function. */
    .fill_capabilities = pci_fill_capabilities_generic,
    .enable = pci_device_linux_sysfs_enable,
    .disable = pci_device_linux_sysfs_disable,
    .boot_vga = pci_device_linux_sysfs_boot_vga,
    .has_kernel_driver = pci_device_linux_sysfs_has_kernel_driver,

    /* Port I/O handles and their sized accessors. */
    .open_device_io = pci_device_linux_sysfs_open_device_io,
    .open_legacy_io = pci_device_linux_sysfs_open_legacy_io,
    .close_io = pci_device_linux_sysfs_close_io,
    .read32 = pci_device_linux_sysfs_read32,
    .read16 = pci_device_linux_sysfs_read16,
    .read8 = pci_device_linux_sysfs_read8,
    .write32 = pci_device_linux_sysfs_write32,
    .write16 = pci_device_linux_sysfs_write16,
    .write8 = pci_device_linux_sysfs_write8,

    /* Legacy memory mapping. */
    .map_legacy = pci_device_linux_sysfs_map_legacy,
    .unmap_legacy = pci_device_linux_sysfs_unmap_legacy,
};
1072