diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 81fb1b0..7b73747 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -232,6 +232,11 @@ source "init/Kconfig" source "kernel/Kconfig.freezer" menu "Processor type and features" +config PALACIOS + bool "Palacios support" + help + Detect the Palacios (V3VEE) hypervisor and add the memory offset + it reports to the DMA addresses of the PCI devices it flags. source "kernel/time/Kconfig" diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 631958a..42a0969 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o +obj-$(CONFIG_PALACIOS) += palacios.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 5acdbc7..f46b207 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1303,7 +1303,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC -END(do_hypervisor_callback) +END(xen_do_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. 
diff --git a/arch/x86/kernel/palacios.c b/arch/x86/kernel/palacios.c
new file mode 100644
index 0000000..b4f5cbf
--- /dev/null
+++ b/arch/x86/kernel/palacios.c
@@ -0,0 +1,215 @@
+/*
+ * palacios.c
+ *
+ * Guest-side glue for the Palacios (V3VEE) hypervisor: detects the
+ * hypervisor with CPUID, fetches a memory offset from it by hypercall,
+ * publishes a "symspy" page to it through an MSR, and reports that offset
+ * for the PCI functions flagged in the page's bitmap.
+ *
+ *  Created on: Jul 12, 2011
+ *      Author: vedun
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define NO_SYM_HV 0
+#define SYM_HV_VMX 1
+#define SYM_HV_SVM 2
+#define SYM_PAGE_SIZE 12
+#define SYM_CPUID_NUM 0x90000000
+#define MEM_OFFSET_HCALL 0x1000
+
+#define SYM_MSR_GLOBAL 0x00000534
+
+/* Geometry of the PCI bitmap below: one bit per bus/device/function. */
+#define SYM_PCI_MAX_BUS 4
+#define SYM_PCI_MAX_DEV 32
+#define SYM_PCI_MAX_FUNC 8
+
+/*
+ * A three-character signature as it reads from a 32-bit register, i.e. the
+ * bytes of "abc\0" in little-endian order.
+ */
+#define SYM_SIG(a, b, c) \
+	((unsigned int)(a) | ((unsigned int)(b) << 8) | ((unsigned int)(c) << 16))
+
+/*
+ * Layout of the page whose physical address is written to SYM_MSR_GLOBAL.
+ * Only pci_pt_map is read by this file.
+ */
+static struct v3_symspy_global_page {
+	uint64_t magic;
+
+	union {
+		uint32_t feature_flags;
+		struct {
+			uint8_t pci_map_valid : 1;
+			uint8_t symmod_enabled : 1;
+			uint8_t sec_symmod_enabled : 1;
+		} __attribute__((packed));
+	} __attribute__((packed));
+
+	/* we're hardcoding this: (4 busses, 256 max devs) */
+	uint8_t pci_pt_map[(SYM_PCI_MAX_BUS * 256) / 8];
+
+} __attribute__((packed)) *symspy_global_page;
+
+
+static int symspy_is_initialized = 0;
+static int vm_is_detected = 0;
+static unsigned long long mem_offset = 0;
+
+/*
+ * Probe CPUID leaf SYM_CPUID_NUM for the V3VEE signature.
+ *
+ * Returns SYM_HV_SVM or SYM_HV_VMX according to the architecture tag in
+ * EBX, or NO_SYM_HV when the signature is absent or the tag is unknown.
+ */
+static int detect_sym_hv(void)
+{
+	unsigned int eax = 0, ebx = 0;
+	char arch[sizeof(ebx) + 1];
+	unsigned int i;
+
+	printk("Detecting symbiotic hypervisor..\n");
+
+	/*
+	 * CPUID overwrites EAX, EBX, ECX and EDX; ECX and EDX are not used
+	 * here, so they are declared as clobbers.
+	 */
+	asm volatile("cpuid"
+		     : "=a"(eax), "=b"(ebx)
+		     : "a"((unsigned int)SYM_CPUID_NUM)
+		     : "ecx", "edx");
+
+	if (eax != SYM_SIG('V', '3', 'V')) {
+		printk("V3VEE not detected. EAX %x EBX %x\n", eax, ebx);
+		return NO_SYM_HV;
+	}
+
+	/* Copy the tag out so that "%s" always sees a terminated string. */
+	for (i = 0; i < sizeof(ebx); i++)
+		arch[i] = (char)(ebx >> (8 * i));
+	arch[sizeof(ebx)] = '\0';
+	printk("V3VEE detected: arch %s.\n", arch);
+
+	if (ebx == SYM_SIG('S', 'V', 'M'))
+		return SYM_HV_SVM;
+	if (ebx == SYM_SIG('V', 'M', 'X'))
+		return SYM_HV_VMX;
+
+	printk("Bad signature!\n");
+	return NO_SYM_HV;
+}
+
+/*
+ * Fetch the memory offset from the hypervisor and hand it a zeroed page
+ * through SYM_MSR_GLOBAL.
+ *
+ * @hv_type: value returned by detect_sym_hv().
+ *
+ * Returns 1 once symspy_global_page is set up, 0 when no hypervisor was
+ * detected or the page could not be allocated.
+ */
+static int symbiotic_test(int hv_type)
+{
+	unsigned long offset = 0;
+	unsigned long vaddr;
+	uint64_t paddr;
+	int status = 0;
+
+	printk("SYMBIOTIC TEST START\n");
+	if (hv_type == NO_SYM_HV)
+		return 0;
+
+	/* Same hypercall on both architectures; only the opcode differs. */
+	if (hv_type == SYM_HV_SVM) {
+		asm volatile("vmmcall"
+			     : "=a"(status), "=b"(offset)
+			     : "a"(MEM_OFFSET_HCALL)
+			     : "memory");
+	} else {
+		asm volatile("vmcall"
+			     : "=a"(status), "=b"(offset)
+			     : "a"(MEM_OFFSET_HCALL)
+			     : "memory");
+	}
+
+	if (status != 0) {
+		printk("Hypercall finished with error.\n");
+		/* Do not trust whatever a failed hypercall left in EBX/RBX. */
+		offset = 0;
+	} else {
+		printk("Detected memory offset %llx.\n",
+		       (unsigned long long)offset);
+	}
+	mem_offset = offset;
+
+	/*
+	 * Zeroed so that pci_pt_map never exposes stale page contents as
+	 * device flags.
+	 */
+	vaddr = get_zeroed_page(GFP_KERNEL);
+	if (!vaddr) {
+		printk("SymspyGlobalPage allocation failed\n");
+		return 0;
+	}
+
+	/* Widen first: shifting a 32-bit address by 32 would be undefined. */
+	paddr = virt_to_phys((void *)vaddr);
+	wrmsr(SYM_MSR_GLOBAL, (uint32_t)paddr, (uint32_t)(paddr >> 32));
+	symspy_global_page = (struct v3_symspy_global_page *)vaddr;
+
+	printk("SymspyGlobalPage detected at VA %LX, PA %LX\n",
+	       (unsigned long long)vaddr, (unsigned long long)paddr);
+
+	return 1;
+}
+
+
+/*
+ * palacios_get_device_dma_offset - DMA offset for one PCI function
+ * @bus:  PCI bus number
+ * @dev:  device (slot) number on that bus
+ * @func: function number within the device
+ *
+ * The first call probes for the hypervisor and sets up the symspy page.
+ *
+ * Returns mem_offset when the function's bit is set in pci_pt_map, and 0
+ * when it is clear, when the coordinates fall outside the bitmap, or when
+ * no hypervisor page is available.
+ */
+uint64_t palacios_get_device_dma_offset(int bus, int dev, int func)
+{
+	unsigned int dev_index;
+
+	/* Anything outside the bitmap would index past pci_pt_map. */
+	if (bus < 0 || bus >= SYM_PCI_MAX_BUS ||
+	    dev < 0 || dev >= SYM_PCI_MAX_DEV ||
+	    func < 0 || func >= SYM_PCI_MAX_FUNC)
+		return 0;
+
+	if (!symspy_is_initialized) {
+		vm_is_detected = symbiotic_test(detect_sym_hv());
+		symspy_is_initialized = 1;
+	}
+
+	if (!vm_is_detected || !symspy_global_page)
+		return 0;
+
+	dev_index = (bus << 8) + (dev << 3) + func;
+	if (!(symspy_global_page->pci_pt_map[dev_index / 8] &
+	      (1 << (dev_index % 8))))
+		return 0;
+
+	return mem_offset;
+}
+
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 6ac3931..efbc143 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -169,7 +169,10 @@ again: return NULL; } - *dma_addr = addr; + *dma_addr = addr + dev->dma_offset; + if (dev->dma_offset != 0) { + printk("Alloc %lX %lX : %lX\n", (long unsigned)*dma_addr , (long unsigned)(*dma_addr - dev->dma_offset), (long unsigned)page); + } return page_address(page); } diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4..b738fef 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -30,7 +30,8 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { - dma_addr_t bus = page_to_phys(page) + offset; + dma_addr_t bus = page_to_phys(page) + offset + dev->dma_offset; + printk("map_page: %lX %lX : %lX\n", (long unsigned)bus, (long unsigned)(bus - dev->dma_offset), (long unsigned)page); WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) return bad_dma_address; @@ -64,7 +65,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, for_each_sg(sg, s, nents, i) { BUG_ON(!sg_page(s)); - s->dma_address = sg_phys(s); + s->dma_address = sg_phys(s) + hwdev->dma_offset; if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) return 0; s->dma_length = s->length; diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index c34dca8..08e5e33 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -1491,7 +1491,7 @@ static void uv_init_per_cpu(int nuvhubs) int uvhub; short socket = 0; unsigned short socket_mask; - unsigned int uvhub_mask; + unsigned int uvhub_mask = 0; struct bau_control *bcp; struct uvhub_desc *bdp; struct socket_desc *sdp; diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 35236aa..9b63cba 100644 --- a/drivers/ata/ahci.c +++
b/drivers/ata/ahci.c @@ -3225,7 +3225,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) VPRINTK("ENTER\n"); - WARN_ON(ATA_MAX_QUEUE > AHCI_MAX_CMDS); + //WARN_ON(ATA_MAX_QUEUE > AHCI_MAX_CMDS); if (!printed_version++) dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); diff --git a/drivers/base/core.c b/drivers/base/core.c index fab9f76..f4c9e60 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -557,6 +557,7 @@ static void klist_children_put(struct klist_node *n) */ void device_initialize(struct device *dev) { + dev->dma_offset = 0; dev->kobj.kset = devices_kset; kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 1eef267..a6044ed 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -473,7 +473,7 @@ static ssize_t dev_show_unique_id(struct device *dev, { drive_info_struct *drv = to_drv(dev); struct ctlr_info *h = to_hba(drv->dev.parent); - __u8 sn[16]; + __u8 sn[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; unsigned long flags; int ret = 0; diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index cef28a7..b2046e4 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -17,6 +17,10 @@ #include "pci.h" +#ifdef CONFIG_PALACIOS +#include +#endif + /** * pci_bus_alloc_resource - allocate a resource from a parent bus * @bus: PCI bus @@ -92,6 +96,13 @@ int pci_bus_add_device(struct pci_dev *dev) dev->is_added = 1; pci_proc_attach_device(dev); pci_create_sysfs_dev_files(dev); + +#ifdef CONFIG_PALACIOS + dev->dev.dma_offset = palacios_get_device_dma_offset + (dev->bus->number, dev->devfn >> 3, dev->devfn & 0x7); + printk("Palacios for %d:%d : DMA Offset is %lX\n", dev->bus->number, dev->devfn, (unsigned long)dev->dev.dma_offset); +#endif + return 0; } diff --git a/fs/compat.c b/fs/compat.c index dc7853a..c8fb2f3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -15,6 +15,7 @@ * published by the Free Software Foundation. 
*/ +#include #include #include #include @@ -817,8 +818,6 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, return retval; } -#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) - struct compat_old_linux_dirent { compat_ulong_t d_ino; compat_ulong_t d_offset; @@ -907,7 +906,7 @@ static int compat_filldir(void *__buf, const char *name, int namlen, struct compat_linux_dirent __user * dirent; struct compat_getdents_callback *buf = __buf; compat_ulong_t d_ino; - int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t)); + int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + namlen + 2, sizeof(compat_long_t)); buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) @@ -994,7 +993,7 @@ static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t { struct linux_dirent64 __user *dirent; struct compat_getdents_callback64 *buf = __buf; - int jj = NAME_OFFSET(dirent); + int jj = offsetof(struct linux_dirent64, d_name); /* matches *dirent */ int reclen = ALIGN(jj + namlen + 1, sizeof(u64)); u64 off; diff --git a/fs/readdir.c b/fs/readdir.c index 7723401..ab07ead 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -4,6 +4,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include #include #include #include @@ -54,7 +55,6 @@ EXPORT_SYMBOL(vfs_readdir); * anyway. Thus the special "fillonedir()" function for that * case (the low-level handlers don't need to care about this). 
*/ -#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) #ifdef __ARCH_WANT_OLD_READDIR @@ -152,7 +152,7 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, struct linux_dirent __user * dirent; struct getdents_callback * buf = (struct getdents_callback *) __buf; unsigned long d_ino; - int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(long)); + int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) @@ -237,7 +237,7 @@ static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, { struct linux_dirent64 __user *dirent; struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; - int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(u64)); + int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); /* matches *dirent */ buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) diff --git a/include/Kbuild b/include/Kbuild index 8d226bf..f1da0d9 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -10,3 +10,4 @@ header-y += video/ header-y += drm/ header-y += xen/ header-y += scsi/ +header-y += palacios/ diff --git a/include/linux/device.h b/include/linux/device.h index 2ea3e49..ad6273d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -407,6 +407,7 @@ struct device { allocations such descriptors. 
*/ struct device_dma_parameters *dma_parms; + u64 dma_offset; /* Palacios dma offset */ struct list_head dma_pools; /* dma pools (if dma'ble) */ diff --git a/include/palacios/Kbuild b/include/palacios/Kbuild new file mode 100644 index 0000000..4bed9d9 --- /dev/null +++ b/include/palacios/Kbuild @@ -0,0 +1 @@ +header-y += palacios.h \ No newline at end of file diff --git a/include/palacios/palacios.h b/include/palacios/palacios.h new file mode 100644 index 0000000..b7077a1 --- /dev/null +++ b/include/palacios/palacios.h @@ -0,0 +1,16 @@ +/* + * palacios.h + * + * Created on: Jul 12, 2011 + * Author: vedun + */ + +#ifndef __PALACIOS_H +#define __PALACIOS_H + +#include <linux/types.h> + +/* DMA offset to apply to PCI function bus:dev.func, or 0 for none. */ +uint64_t palacios_get_device_dma_offset(int bus, int dev, int func); + +#endif /* __PALACIOS_H */ diff --git a/init/main.c b/init/main.c index 1a9af60..8c8bc23 100644 --- a/init/main.c +++ b/init/main.c @@ -744,11 +744,10 @@ int do_one_initcall(initcall_t fn) calltime = ktime_get(); trace_boot_call(&call, fn); enable_boot_trace(); - } + - ret.result = fn(); + ret.result = fn(); - if (initcall_debug) { disable_boot_trace(); rettime = ktime_get(); delta = ktime_sub(rettime, calltime); @@ -756,6 +755,8 @@ int do_one_initcall(initcall_t fn) trace_boot_ret(&ret, fn); printk("initcall %pF returned %d after %Ld usecs\n", fn, ret.result, ret.duration); + } else { + ret.result = fn(); } msgbuf[0] = 0; diff --git a/kernel/async.c b/kernel/async.c index 27235f5..393e033 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -284,17 +284,17 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, if (initcall_debug && system_state == SYSTEM_BOOTING) { printk("async_waiting @ %i\n", task_pid_nr(current)); starttime = ktime_get(); - } - wait_event(async_done, lowest_in_progress(running) >= cookie); + wait_event(async_done, lowest_in_progress(running) >= cookie); - if (initcall_debug && system_state == SYSTEM_BOOTING) { endtime = ktime_get(); delta = ktime_sub(endtime, starttime); printk("async_continuing @ %i after %lli 
usec\n", task_pid_nr(current), (long long)ktime_to_ns(delta) >> 10); + } else { + wait_event(async_done, lowest_in_progress(running) >= cookie); } } EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain); diff --git a/lib/iomap.c b/lib/iomap.c index d322293..bd32c25 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -258,20 +258,27 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) resource_size_t start = pci_resource_start(dev, bar); resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); + void __iomem *ret = NULL; if (!len || !start) return NULL; if (maxlen && len > maxlen) len = maxlen; - if (flags & IORESOURCE_IO) - return ioport_map(start, len); + if (flags & IORESOURCE_IO) { + ret = ioport_map(start, len); + goto end; + } if (flags & IORESOURCE_MEM) { - if (flags & IORESOURCE_CACHEABLE) - return ioremap(start, len); - return ioremap_nocache(start, len); + if (flags & IORESOURCE_CACHEABLE) { + ret = ioremap(start, len); + goto end; + } + ret = ioremap_nocache(start, len); } +end: + printk("DEBUG : Mapping %lX..%lX to %lX\n", (unsigned long)start, (unsigned long)len, (unsigned long)ret); /* What? */ - return NULL; + return ret; } void pci_iounmap(struct pci_dev *dev, void __iomem * addr)