diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index abb559cd28d793d052b6408e606469538e80c65b..8ae28dc0b784e5a89a6ae2c3c90d4547c00e549c 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -33,6 +33,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu.h>
 #include <acpi/processor.h>
+#include <linux/kvm_para.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
@@ -665,7 +666,8 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		/* Dummy wait op - must do something useless after P_LVL2 read
 		   because chipsets cannot guarantee that STPCLK# signal
 		   gets asserted in time to freeze execution properly. */
-		inl(acpi_gbl_FADT.xpm_timer_block.address);
+		if (!kvm_para_available())
+			inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
 }
 
@@ -687,7 +689,8 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 	else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
 		inb(cx->address);
 		/* See comment in acpi_idle_do_entry() */
-		inl(acpi_gbl_FADT.xpm_timer_block.address);
+		if (!kvm_para_available())
+			inl(acpi_gbl_FADT.xpm_timer_block.address);
 	} else
 		return -ENODEV;
 	}
 }
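The guest-side intent of the two hunks above can be factored into a tiny helper. This is only an illustrative sketch (the helper name is hypothetical; the patch open-codes the check), included to make the purpose of the guard explicit:

	/*
	 * Hypothetical helper, not part of the patch: skip the dummy
	 * PM-timer read when running as a KVM guest, where no STPCLK#
	 * race exists and the port access would only cause a VM exit.
	 */
	static void acpi_idle_dummy_wait(void)
	{
		if (kvm_para_available())
			return;
		inl(acpi_gbl_FADT.xpm_timer_block.address);
	}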
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 6b237e3f4983046cdc8327b75c173952fbc23f86..cd66b94bc21b8eccca856b774e0002cbb6d1224b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -51,9 +51,22 @@ MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
 static struct vfsmount *balloon_mnt;
 #endif
 
+enum virtio_balloon_vq {
+	VIRTIO_BALLOON_VQ_INFLATE,
+	VIRTIO_BALLOON_VQ_DEFLATE,
+	VIRTIO_BALLOON_VQ_STATS,
+	VIRTIO_BALLOON_VQ_FREE_PAGE,
+	VIRTIO_BALLOON_VQ_MAX
+};
+
 struct virtio_balloon {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+	/* Balloon's own wq for cpu-intensive work items */
+	struct workqueue_struct *balloon_wq;
+	/* The free page reporting work item submitted to the balloon wq */
+	struct work_struct report_free_page_work;
 
 	/* The balloon servicing is delegated to a freezable workqueue. */
 	struct work_struct update_balloon_stats_work;
@@ -63,6 +76,13 @@ struct virtio_balloon {
 	spinlock_t stop_update_lock;
 	bool stop_update;
 
+	/* The new cmd id received from host */
+	uint32_t cmd_id_received;
+	/* The cmd id that is actively in use */
+	__virtio32 cmd_id_active;
+	/* Buffer to store the stop sign */
+	__virtio32 stop_cmd_id;
+
 	/* Waiting for host to ack the pages we released. */
 	wait_queue_head_t acked;
 
@@ -326,17 +346,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
 	virtqueue_kick(vq);
 }
 
-static void virtballoon_changed(struct virtio_device *vdev)
-{
-	struct virtio_balloon *vb = vdev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vb->stop_update_lock, flags);
-	if (!vb->stop_update)
-		queue_work(system_freezable_wq, &vb->update_balloon_size_work);
-	spin_unlock_irqrestore(&vb->stop_update_lock, flags);
-}
-
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
 	s64 target;
@@ -353,6 +362,34 @@ static inline s64 towards_target(struct virtio_balloon *vb)
 	return target - vb->num_pages;
 }
 
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb = vdev->priv;
+	unsigned long flags;
+	s64 diff = towards_target(vb);
+
+	if (diff) {
+		spin_lock_irqsave(&vb->stop_update_lock, flags);
+		if (!vb->stop_update)
+			queue_work(system_freezable_wq,
+				   &vb->update_balloon_size_work);
+		spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		virtio_cread(vdev, struct virtio_balloon_config,
+			     free_page_report_cmd_id, &vb->cmd_id_received);
+		if (vb->cmd_id_received !=
+		    VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID) {
+			spin_lock_irqsave(&vb->stop_update_lock, flags);
+			if (!vb->stop_update)
+				queue_work(vb->balloon_wq,
+					   &vb->report_free_page_work);
+			spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+		}
+	}
+}
+
 static void update_balloon_size(struct virtio_balloon *vb)
 {
 	u32 actual = vb->num_pages;
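For readers unfamiliar with the config-space handshake above: the host publishes a command id in virtio_balloon_config, and the driver re-reads it on every config interrupt; the stop id (0) cancels reporting, any other id starts a new round. A minimal sketch of that read (the helper name is hypothetical; the patch open-codes it in virtballoon_changed()):

	/* Sketch only: fetch the command id the host last wrote. */
	static u32 read_free_page_cmd_id(struct virtio_balloon *vb)
	{
		u32 id;

		virtio_cread(vb->vdev, struct virtio_balloon_config,
			     free_page_report_cmd_id, &id);
		return id;
	}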
@@ -425,44 +462,196 @@ static void update_balloon_size_func(struct work_struct *work)
 		queue_work(system_freezable_wq, work);
 }
 
+static void free_page_vq_cb(struct virtqueue *vq)
+{
+	unsigned int unused;
+
+	while (virtqueue_get_buf(vq, &unused))
+		;
+}
+
 static int init_vqs(struct virtio_balloon *vb)
 {
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
-	static const char * const names[] = { "inflate", "deflate", "stats" };
-	int err, nvqs;
+	struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+	vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+	const char *names[VIRTIO_BALLOON_VQ_MAX];
+	struct scatterlist sg;
+	int ret;
 
 	/*
-	 * We expect two virtqueues: inflate and deflate, and
-	 * optionally stat.
+	 * Inflateq and deflateq are used unconditionally. The names[] entry
+	 * will be NULL if the related feature is not enabled, in which case
+	 * find_vqs will not allocate the corresponding virtqueue.
 	 */
-	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
-	if (err)
-		return err;
+	callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
+	callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
 
-	vb->inflate_vq = vqs[0];
-	vb->deflate_vq = vqs[1];
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
-		struct scatterlist sg;
-		unsigned int num_stats;
-		vb->stats_vq = vqs[2];
+		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
+		callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
+	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
+		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = free_page_vq_cb;
+	}
+
+	ret = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
+					 vqs, callbacks, names, NULL, NULL);
+	if (ret)
+		return ret;
+	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
+	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
 
 		/*
 		 * Prime this virtqueue with one buffer so the hypervisor can
 		 * use it to signal us later (it can't be broken yet!).
 		 */
-		num_stats = update_balloon_stats(vb);
-
-		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
-		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
-		    < 0)
-			BUG();
+		sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+		ret = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb,
+					   GFP_KERNEL);
+		if (ret) {
+			dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n",
				 __func__);
+			return ret;
+		}
 		virtqueue_kick(vb->stats_vq);
 	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+
+	return 0;
+}
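init_vqs() above relies on the find_vqs() convention that a NULL entry in names[] suppresses allocation of that slot. A stripped-down sketch of the same pattern for a hypothetical device, assumed to run inside its probe routine (rx_done, ctrl_done and MY_F_CTRL_VQ are made-up names):

	/* Sketch: one mandatory queue, one feature-dependent queue. */
	struct virtqueue *vqs[2];
	vq_callback_t *callbacks[2] = { rx_done, NULL };
	const char *names[2] = { "rx", NULL };	/* NULL => not allocated */
	int err;

	if (virtio_has_feature(vdev, MY_F_CTRL_VQ)) {
		names[1] = "ctrl";
		callbacks[1] = ctrl_done;
	}
	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names,
				     NULL, NULL);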
+static int add_one_sg(struct virtqueue *vq, unsigned long pfn, uint32_t len)
+{
+	struct scatterlist sg;
+	unsigned int unused;
+
+	sg_init_table(&sg, 1);
+	sg_set_page(&sg, pfn_to_page(pfn), len, 0);
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	/*
+	 * Since this is an optimization feature, losing a couple of free
+	 * page hints isn't important. We simply return without adding
+	 * this page hint if the vq is full.
+	 * We are adding one entry each time, which essentially results in no
+	 * memory allocation, so the GFP_KERNEL flag below can be ignored.
+	 * Host works by polling the free page vq for hints after sending the
+	 * starting cmd id, so the driver doesn't need to kick after filling
+	 * the vq.
+	 * Lastly, there is always one entry reserved for the cmd id to use.
+	 *
+	 * TODO: The current implementation could be further improved by
+	 * stopping the reporting when the vq is full and continuing the
+	 * reporting when host notifies the driver that entries have been
+	 * used.
+	 */
+	if (vq->num_free > 1)
+		return virtqueue_add_inbuf(vq, &sg, 1, vq, GFP_KERNEL);
+
 	return 0;
 }
 
+static int virtio_balloon_send_free_pages(void *opaque, unsigned long pfn,
+					  unsigned long nr_pages)
+{
+	struct virtio_balloon *vb = (struct virtio_balloon *)opaque;
+	uint32_t len = nr_pages << PAGE_SHIFT;
+
+	/*
+	 * If a stop id or a new cmd id was just received from host, stop
+	 * the reporting and return -EINTR to indicate an active stop.
+	 *
+	 * Ideally, cmd_id_received would be accessed under a lock, which
+	 * would ensure that no more entries are added to the vq once a stop
+	 * cmd id is received from host. That would require host to wait for
+	 * the driver's ACK that cmd_id_received has been updated, but host
+	 * does not assume that the driver is always responsive. So,
+	 * theoretically, some entries may remain in the vq after host has
+	 * exited the optimization. This isn't an issue, because the entries
+	 * simply contain guest physical addresses: there is no allocated
+	 * memory to free and no DMA-mapped pages to unmap, since
+	 * virtio-balloon works with VIRTIO_F_IOMMU_PLATFORM disabled.
+	 */
+	if (virtio32_to_cpu(vb->vdev, vb->cmd_id_active) !=
+	    vb->cmd_id_received)
+		return -EINTR;
+
+	return add_one_sg(vb->free_page_vq, pfn, len);
+}
+
+static int send_start_cmd_id(struct virtio_balloon *vb, uint32_t cmd_id)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+
+	vb->cmd_id_active = cpu_to_virtio32(vb->vdev, cmd_id);
+	sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
+	return virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+}
+
+static int send_stop_cmd_id(struct virtio_balloon *vb)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+
+	sg_init_one(&sg, &vb->stop_cmd_id, sizeof(vb->stop_cmd_id));
+	return virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+}
+
+static void report_free_page_func(struct work_struct *work)
+{
+	struct virtio_balloon *vb;
+	struct virtqueue *vq;
+	int ret;
+
+	vb = container_of(work, struct virtio_balloon, report_free_page_work);
+	vq = vb->free_page_vq;
+
+	/* Start by sending the received cmd id to host with an outbuf. */
+	ret = send_start_cmd_id(vb, vb->cmd_id_received);
+	if (unlikely(ret))
+		goto err;
+
+	ret = walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages);
+	/*
+	 * -EINTR is the case that host actively stops the reporting, so we
+	 * don't treat it as an error that needs to bail out.
+	 */
+	if (unlikely(ret == -EIO))
+		goto err;
+
+	/* End by sending a stop id to host with an outbuf. */
+	ret = send_stop_cmd_id(vb);
+	if (unlikely(ret))
+		goto err;
+
+	/*
+	 * The used buffers are not detached here, because host consumes
+	 * entries asynchronously and will send a vq interrupt after putting
+	 * back all the used buffers. So free_page_vq_cb will take care of
+	 * the used buffer detaching work.
+	 */
+	return;
+err:
+	dev_err(&vb->vdev->dev, "%s: free page vq failure, ret=%d\n",
+		__func__, ret);
+}
+
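The -EINTR path above hinges on comparing the id this walk started with against the id most recently read from config space. Purely as an illustration (the patch open-codes the comparison; this helper is hypothetical):

	/* Hypothetical helper: true once host has asked for a stop, or has
	 * started a newer reporting round than the one we are serving. */
	static bool report_cancelled(struct virtio_balloon *vb)
	{
		return virtio32_to_cpu(vb->vdev, vb->cmd_id_active) !=
		       vb->cmd_id_received;
	}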
 #ifdef CONFIG_BALLOON_COMPACTION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -547,6 +736,7 @@ static struct file_system_type balloon_fs = {
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
+	__u32 poison_val;
 	int err;
 
 	if (!vdev->config->get) {
@@ -576,18 +766,41 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_free_vb;
 
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		/*
+		 * There is always one entry reserved for cmd id, so the ring
+		 * size needs to be at least two to report free page hints.
+		 */
+		if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
+			err = -ENOSPC;
+			goto out_del_vqs;
+		}
+		vb->balloon_wq = alloc_workqueue("balloon-wq",
+					WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
+		if (!vb->balloon_wq) {
+			err = -ENOMEM;
+			goto out_del_vqs;
+		}
+		vb->stop_cmd_id = cpu_to_virtio32(vb->vdev,
+					VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID);
+		INIT_WORK(&vb->report_free_page_work, report_free_page_func);
+		if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+			virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+				      poison_val, &poison_val);
+		}
+	}
+
 	vb->nb.notifier_call = virtballoon_oom_notify;
 	vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY;
 	err = register_oom_notifier(&vb->nb);
 	if (err < 0)
-		goto out_del_vqs;
+		goto out_del_balloon_wq;
 
 #ifdef CONFIG_BALLOON_COMPACTION
 	balloon_mnt = kern_mount(&balloon_fs);
 	if (IS_ERR(balloon_mnt)) {
 		err = PTR_ERR(balloon_mnt);
 		unregister_oom_notifier(&vb->nb);
-		goto out_del_vqs;
+		goto out_del_balloon_wq;
 	}
 
 	vb->vb_dev_info.migratepage = virtballoon_migratepage;
@@ -597,7 +810,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		kern_unmount(balloon_mnt);
 		unregister_oom_notifier(&vb->nb);
 		vb->vb_dev_info.inode = NULL;
-		goto out_del_vqs;
+		goto out_del_balloon_wq;
 	}
 	vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
 #endif
@@ -608,6 +821,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		virtballoon_changed(vdev);
 	return 0;
 
+out_del_balloon_wq:
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		destroy_workqueue(vb->balloon_wq);
 out_del_vqs:
 	vdev->config->del_vqs(vdev);
 out_free_vb:
@@ -641,6 +857,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	cancel_work_sync(&vb->update_balloon_size_work);
 	cancel_work_sync(&vb->update_balloon_stats_work);
 
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		cancel_work_sync(&vb->report_free_page_work);
+		destroy_workqueue(vb->balloon_wq);
+	}
+
 	remove_common(vb);
 #ifdef CONFIG_BALLOON_COMPACTION
 	if (vb->vb_dev_info.inode)
@@ -684,6 +905,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
 
 static int virtballoon_validate(struct virtio_device *vdev)
 {
+	if (!page_poisoning_enabled())
+		__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
 	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
 	return 0;
 }
@@ -692,6 +916,8 @@ static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+	VIRTIO_BALLOON_F_PAGE_POISON,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
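A hedged note on the poison handshake above: PAGE_POISON is the pattern from include/linux/poison.h (to my understanding typically 0xaa, or 0x00 with CONFIG_PAGE_POISONING_ZERO), so the 32-bit value written to config space is simply that byte repeated. Illustration only; the values are my assumption, not something this patch defines:

	/* What the device should read back from poison_val after probe
	 * when poisoning is active. */
	__u32 poison_val;

	memset(&poison_val, PAGE_POISON, sizeof(poison_val));
	/* PAGE_POISON == 0xaa  ->  poison_val == 0xaaaaaaaa
	 * PAGE_POISON == 0x00  ->  poison_val == 0x00000000 */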
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ac1f06a4be6b22faf3883c760515a042a6d347e..4d1aff80669ce4a5d7c40594dca92294a681d39c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1957,6 +1957,12 @@ extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
 
+extern int walk_free_mem_block(void *opaque,
+			       int min_order,
+			       int (*report_pfn_range)(void *opaque,
+						       unsigned long pfn,
+						       unsigned long num));
+
 /*
  * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
  * into the buddy system. The freed pages will be poisoned with pattern
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6894976b54e376da3c203932ec2c1152ddfc527d..7a5040d8614ceab93f526d7e72342e3b0e66a2ac 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -31,16 +31,25 @@
 #include <asm/errno.h>
 #endif
 
+/* entries must start with the following structure */
+struct plist {
+	struct plist *next;
+	struct plist *last; /* only valid in the 1st entry */
+};
+
 struct ptr_ring {
 	int producer ____cacheline_aligned_in_smp;
 	spinlock_t producer_lock;
 	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
 	int consumer_tail; /* next entry to invalidate */
+	struct plist *consumer_list;
+	int list_num;
 	spinlock_t consumer_lock;
 	/* Shared consumer/producer data */
 	/* Read-only by both the producer and the consumer */
 	int size ____cacheline_aligned_in_smp; /* max entries in queue */
 	int batch; /* number of entries to consume in a batch */
+	int list_size;
 	void **queue;
 };
 
@@ -121,10 +130,42 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 }
 
 /*
- * Note: resize (below) nests producer lock within consumer lock, so if you
- * consume in interrupt or BH context, you must disable interrupts/BH when
- * calling this.
+ * Note: the _fallback variants of the resize API should be used together
+ * with this.
  */
+static inline int ptr_ring_produce_fallback(struct ptr_ring *r, void *ptr)
+{
+	int ret;
+	unsigned long flags;
+	struct plist *p = ptr;
+
+	p->next = NULL;
+	p->last = p;
+
+	spin_lock_irqsave(&r->producer_lock, flags);
+	ret = __ptr_ring_produce(r, ptr);
+	if (ret && r->list_size) {
+		spin_lock(&r->consumer_lock);
+		ret = __ptr_ring_produce(r, ptr);
+		if (ret && r->list_num < r->list_size) {
+			int producer = r->producer ? r->producer - 1 :
+				       r->size - 1;
+			struct plist *first = r->queue[producer];
+
+			BUG_ON(!first);
+
+			first->last->next = p;
+			first->last = p;
+
+			r->list_num++;
+			ret = 0;	/* entry was queued on the fallback list */
+		}
+		spin_unlock(&r->consumer_lock);
+	}
+
+	spin_unlock_irqrestore(&r->producer_lock, flags);
+
+	return ret;
+}
+
 static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
 {
 	int ret;
@@ -136,6 +177,7 @@ static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
 	return ret;
 }
 
+
 static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
 {
 	int ret;
@@ -373,6 +415,27 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
 	return ptr;
 }
 
+static inline void *ptr_ring_consume_fallback(struct ptr_ring *r)
+{
+	unsigned long flags;
+	struct plist *ptr;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	if (r->consumer_list) {
+		ptr = r->consumer_list;
+		r->consumer_list = ptr->next;
+		r->list_num--;
+	} else {
+		ptr = __ptr_ring_consume(r);
+		if (ptr)
+			r->consumer_list = ptr->next;
+	}
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+	return ptr;
+}
+
 static inline int ptr_ring_consume_batched(struct ptr_ring *r,
 					   void **array, int n)
 {
@@ -488,7 +551,8 @@ static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
 		r->batch = 1;
 }
 
-static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
+static inline int ptr_ring_init_fallback(struct ptr_ring *r, int size, gfp_t gfp,
+					 int fallback_size)
 {
 	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
 	if (!r->queue)
@@ -498,10 +562,17 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
 	r->producer = r->consumer_head = r->consumer_tail = 0;
 	spin_lock_init(&r->producer_lock);
 	spin_lock_init(&r->consumer_lock);
+	r->list_size = fallback_size;
+	r->consumer_list = NULL;
 
 	return 0;
 }
 
+static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
+{
+	return ptr_ring_init_fallback(r, size, gfp, 0);
+}
+
 /*
  * Return entries into ring. Destroy entries that don't fit.
 *
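A usage sketch for the new _fallback API above (the entry type, ring sizes and demo function are hypothetical): entries must embed struct plist as their first member so overflow entries can be chained off the most recently produced ring slot, and producer and consumer must consistently use the _fallback variants.

	struct my_entry {
		struct plist list;	/* must be the first member */
		unsigned long pfn;
	};

	static struct ptr_ring ring;

	static int fallback_ring_demo(struct my_entry *e)
	{
		int err;

		/* 256 ring slots, plus room for up to 64 overflow entries. */
		err = ptr_ring_init_fallback(&ring, 256, GFP_KERNEL, 64);
		if (err)
			return err;

		/* Fails only when both the ring and the list are full. */
		err = ptr_ring_produce_fallback(&ring, e);
		if (err)
			return err;

		e = ptr_ring_consume_fallback(&ring);	/* NULL when empty */
		return e ? 0 : -ENOENT;
	}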
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index bbf32524ab279d8e353ca3d9d854a1ab2a12805c..fab02133a9197a43cd9df33728e657e1679c5e16 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -35,7 +35,7 @@ static inline void virtio_rmb(bool weak_barriers)
 	if (weak_barriers)
 		virt_rmb();
 	else
-		rmb();
+		dma_rmb();
 }
 
 static inline void virtio_wmb(bool weak_barriers)
@@ -43,7 +43,7 @@ static inline void virtio_wmb(bool weak_barriers)
 	if (weak_barriers)
 		virt_wmb();
 	else
-		wmb();
+		dma_wmb();
 }
 
 static inline void virtio_store_mb(bool weak_barriers,
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13b8cb563892b7ca66a6268738b452c8428f006c..1477c1792ffd1221a0805e82643a2f62ecede0c0 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,15 +34,22 @@
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
+#define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
+#define VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID	0
 struct virtio_balloon_config {
 	/* Number of pages host wants Guest to give up. */
 	__u32 num_pages;
 	/* Number of pages we've actually got in balloon. */
 	__u32 actual;
+	/* Free page report command id, readonly by guest */
+	__u32 free_page_report_cmd_id;
+	/* Stores PAGE_POISON if page poisoning is in use */
+	__u32 poison_val;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 905db9d7962fcb1776c0e7ffb1618fb6e4084a75..a13077bb6de863fbe0e7a1bbadb8d50e43d1c5dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5038,6 +5038,103 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 	show_swap_cache_info();
 }
 
+/*
+ * Walk through a free page list and report the found pfn range via the
+ * callback.
+ *
+ * Return 0 if it completes the reporting. Otherwise, return the non-zero
+ * value returned from the callback.
+ */
+static int walk_free_page_list(void *opaque,
+			       struct zone *zone,
+			       int order,
+			       enum migratetype mt,
+			       int (*report_pfn_range)(void *,
+						       unsigned long,
+						       unsigned long))
+{
+	struct page *page;
+	struct list_head *list;
+	unsigned long pfn, flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	list = &zone->free_area[order].free_list[mt];
+	list_for_each_entry(page, list, lru) {
+		pfn = page_to_pfn(page);
+		ret = report_pfn_range(opaque, pfn, 1 << order);
+		if (ret)
+			break;
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return ret;
+}
+
+/**
+ * walk_free_mem_block - Walk through the free page blocks in the system
+ * @opaque: the context passed from the caller
+ * @min_order: the minimum order of free lists to check
+ * @report_pfn_range: the callback to report the pfn range of the free pages
+ *
+ * If the callback returns a non-zero value, the walk over the free page
+ * blocks stops immediately; otherwise it continues.
+ *
+ * Please note that there are no locking guarantees for the callback and
+ * that the reported pfn range might be freed or disappear after the
+ * callback returns, so the caller has to be very careful how it is used.
+ *
+ * The callback itself must not sleep or perform any operations which would
+ * require any memory allocations directly (not even GFP_NOWAIT/GFP_ATOMIC)
+ * or via any lock dependency. It is generally advisable to keep the
+ * callback as simple as possible and to defer any heavy lifting to a
+ * different context.
+ *
+ * There is no guarantee that each free range will be reported only once
+ * during one walk_free_mem_block invocation.
+ *
+ * pfn_to_page on the given range is strongly discouraged and if there is
+ * an absolute need for that make sure to contact MM people to discuss
+ * potential problems.
+ *
+ * The function itself might sleep so it cannot be called from atomic
+ * contexts.
+ *
+ * In general, low orders tend to be very volatile, so it makes more sense
+ * to query larger orders first for optimizations such as ballooning; this
+ * also reduces the overhead.
+ *
+ * Return 0 if it completes the reporting. Otherwise, return the non-zero
+ * value returned from the callback.
+ */
+int walk_free_mem_block(void *opaque,
+			int min_order,
+			int (*report_pfn_range)(void *opaque,
+						unsigned long pfn,
+						unsigned long num))
+{
+	struct zone *zone;
+	int order;
+	enum migratetype mt;
+	int ret;
+
+	for_each_populated_zone(zone) {
+		for (order = MAX_ORDER - 1; order >= min_order; order--) {
+			for (mt = 0; mt < MIGRATE_TYPES; mt++) {
+				ret = walk_free_page_list(opaque, zone,
+							  order, mt,
+							  report_pfn_range);
+				if (ret)
+					return ret;
+			}
+			cond_resched();
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(walk_free_mem_block);
+
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
 	zoneref->zone = zone;
diff --git a/mm/page_poison.c b/mm/page_poison.c
index aa2b3d34e8eaa26018267314fe88568b382dbebc..830f60489b14b4dd0f15313201a34b0e41dbe65d 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
 }
 early_param("page_poison", early_page_poison_param);
 
+/**
+ * page_poisoning_enabled - check if page poisoning is enabled
+ *
+ * Return true if page poisoning is enabled, or false if not.
+ */
 bool page_poisoning_enabled(void)
 {
 	/*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
 		(!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
 		debug_pagealloc_enabled()));
 }
+EXPORT_SYMBOL_GPL(page_poisoning_enabled);
 
 static void poison_page(struct page *page)
 {
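To close, a minimal sketch of how a client is expected to drive walk_free_mem_block(); the counting callback and wrapper here are hypothetical (virtio-balloon's real callback is virtio_balloon_send_free_pages() above):

	/* Count free memory in blocks of at least 2^9 pages (2MB with 4K
	 * pages). The callback runs under zone->lock, so it only adds. */
	static int count_range(void *opaque, unsigned long pfn,
			       unsigned long num)
	{
		unsigned long *total = opaque;

		*total += num;
		return 0;	/* non-zero would abort the walk */
	}

	static unsigned long count_high_order_free_pages(void)
	{
		unsigned long total = 0;

		walk_free_mem_block(&total, 9, &count_range);
		return total;
	}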