8. Happy hacking.
- -----------------------------------
-
- Maintainers List (try to look for most precise areas first)
+ Descriptions of section entries:
+
+ P: Person (obsolete)
+ M: Mail patches to: FullName <address@domain>
+ L: Mailing list that is relevant to this area
+ W: Web-page with status/info
+ T: SCM tree type and location. Type is one of: git, hg, quilt, stgit.
+ S: Status, one of the following:
+ Supported: Someone is actually paid to look after this.
+ Maintained: Someone actually looks after it.
+ Odd Fixes: It has a maintainer but they don't have time to do
+ much other than throw the odd patch in. See below..
+ Orphan: No current maintainer [but maybe you could take the
+ role as you write your new code].
+ Obsolete: Old code. Something tagged obsolete generally means
+ it has been replaced by a better system and you
+ should be using that.
+ F: Files and directories with wildcard patterns.
+ A trailing slash includes all files and subdirectory files.
+ F: drivers/net/ all files in and below drivers/net
+ F: drivers/net/* all files in drivers/net, but not below
+ F: */net/* all files in "any top level directory"/net
+ One pattern per line. Multiple F: lines acceptable.
+ X: Files and directories that are NOT maintained, same rules as F:
+ Files exclusions are tested before file matches.
+ Can be useful for excluding a specific subdirectory, for instance:
+ F: net/
+ X: net/ipv6/
+ matches all files in and below net excluding net/ipv6/
+ K: Keyword perl extended regex pattern to match content in a
+ patch or file. For instance:
+ K: of_get_profile
+ matches patches or files that contain "of_get_profile"
+ K: \b(printk|pr_(info|err))\b
+ matches patches or files that contain one or more of the words
+ printk, pr_info or pr_err
+ One regex pattern per line. Multiple K: lines acceptable.
Note: For the hard of thinking, this list is meant to remain in alphabetical
order. If you could add yourselves to it in alphabetical order that would be
so much easier [Ed]
- P: Person (obsolete)
- M: Mail patches to: FullName <address@domain>
- L: Mailing list that is relevant to this area
- W: Web-page with status/info
- T: SCM tree type and location. Type is one of: git, hg, quilt, stgit.
- S: Status, one of the following:
-
- Supported: Someone is actually paid to look after this.
- Maintained: Someone actually looks after it.
- Odd Fixes: It has a maintainer but they don't have time to do
- much other than throw the odd patch in. See below..
- Orphan: No current maintainer [but maybe you could take the
- role as you write your new code].
- Obsolete: Old code. Something tagged obsolete generally means
- it has been replaced by a better system and you
- should be using that.
+ Maintainers List (try to look for most precise areas first)
- F: Files and directories with wildcard patterns.
- A trailing slash includes all files and subdirectory files.
- F: drivers/net/ all files in and below drivers/net
- F: drivers/net/* all files in drivers/net, but not below
- F: */net/* all files in "any top level directory"/net
- One pattern per line. Multiple F: lines acceptable.
- X: Files and directories that are NOT maintained, same rules as F:
- Files exclusions are tested before file matches.
- Can be useful for excluding a specific subdirectory, for instance:
- F: net/
- X: net/ipv6/
- matches all files in and below net excluding net/ipv6/
+ -----------------------------------
3C505 NETWORK DRIVER
M: Philip Blundell <philb@gnu.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
+ ARM/CONTEC MICRO9 MACHINE SUPPORT
+ M: Hubert Feurstein <hubert.feurstein@contec.at>
+ S: Maintained
+ F: arch/arm/mach-ep93xx/micro9.c
+
ARM/CORGI MACHINE SUPPORT
M: Richard Purdie <rpurdie@rpsys.net>
S: Maintained
ATM
M: Chas Williams <chas@cmf.nrl.navy.mil>
- L: linux-atm-general@lists.sourceforge.net (subscribers-only)
+ L: linux-atm-general@lists.sourceforge.net (moderated for non-subscribers)
L: netdev@vger.kernel.org
W: http://linux-atm.sourceforge.net
S: Maintained
S: Supported
F: drivers/net/tg3.*
+ BROCADE BFA FC SCSI DRIVER
+ P: Jing Huang
+ M: huangj@brocade.com
+ L: linux-scsi@vger.kernel.org
+ S: Supported
+ F: drivers/scsi/bfa/
+
BSG (block layer generic sg v4 driver)
M: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
L: linux-scsi@vger.kernel.org
CORETEMP HARDWARE MONITORING DRIVER
M: Rudolf Marek <r.marek@assembler.cz>
+ M: Huaxu Wan <huaxu.wan@intel.com>
L: lm-sensors@lm-sensors.org
S: Maintained
F: Documentation/hwmon/coretemp
F: drivers/scsi/dpt*
F: drivers/scsi/dpt/
+DRBD DRIVER
+P: Philipp Reisner
+P: Lars Ellenberg
+M: drbd-dev@lists.linbit.com
+L: drbd-user@lists.linbit.com
+W: http://www.drbd.org
+T: git git://git.drbd.org/linux-2.6-drbd.git drbd
+T: git git://git.drbd.org/drbd-8.3.git
+S: Supported
+F: drivers/block/drbd/
+F: lib/lru_cache.c
+F: Documentation/blockdev/drbd/
+
DRIVER CORE, KOBJECTS, AND SYSFS
M: Greg Kroah-Hartman <gregkh@suse.de>
T: quilt kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
F: fs/*
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
- M: Riku Voipio <riku.vipio@iki.fi>
+ M: Riku Voipio <riku.voipio@iki.fi>
L: lm-sensors@lm-sensors.org
S: Maintained
F: drivers/hwmon/f75375s.c
F: arch/powerpc/sysdev/qe_lib/
F: arch/powerpc/include/asm/*qe.h
- FREESCALE USB PERIPHERIAL DRIVERS
+ FREESCALE USB PERIPHERAL DRIVERS
M: Li Yang <leoli@freescale.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@ozlabs.org
F: fs/fscache/
F: include/linux/fscache*.h
- TRACING
- M: Steven Rostedt <rostedt@goodmis.org>
- M: Frederic Weisbecker <fweisbec@gmail.com>
- M: Ingo Molnar <mingo@redhat.com>
- T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core
- S: Maintained
- F: Documentation/trace/ftrace.txt
- F: arch/*/*/*/ftrace.h
- F: arch/*/kernel/ftrace.c
- F: include/*/ftrace.h include/trace/ include/linux/trace*.h
- F: kernel/trace/
-
FUJITSU FR-V (FRV) PORT
M: David Howells <dhowells@redhat.com>
S: Maintained
F: include/asm-generic
GENERIC UIO DRIVER FOR PCI DEVICES
- M: Michael S. Tsirkin <mst@redhat.com>
+ M: "Michael S. Tsirkin" <mst@redhat.com>
L: kvm@vger.kernel.org
- L: linux-kernel@vger.kernel.org
S: Supported
F: drivers/uio/uio_pci_generic.c
W: http://www.linux1394.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6.git
S: Maintained
+ F: Documentation/debugging-via-ohci1394.txt
F: drivers/ieee1394/
IEEE 1394 RAW I/O DRIVER
KERNEL JANITORS
L: kernel-janitors@vger.kernel.org
- W: http://www.kerneljanitors.org/
- S: Maintained
+ W: http://janitor.kernelnewbies.org/
+ S: Odd Fixes
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: "J. Bruce Fields" <bfields@fieldses.org>
F: drivers/block/nbd.c
F: include/linux/nbd.h
+ NETWORK DROP MONITOR
+ M: Neil Horman <nhorman@tuxdriver.com>
+ L: netdev@vger.kernel.org
+ S: Maintained
+ W: https://fedorahosted.org/dropwatch/
+ F: net/core/drop_monitor.c
+
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
L: netdev@vger.kernel.org
W: http://www.linuxfoundation.org/en/Net
+ W: http://patchwork.ozlabs.org/project/netdev/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git
S: Maintained
F: net/
F: drivers/video/nvidia/
OMAP SUPPORT
- M: "Tony Lindgren <tony@atomide.com>" <tony@atomide.com>
+ M: Tony Lindgren <tony@atomide.com>
L: linux-omap@vger.kernel.org
W: http://www.muru.com/linux/omap/
W: http://linux.omap.com/
F: Documentation/i2c/busses/i2c-ocores
F: drivers/i2c/busses/i2c-ocores.c
+ OPEN FIRMWARE AND FLATTENED DEVICE TREE
+ M: Grant Likely <grant.likely@secretlab.ca>
+ L: devicetree-discuss@lists.ozlabs.org
+ W: http://fdt.secretlab.ca
+ S: Maintained
+ F: drivers/of
+ F: include/linux/of*.h
+ K: of_get_property
+
OPROFILE
M: Robert Richter <robert.richter@amd.com>
L: oprofile-list@lists.sf.net
PARISC ARCHITECTURE
M: Kyle McMartin <kyle@mcmartin.ca>
M: Helge Deller <deller@gmx.de>
+ M: "James E.J. Bottomley" <jejb@parisc-linux.org>
L: linux-parisc@vger.kernel.org
W: http://www.parisc-linux.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kyle/parisc-2.6.git
M: Paul Mackerras <paulus@samba.org>
M: Ingo Molnar <mingo@elte.hu>
S: Supported
+ F: kernel/perf_event.c
+ F: include/linux/perf_event.h
+ F: arch/*/*/kernel/perf_event.c
+ F: arch/*/include/asm/perf_event.h
+ F: arch/*/lib/perf_event.c
+ F: arch/*/kernel/perf_callchain.c
+ F: tools/perf/
PERSONALITY HANDLING
M: Christoph Hellwig <hch@infradead.org>
RALINK RT2X00 WIRELESS LAN DRIVER
P: rt2x00 project
L: linux-wireless@vger.kernel.org
- L: users@rt2x00.serialmonkey.com
+ L: users@rt2x00.serialmonkey.com (moderated for non-subscribers)
W: http://rt2x00.serialmonkey.com/
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ivd/rt2x00.git
F: include/linux/sched.h
SCORE ARCHITECTURE
- P: Chen Liqin
- M: liqin.chen@sunplusct.com
- P: Lennox Wu
- M: lennox.wu@gmail.com
+ M: Chen Liqin <liqin.chen@sunplusct.com>
+ M: Lennox Wu <lennox.wu@gmail.com>
W: http://www.sunplusct.com
S: Supported
+ F: arch/score/
SCSI CDROM DRIVER
M: Jens Axboe <axboe@kernel.dk>
F: drivers/mmc/host/sdricoh_cs.c
SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
- S: Orphan
- L: linux-mmc@vger.kernel.org
- F: drivers/mmc/host/sdhci.*
+ S: Orphan
+ L: linux-mmc@vger.kernel.org
+ F: drivers/mmc/host/sdhci.*
SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
M: Anton Vorontsov <avorontsov@ru.mvista.com>
L: linuxppc-dev@ozlabs.org
- L: linux-mmc@vger.kernel.org
+ L: linux-mmc@vger.kernel.org
S: Maintained
- F: drivers/mmc/host/sdhci-of.*
+ F: drivers/mmc/host/sdhci-of.*
SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) SAMSUNG DRIVER
M: Ben Dooks <ben-linux@fluff.org>
- L: linux-mmc@vger.kernel.org
+ L: linux-mmc@vger.kernel.org
S: Maintained
F: drivers/mmc/host/sdhci-s3c.c
F: include/linux/ata.h
F: include/linux/libata.h
+ SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
+ P: Jayamohan Kallickal
+ M: jayamohank@serverengines.com
+ L: linux-scsi@vger.kernel.org
+ W: http://www.serverengines.com
+ S: Supported
+ F: drivers/scsi/be2iscsi/
+
SERVER ENGINES 10Gbps NIC - BladeEngine 2 DRIVER
M: Sathya Perla <sathyap@serverengines.com>
M: Subbu Seetharaman <subbus@serverengines.com>
F: drivers/usb/host/ohci-lh7a40*
SIMPLE FIRMWARE INTERFACE (SFI)
- P: Len Brown
- M: lenb@kernel.org
+ M: Len Brown <lenb@kernel.org>
L: sfi-devel@simplefirmware.org
W: http://simplefirmware.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-sfi-2.6.git
S: Maintained
F: drivers/char/tpm/
+ TRACING
+ M: Steven Rostedt <rostedt@goodmis.org>
+ M: Frederic Weisbecker <fweisbec@gmail.com>
+ M: Ingo Molnar <mingo@redhat.com>
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core
+ S: Maintained
+ F: Documentation/trace/ftrace.txt
+ F: arch/*/*/*/ftrace.h
+ F: arch/*/kernel/ftrace.c
+ F: include/*/ftrace.h
+ F: include/linux/trace*.h
+ F: include/trace/
+ F: kernel/trace/
+
TRIVIAL PATCHES
M: Jiri Kosina <trivial@kernel.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git
F: drivers/vlynq/vlynq.c
F: include/linux/vlynq.h
+ VMWARE VMXNET3 ETHERNET DRIVER
+ M: Shreyas Bhatewara <sbhatewara@vmware.com>
+ M: "VMware, Inc." <pv-drivers@vmware.com>
+ L: netdev@vger.kernel.org
+ S: Maintained
+ F: drivers/net/vmxnet3/
+
VOLTAGE AND CURRENT REGULATOR FRAMEWORK
M: Liam Girdwood <lrg@slimlogic.co.uk>
M: Mark Brown <broonie@opensource.wolfsonmicro.com>
F: drivers/scsi/wd7000.c
WINBOND CIR DRIVER
- P: David Härdeman
- M: david@hardeman.nu
+ M: David Härdeman <david@hardeman.nu>
S: Maintained
F: drivers/input/misc/winbond-cir.c
F: include/linux/wm97xx.h
WOLFSON MICROELECTRONICS PMIC DRIVERS
- P: Mark Brown
- M: broonie@opensource.wolfsonmicro.com
- L: linux-kernel@vger.kernel.org
+ M: Mark Brown <broonie@opensource.wolfsonmicro.com>
T: git git://opensource.wolfsonmicro.com/linux-2.6-audioplus
W: http://opensource.wolfsonmicro.com/node/8
S: Supported
static int cfq_slice_async = HZ / 25;
static const int cfq_slice_async_rq = 2;
static int cfq_slice_idle = HZ / 125;
+static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static const int cfq_hist_divisor = 4;
/*
* offset from end of service tree
*/
#define CFQ_MIN_TT (2)
+/*
+ * Allow merged cfqqs to perform this amount of seeky I/O before
+ * deciding to break the queues up again.
+ */
+#define CFQQ_COOP_TOUT (HZ)
+
#define CFQ_SLICE_SCALE (5)
#define CFQ_HW_QUEUE_MIN (5)
struct cfq_rb_root {
struct rb_root rb;
struct rb_node *left;
+ unsigned count;
};
-#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }
+#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, }
/*
* Per process-grouping structure
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
+ unsigned int seek_samples;
+ u64 seek_total;
+ sector_t seek_mean;
+ sector_t last_request_pos;
+ unsigned long seeky_start;
+
pid_t pid;
+
+ struct cfq_rb_root *service_tree;
+ struct cfq_queue *new_cfqq;
};
+/*
+ * First index in the service_trees.
+ * IDLE is handled separately, so it has negative index
+ */
+enum wl_prio_t {
+ IDLE_WORKLOAD = -1, /* idle class: kept in cfqd->service_tree_idle, never used as an array index */
+ BE_WORKLOAD = 0, /* queues that are neither idle nor RT class; row 0 of service_trees */
+ RT_WORKLOAD = 1 /* cfq_class_rt() queues; row 1 of service_trees */
+};
+
+/*
+ * Second index in the service_trees.
+ */
+enum wl_type_t {
+ ASYNC_WORKLOAD = 0, /* queue is not marked sync */
+ SYNC_NOIDLE_WORKLOAD = 1, /* sync queue without the idle_window flag */
+ SYNC_WORKLOAD = 2 /* sync queue with the idle_window flag */
+};
+
+
/*
* Per block device queue structure
*/
struct request_queue *queue;
/*
- * rr list of queues with requests and the count of them
+ * rr lists of queues with requests, one rr for each priority class.
+ * Counts are embedded in the cfq_rb_root
+ */
+ struct cfq_rb_root service_trees[2][3];
+ struct cfq_rb_root service_tree_idle;
+ /*
+ * The priority currently being served
*/
- struct cfq_rb_root service_tree;
+ enum wl_prio_t serving_prio;
+ enum wl_type_t serving_type;
+ unsigned long workload_expires;
/*
* Each priority tree is sorted by next_request position. These
struct rb_root prio_trees[CFQ_PRIO_LISTS];
unsigned int busy_queues;
+ unsigned int busy_queues_avg[2];
int rq_in_driver[2];
int sync_flight;
unsigned long last_end_sync_rq;
};
+/*
+ * Map a (priority class, workload type) pair to its service tree in @cfqd.
+ * The idle class has a single dedicated tree, so @type is ignored for it;
+ * any other class selects service_trees[prio][type].
+ */
+static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio,
+					    enum wl_type_t type,
+					    struct cfq_data *cfqd)
+{
+	return prio == IDLE_WORKLOAD ? &cfqd->service_tree_idle
+				     : &cfqd->service_trees[prio][type];
+}
+
enum cfqq_state_flags {
CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
CFQ_CFQQ_FLAG_sync, /* synchronous queue */
- CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */
+ CFQ_CFQQ_FLAG_coop, /* cfqq is shared */
+ CFQ_CFQQ_FLAG_coop_preempt, /* coop preempt */
};
#define CFQ_CFQQ_FNS(name) \
CFQ_CFQQ_FNS(slice_new);
CFQ_CFQQ_FNS(sync);
CFQ_CFQQ_FNS(coop);
+ CFQ_CFQQ_FNS(coop_preempt);
#undef CFQ_CFQQ_FNS
#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
#define cfq_log(cfqd, fmt, args...) \
blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
+/*
+ * Priority-class index of @cfqq. The idle class is tested first, then RT;
+ * a queue that is neither is reported as BE_WORKLOAD.
+ */
+static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
+{
+	if (cfq_class_idle(cfqq))
+		return IDLE_WORKLOAD;
+
+	return cfq_class_rt(cfqq) ? RT_WORKLOAD : BE_WORKLOAD;
+}
+
+
+/*
+ * Workload-type index of @cfqq: async queues form one type, and sync
+ * queues are split by whether the idle_window flag is set.
+ */
+static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
+{
+	if (cfq_cfqq_sync(cfqq))
+		return cfq_cfqq_idle_window(cfqq) ? SYNC_WORKLOAD
+						  : SYNC_NOIDLE_WORKLOAD;
+
+	return ASYNC_WORKLOAD;
+}
+
+/*
+ * Number of queues currently linked into the service trees of priority
+ * class @wl: the single idle tree, or the sum over the three workload
+ * types of an RT/BE row.
+ */
+static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
+{
+	struct cfq_rb_root *row;
+
+	if (wl == IDLE_WORKLOAD)
+		return cfqd->service_tree_idle.count;
+
+	row = cfqd->service_trees[wl];
+	return row[ASYNC_WORKLOAD].count
+		+ row[SYNC_NOIDLE_WORKLOAD].count
+		+ row[SYNC_WORKLOAD].count;
+}
+
static void cfq_dispatch_insert(struct request_queue *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
struct io_context *, gfp_t);
return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
}
+/*
+ * Update and return the averaged number of busy queues for the BE
+ * (@rt == false) or RT (@rt == true) priority class.
+ *
+ * The larger of the previous average and the current count is weighted
+ * (cfq_hist_divisor - 1) times against the smaller one, so the average
+ * quickly follows sudden increases and decreases slowly.
+ */
+static inline unsigned cfq_get_avg_queues(struct cfq_data *cfqd, bool rt)
+{
+	unsigned *avg = &cfqd->busy_queues_avg[rt];
+	unsigned weight = cfq_hist_divisor - 1;
+	unsigned half = cfq_hist_divisor / 2;	/* round to nearest */
+	unsigned now = cfq_busy_queues_wl(rt, cfqd);
+	unsigned lo = min(*avg, now);
+	unsigned hi = max(*avg, now);
+
+	*avg = (weight * hi + lo + half) / cfq_hist_divisor;
+	return *avg;
+}
+
static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+ unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
+ if (cfqd->cfq_latency) {
+ /* interested queues (we consider only the ones with the same
+ * priority class) */
+ unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq));
+ unsigned sync_slice = cfqd->cfq_slice[1];
+ unsigned expect_latency = sync_slice * iq;
+ if (expect_latency > cfq_target_latency) {
+ unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
+ /* scale low_slice according to IO priority
+ * and sync vs async */
+ unsigned low_slice =
+ min(slice, base_low_slice * slice / sync_slice);
+ /* the adapted slice value is scaled to fit all iqs
+ * into the target latency */
+ slice = max(slice * cfq_target_latency / expect_latency,
+ low_slice);
+ }
+ }
+ cfqq->slice_end = jiffies + slice;
cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
}
if (root->left == n)
root->left = NULL;
rb_erase_init(n, &root->rb);
+ --root->count;
}
/*
}
/*
- * The cfqd->service_tree holds all pending cfq_queue's that have
+ * The cfqd->service_trees holds all pending cfq_queue's that have
* requests waiting to be processed. It is sorted in the order that
* we will service the queues.
*/
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;
unsigned long rb_key;
+ struct cfq_rb_root *service_tree;
int left;
+ service_tree = service_tree_for(cfqq_prio(cfqq), cfqq_type(cfqq), cfqd);
if (cfq_class_idle(cfqq)) {
rb_key = CFQ_IDLE_DELAY;
- parent = rb_last(&cfqd->service_tree.rb);
+ parent = rb_last(&service_tree->rb);
if (parent && parent != &cfqq->rb_node) {
__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
rb_key += __cfqq->rb_key;
cfqq->slice_resid = 0;
} else {
rb_key = -HZ;
- __cfqq = cfq_rb_first(&cfqd->service_tree);
+ __cfqq = cfq_rb_first(service_tree);
rb_key += __cfqq ? __cfqq->rb_key : jiffies;
}
/*
* same position, nothing more to do
*/
- if (rb_key == cfqq->rb_key)
+ if (rb_key == cfqq->rb_key &&
+ cfqq->service_tree == service_tree)
return;
- cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+ cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
+ cfqq->service_tree = NULL;
}
left = 1;
parent = NULL;
- p = &cfqd->service_tree.rb.rb_node;
+ cfqq->service_tree = service_tree;
+ p = &service_tree->rb.rb_node;
while (*p) {
struct rb_node **n;
__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
/*
- * sort RT queues first, we always want to give
- * preference to them. IDLE queues goes to the back.
- * after that, sort on the next service time.
+ * sort by key, that represents service time.
*/
- if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq))
- n = &(*p)->rb_left;
- else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq))
- n = &(*p)->rb_right;
- else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq))
+ if (time_before(rb_key, __cfqq->rb_key))
n = &(*p)->rb_left;
- else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
- n = &(*p)->rb_right;
- else if (time_before(rb_key, __cfqq->rb_key))
- n = &(*p)->rb_left;
- else
+ else {
n = &(*p)->rb_right;
-
- if (n == &(*p)->rb_right)
left = 0;
+ }
p = n;
}
if (left)
- cfqd->service_tree.left = &cfqq->rb_node;
+ service_tree->left = &cfqq->rb_node;
cfqq->rb_key = rb_key;
rb_link_node(&cfqq->rb_node, parent, p);
- rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
+ rb_insert_color(&cfqq->rb_node, &service_tree->rb);
+ service_tree->count++;
}
static struct cfq_queue *
BUG_ON(!cfq_cfqq_on_rr(cfqq));
cfq_clear_cfqq_on_rr(cfqq);
- if (!RB_EMPTY_NODE(&cfqq->rb_node))
- cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+ if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
+ cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
+ cfqq->service_tree = NULL;
+ }
if (cfqq->p_root) {
rb_erase(&cfqq->p_node, cfqq->p_root);
cfqq->p_root = NULL;
*/
static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
{
- if (RB_EMPTY_ROOT(&cfqd->service_tree.rb))
- return NULL;
+ struct cfq_rb_root *service_tree =
+ service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd);
- return cfq_rb_first(&cfqd->service_tree);
+ if (RB_EMPTY_ROOT(&service_tree->rb))
+ return NULL;
+ return cfq_rb_first(service_tree);
}
/*
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
- if (!cfqq)
+ if (!cfqq) {
cfqq = cfq_get_next_queue(cfqd);
+
+ if (cfqq && !cfq_cfqq_coop_preempt(cfqq))
+ cfq_clear_cfqq_coop(cfqq);
+ }
+
+ if (cfqq)
+ cfq_clear_cfqq_coop_preempt(cfqq);
+
__cfq_set_active_queue(cfqd, cfqq);
return cfqq;
}
return cfqd->last_position - blk_rq_pos(rq);
}
-#define CIC_SEEK_THR 8 * 1024
-#define CIC_SEEKY(cic) ((cic)->seek_mean > CIC_SEEK_THR)
+/*
+ * Threshold compared against a queue's seek_mean: above it, the queue is
+ * considered seeky. The expansion is parenthesized so the constant stays
+ * a single operand wherever it is used.
+ */
+#define CFQQ_SEEK_THR		(8 * 1024)
+#define CFQQ_SEEKY(cfqq)	((cfqq)->seek_mean > CFQQ_SEEK_THR)
-static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
+static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ struct request *rq)
{
- struct cfq_io_context *cic = cfqd->active_cic;
- sector_t sdist = cic->seek_mean;
+ sector_t sdist = cfqq->seek_mean;
- if (!sample_valid(cic->seek_samples))
- sdist = CIC_SEEK_THR;
+ if (!sample_valid(cfqq->seek_samples))
+ sdist = CFQQ_SEEK_THR;
return cfq_dist_from_last(cfqd, rq) <= sdist;
}
* will contain the closest sector.
*/
__cfqq = rb_entry(parent, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, __cfqq->next_rq))
+ if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
return __cfqq;
if (blk_rq_pos(__cfqq->next_rq) < sector)
return NULL;
__cfqq = rb_entry(node, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, __cfqq->next_rq))
+ if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
return __cfqq;
return NULL;
* assumption.
*/
static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
- struct cfq_queue *cur_cfqq,
- bool probe)
+ struct cfq_queue *cur_cfqq)
{
struct cfq_queue *cfqq;
- /*
- * A valid cfq_io_context is necessary to compare requests against
- * the seek_mean of the current cfqq.
- */
- if (!cfqd->active_cic)
+ if (!cfq_cfqq_sync(cur_cfqq))
+ return NULL;
+ if (CFQQ_SEEKY(cur_cfqq))
return NULL;
/*
if (!cfqq)
return NULL;
- if (cfq_cfqq_coop(cfqq))
+ /*
+ * It only makes sense to merge sync queues.
+ */
+ if (!cfq_cfqq_sync(cfqq))
+ return NULL;
+ if (CFQQ_SEEKY(cfqq))
+ return NULL;
+
+ /*
+ * Do not merge queues of different priority classes
+ */
+ if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
return NULL;
- if (!probe)
- cfq_mark_cfqq_coop(cfqq);
return cfqq;
}
+/*
+ * Decide whether the idle window should be enforced for @cfqq.
+ *
+ * Returns false for idle-class queues and true for queues carrying the
+ * idle_window flag. Any other queue qualifies only when its service tree
+ * is empty or holds nothing but @cfqq itself.
+ */
+static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	enum wl_prio_t prio = cfqq_prio(cfqq);
+	struct cfq_rb_root *st;
+
+	/* Idle class queues are never idled on. */
+	if (prio == IDLE_WORKLOAD)
+		return false;
+
+	/* The idle_window flag always wins. */
+	if (cfq_cfqq_idle_window(cfqq))
+		return true;
+
+	/*
+	 * Use the tree the queue is linked into, or the one it would be
+	 * inserted into when it is not on any tree right now.
+	 */
+	st = cfqq->service_tree;
+	if (!st)
+		st = service_tree_for(prio, cfqq_type(cfqq), cfqd);
+
+	if (st->count > 1)
+		return false;
+
+	return st->count == 0 || cfq_rb_first(st) == cfqq;
+}
+
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq = cfqd->active_queue;
/*
* idle is disabled, either manually or by past process history
*/
- if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
+ if (!cfqd->cfq_slice_idle || !cfq_should_idle(cfqd, cfqq))
return;
/*
cfq_mark_cfqq_wait_request(cfqq);
- /*
- * we don't want to idle for seeks, but we do want to allow
- * fair distribution of slice time for a process doing back-to-back
- * seeks. so allow a little bit of time for him to submit a new rq
- */
sl = cfqd->cfq_slice_idle;
- if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
+ /* are we servicing noidle tree, and there are more queues?
+ * non-rotational or NCQ: no idle
+ * non-NCQ rotational : very small idle, to allow
+ * fair distribution of slice time for a process doing back-to-back
+ * seeks.
+ */
+ if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
+ service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WORKLOAD, cfqd)
+ ->count > 0) {
+ if (blk_queue_nonrot(cfqd->queue) || cfqd->hw_tag)
+ return;
sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
+ }
mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
}
+/*
+ * Return the number of references on @cfqq that are not accounted for by
+ * allocated READ/WRITE requests. A negative result is treated as a bug.
+ *
+ * Must be called with the queue_lock held.
+ */
+static int cfqq_process_refs(struct cfq_queue *cfqq)
+{
+	int io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
+	int process_refs = atomic_read(&cfqq->ref) - io_refs;
+
+	BUG_ON(process_refs < 0);
+	return process_refs;
+}
+
+/*
+ * Schedule a merge between @cfqq and @new_cfqq by linking one queue's
+ * ->new_cfqq to the other. The queue with fewer process references is
+ * pointed at the other one, and the target's refcount is raised by the
+ * number of process references that will move over.
+ */
+static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
+{
+	int process_refs, new_process_refs;
+	struct cfq_queue *next;
+
+	/*
+	 * Follow an already scheduled merge chain to its end; bail out if
+	 * it leads back to @cfqq, since linking would create a cycle.
+	 */
+	for (next = new_cfqq->new_cfqq; next; next = new_cfqq->new_cfqq) {
+		if (next == cfqq)
+			return;
+		new_cfqq = next;
+	}
+
+	/* Nothing to merge if no process holds @cfqq any more. */
+	process_refs = cfqq_process_refs(cfqq);
+	if (!process_refs)
+		return;
+
+	/* Merge in the direction of the lesser amount of work. */
+	new_process_refs = cfqq_process_refs(new_cfqq);
+	if (new_process_refs < process_refs) {
+		new_cfqq->new_cfqq = cfqq;
+		atomic_add(new_process_refs, &cfqq->ref);
+		return;
+	}
+
+	cfqq->new_cfqq = new_cfqq;
+	atomic_add(process_refs, &new_cfqq->ref);
+}
+
+/*
+ * Choose the workload type to serve within priority class @prio.
+ *
+ * After a priority switch (@prio_changed) the first non-empty tree in the
+ * order SYNC_NOIDLE, SYNC wins, with ASYNC as the fallback. Otherwise the
+ * type whose first queue has the lowest rb_key is picked; if every tree
+ * is empty the result is SYNC_NOIDLE_WORKLOAD.
+ */
+static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio,
+				    bool prio_changed)
+{
+	enum wl_type_t best = SYNC_NOIDLE_WORKLOAD;
+	unsigned long min_key = 0;
+	bool have_key = false;
+	int type;
+
+	if (prio_changed) {
+		if (service_tree_for(prio, SYNC_NOIDLE_WORKLOAD, cfqd)->count)
+			return SYNC_NOIDLE_WORKLOAD;
+		if (service_tree_for(prio, SYNC_WORKLOAD, cfqd)->count)
+			return SYNC_WORKLOAD;
+
+		return ASYNC_WORKLOAD;
+	}
+
+	for (type = 0; type < 3; type++) {
+		struct cfq_queue *first =
+			cfq_rb_first(service_tree_for(prio, type, cfqd));
+
+		if (!first)
+			continue;
+		/* Keep the current best unless this key is strictly earlier. */
+		if (have_key && !time_before(first->rb_key, min_key))
+			continue;
+
+		min_key = first->rb_key;
+		best = type;
+		have_key = true;
+	}
+
+	return best;
+}
+
+/*
+ * Select the priority class and workload type to serve next, storing them
+ * in cfqd->serving_prio / cfqd->serving_type, and compute the time at
+ * which the chosen workload expires (cfqd->workload_expires).
+ */
+static void choose_service_tree(struct cfq_data *cfqd)
+{
+	enum wl_prio_t old_prio = cfqd->serving_prio;
+	bool prio_changed;
+	unsigned nr_queues;
+	unsigned busy;
+	unsigned slice;
+
+	/* Classes are strictly ordered: RT, then BE, then IDLE. */
+	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd)) {
+		cfqd->serving_prio = RT_WORKLOAD;
+	} else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd)) {
+		cfqd->serving_prio = BE_WORKLOAD;
+	} else {
+		/* The idle class has a single tree: no type to choose. */
+		cfqd->serving_prio = IDLE_WORKLOAD;
+		cfqd->workload_expires = jiffies + 1;
+		return;
+	}
+
+	/*
+	 * For RT and BE a workload type (SYNC, SYNC_NOIDLE, ASYNC) and an
+	 * expiration time have to be chosen as well.
+	 */
+	prio_changed = old_prio != cfqd->serving_prio;
+	nr_queues = service_tree_for(cfqd->serving_prio, cfqd->serving_type,
+				     cfqd)->count;
+
+	/*
+	 * Stay on the current workload while the class is unchanged, its
+	 * tree still has queues and it has not expired yet.
+	 */
+	if (!prio_changed && nr_queues &&
+	    !time_after(jiffies, cfqd->workload_expires))
+		return;
+
+	/* Otherwise switch to a freshly selected workload type. */
+	cfqd->serving_type =
+		cfq_choose_wl(cfqd, cfqd->serving_prio, prio_changed);
+	nr_queues = service_tree_for(cfqd->serving_prio, cfqd->serving_type,
+				     cfqd)->count;
+
+	/*
+	 * The workload slice is the share of the target latency that
+	 * corresponds to this workload's queues out of all queues in the
+	 * same priority class (averaged or current count, whichever is
+	 * larger).
+	 */
+	busy = max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
+		     cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
+	slice = cfq_target_latency * nr_queues / busy;
+
+	if (cfqd->serving_type != ASYNC_WORKLOAD) {
+		/* A sync workload gets at least 2 * cfq_slice_idle. */
+		slice = max(slice, 2 * cfqd->cfq_slice_idle);
+	} else {
+		/* Async is scaled down by the async/sync slice ratio. */
+		slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
+	}
+
+	slice = max_t(unsigned, slice, CFQ_MIN_TT);
+	cfqd->workload_expires = jiffies + slice;
+}
+
/*
* Select a queue for service. If we have a current active queue,
* check whether to continue servicing it, or retrieve and set a new one.
* If another queue has a request waiting within our mean seek
* distance, let it run. The expire code will check for close
* cooperators and put the close queue at the front of the service
- * tree.
+ * tree. If possible, merge the expiring queue with the new cfqq.
*/
- new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0);
- if (new_cfqq)
+ new_cfqq = cfq_close_cooperator(cfqd, cfqq);
+ if (new_cfqq) {
+ if (!cfqq->new_cfqq)
+ cfq_setup_merge(cfqq, new_cfqq);
goto expire;
+ }
/*
* No requests pending. If the active queue still has requests in
* conditions to happen (or time out) before selecting a new queue.
*/
if (timer_pending(&cfqd->idle_slice_timer) ||
- (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) {
+ (cfqq->dispatched && cfq_should_idle(cfqd, cfqq))) {
cfqq = NULL;
goto keep_queue;
}
expire:
cfq_slice_expired(cfqd, 0);
new_queue:
+ /*
+ * Current queue expired. Check if we have to switch to a new
+ * service tree
+ */
+ if (!new_cfqq)
+ choose_service_tree(cfqd);
+
cfqq = cfq_set_active_queue(cfqd, new_cfqq);
keep_queue:
return cfqq;
{
struct cfq_queue *cfqq;
int dispatched = 0;
-
- while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL)
+ int i, j;
+ for (i = 0; i < 2; ++i)
+ for (j = 0; j < 3; ++j)
+ while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j]))
+ != NULL)
+ dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+
+ while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL)
dispatched += __cfq_forced_dispatch_cfqq(cfqq);
cfq_slice_expired(cfqd, 0);
/*
* Drain async requests before we start sync IO
*/
- if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
return false;
/*
static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ struct cfq_queue *__cfqq, *next;
+
if (unlikely(cfqq == cfqd->active_queue)) {
__cfq_slice_expired(cfqd, cfqq, 0);
cfq_schedule_dispatch(cfqd);
}
+ /*
+ * If this queue was scheduled to merge with another queue, be
+ * sure to drop the reference taken on that queue (and others in
+ * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs.
+ */
+ __cfqq = cfqq->new_cfqq;
+ while (__cfqq) {
+ if (__cfqq == cfqq) {
+ WARN(1, "cfqq->new_cfqq loop detected\n");
+ break;
+ }
+ next = __cfqq->new_cfqq;
+ cfq_put_queue(__cfqq);
+ __cfqq = next;
+ }
+
cfq_put_queue(cfqq);
}
}
static void
-cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
+cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct request *rq)
{
sector_t sdist;
u64 total;
- if (!cic->last_request_pos)
+ if (!cfqq->last_request_pos)
sdist = 0;
- else if (cic->last_request_pos < blk_rq_pos(rq))
- sdist = blk_rq_pos(rq) - cic->last_request_pos;
+ else if (cfqq->last_request_pos < blk_rq_pos(rq))
+ sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
else
- sdist = cic->last_request_pos - blk_rq_pos(rq);
+ sdist = cfqq->last_request_pos - blk_rq_pos(rq);
/*
* Don't allow the seek distance to get too large from the
* odd fragment, pagein, etc
*/
- if (cic->seek_samples <= 60) /* second&third seek */
- sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024);
+ if (cfqq->seek_samples <= 60) /* second&third seek */
+ sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024);
else
- sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64);
+ sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64);
- cic->seek_samples = (7*cic->seek_samples + 256) / 8;
- cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
- total = cic->seek_total + (cic->seek_samples/2);
- do_div(total, cic->seek_samples);
- cic->seek_mean = (sector_t)total;
+ cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8;
+ cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8;
+ total = cfqq->seek_total + (cfqq->seek_samples/2);
+ do_div(total, cfqq->seek_samples);
+ cfqq->seek_mean = (sector_t)total;
+
+ /*
+ * If this cfqq is shared between multiple processes, check to
+ * make sure that those processes are still issuing I/Os within
+ * the mean seek distance. If not, it may be time to break the
+ * queues apart again.
+ */
+ if (cfq_cfqq_coop(cfqq)) {
+ if (CFQQ_SEEKY(cfqq) && !cfqq->seeky_start)
+ cfqq->seeky_start = jiffies;
+ else if (!CFQQ_SEEKY(cfqq))
+ cfqq->seeky_start = 0;
+ }
}
/*
enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
- (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic)))
+ (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) {
- unsigned int slice_idle = cfqd->cfq_slice_idle;
- if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
- slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
- if (cic->ttime_mean > slice_idle)
+ if (cic->ttime_mean > cfqd->cfq_slice_idle)
enable_idle = 0;
else
enable_idle = 1;
if (cfq_class_idle(cfqq))
return true;
+ if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD
+ && new_cfqq->service_tree == cfqq->service_tree)
+ return true;
+
/*
* if the new request is sync, but the currently running queue is
* not, let the sync request have priority.
* it's a metadata request and the current queue is doing regular IO.
*/
if (rq_is_meta(rq) && !cfqq->meta_pending)
- return false;
+ return true;
/*
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
* if this request is as-good as one we would expect from the
* current cfqq, let it preempt
*/
- if (cfq_rq_close(cfqd, rq) && (!cfq_cfqq_coop(new_cfqq) ||
+ if (cfq_rq_close(cfqd, cfqq, rq))
++ if (cfq_rq_close(cfqd, cfqq, rq) && (!cfq_cfqq_coop(new_cfqq) ||
+ cfqd->busy_queues == 1)) {
+ /*
+ * Mark the new queue coop_preempt, so its coop flag will not be
+ * cleared when the new queue gets scheduled for the very first time
+ */
+ cfq_mark_cfqq_coop_preempt(new_cfqq);
+ cfq_mark_cfqq_coop(new_cfqq);
return true;
+ }
return false;
}
cfqq->meta_pending++;
cfq_update_io_thinktime(cfqd, cic);
- cfq_update_io_seektime(cfqd, cic, rq);
+ cfq_update_io_seektime(cfqd, cfqq, rq);
cfq_update_idle_window(cfqd, cfqq, cic);
- cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+ cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
if (cfqq == cfqd->active_queue) {
/*
cfq_log_cfqq(cfqd, cfqq, "insert_request");
cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
- cfq_add_rq_rb(rq);
-
rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
list_add_tail(&rq->queuelist, &cfqq->fifo);
+ cfq_add_rq_rb(rq);
cfq_rq_enqueued(cfqd, cfqq, rq);
}
*/
static void cfq_update_hw_tag(struct cfq_data *cfqd)
{
+ struct cfq_queue *cfqq = cfqd->active_queue;
+
if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak)
cfqd->rq_in_driver_peak = rq_in_driver(cfqd);
rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
return;
+ /*
+ * If the active queue doesn't have enough requests and can idle, cfq
+ * might not dispatch sufficient requests to hardware. Don't zero hw_tag
+ * in this case
+ */
+ if (cfqq && cfq_cfqq_idle_window(cfqq) &&
+ cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
+ CFQ_HW_QUEUE_MIN && rq_in_driver(cfqd) < CFQ_HW_QUEUE_MIN)
+ return;
+
if (cfqd->hw_tag_samples++ < 50)
return;
*/
if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
cfq_slice_expired(cfqd, 1);
- else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) &&
+ else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq) &&
sync && !rq_noidle(rq))
cfq_arm_slice_timer(cfqd);
}
cfqq->ioprio = IOPRIO_NORM;
} else {
/*
- * check if we need to unboost the queue
+ * unboost the queue (if needed)
*/
- if (cfqq->ioprio_class != cfqq->org_ioprio_class)
- cfqq->ioprio_class = cfqq->org_ioprio_class;
- if (cfqq->ioprio != cfqq->org_ioprio)
- cfqq->ioprio = cfqq->org_ioprio;
+ cfqq->ioprio_class = cfqq->org_ioprio_class;
+ cfqq->ioprio = cfqq->org_ioprio;
}
}
}
}
+static struct cfq_queue *
+cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
+ struct cfq_queue *cfqq)
+{
+ cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
+ cic_set_cfqq(cic, cfqq->new_cfqq, 1);
+ cfq_mark_cfqq_coop(cfqq->new_cfqq);
+ cfq_put_queue(cfqq);
+ return cic_to_cfqq(cic, 1);
+}
+
+static int should_split_cfqq(struct cfq_queue *cfqq)
+{
+ if (cfqq->seeky_start &&
+ time_after(jiffies, cfqq->seeky_start + CFQQ_COOP_TOUT))
+ return 1;
+ return 0;
+}
+
+/*
+ * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
+ * was the last process referring to said cfqq.
+ */
+static struct cfq_queue *
+split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
+{
+ if (cfqq_process_refs(cfqq) == 1) {
+ cfqq->seeky_start = 0;
+ cfqq->pid = current->pid;
+ cfq_clear_cfqq_coop(cfqq);
+ return cfqq;
+ }
+
+ cic_set_cfqq(cic, NULL, 1);
+ cfq_put_queue(cfqq);
+ return NULL;
+}
/*
* Allocate cfq data structures associated with this request.
*/
if (!cic)
goto queue_fail;
+new_queue:
cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq || cfqq == &cfqd->oom_cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
cic_set_cfqq(cic, cfqq, is_sync);
+ } else {
+ /*
+ * If the queue was seeky for too long, break it apart.
+ */
+ if (cfq_cfqq_coop(cfqq) && should_split_cfqq(cfqq)) {
+ cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
+ cfqq = split_cfqq(cic, cfqq);
+ if (!cfqq)
+ goto new_queue;
+ }
+
+ /*
+ * Check to see if this queue is scheduled to merge with
+ * another, closely cooperating queue. The merging of
+ * queues happens here as it must be done in process context.
+ * The reference on new_cfqq was taken in cfq_setup_merge.
+ */
+ if (cfqq->new_cfqq)
+ cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
}
cfqq->allocated[rw]++;
static void *cfq_init_queue(struct request_queue *q)
{
struct cfq_data *cfqd;
- int i;
+ int i, j;
cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
if (!cfqd)
return NULL;
- cfqd->service_tree = CFQ_RB_ROOT;
+ for (i = 0; i < 2; ++i)
+ for (j = 0; j < 3; ++j)
+ cfqd->service_trees[i][j] = CFQ_RB_ROOT;
+ cfqd->service_tree_idle = CFQ_RB_ROOT;
/*
* Not strictly needed (since RB_ROOT just clears the node and we