MySQL 组合索引使用
关于组合索引的使用,作个记录。
组合索引是指一个索引中包含多个列,当查询条件包含多个列时,需要注意左前缀原则,即查询的条件需要严格按照索引列的顺序,否则将不会完全使用到索引。
测试基础表:
create table `test` (
`id` int(11) unsigned not null auto_increment,
`A` int(11) not null default 0,
`B` varchar(32) not null default '',
`C` varchar(255) not null default 0,
`D` int(11) not null default 0,
primary key (`id`),
key `idx_test` (`A`, `B`, `C`) using btree
) engine=innodb default charset=utf8;
-- 大概生成 60W 测试数据
insert into test(A, B, C, D) values(rand() * 1000000, substring(md5(rand() * 100), 1, rand() * 20), concat(substring(md5(rand()), 1, rand() * 20), ' ', substring(md5(rand()), 1, rand() * 10)), rand() * 1000000);
insert into test(A, B, C, D) select A, B, C, D from test;
下面是各种组合条件的情况:
-- 使用索引 key_len=4
select * from test where A < 10;
-- 由于数据区分度几乎为零,调优器会选择全表扫描
select * from test where A > 10;
-- 由于选取的列都在索引中,会直接从索引中取数据,不扫表 key_len=4
select A, C from test where A > 10;
-- 使用组合索引 key_len=869
select * from test where A = 10 and B = 'title' and C = '123';
-- 使用组合索引 key_len=869
select * from test where A = 10 and B in ('title', 'subtitle') and C in ('123', '456');
-- 当使用比较条件时,后面的索引会失效 key_len=4
select * from test where A < 10 and B = 'title' and C = '123';
-- 索引失效
select * from test where A <> 10;
-- 使用索引 key_len=102
select * from test where A = 10 and B not in ('title', '123');
-- not in 条件会使后面的列失效 key_len=102
select * from test where A = 10 and B not in ('title', '123') and c = '456';
下面是各种组合条件加排序的情况:
-- 由于有未索引的列,组合索引失效 Using filesort
select * from test order by A;
-- 所有列都属于索引,使用索引 key_len=869
select A, C from test order by A;
-- 符合左前缀原则,使用索引 key_len=869
select A, C from test order by A asc, B asc;
-- 排序方向不一致,但是列都属于索引,索引使用 key_len=869,但是 Using filesort
-- 可以建立 idx_test(A asc, B desc, C aes) 索引来解决
select A, C from test order by A asc, B desc;
-- 不符合左前缀原则,但是列都属于索引,索引使用 key_len=869,但是 Using filesort
select A, C from test order by A asc, C asc;
-- 符合左前缀原则,A 列使用索引,B 列排序 key_len=4
select * from test where A = 10 order by B asc;
-- 不完全符合左前缀原则,A 使用索引,排序失效 key_len=4, Using filesort
select * from test where A = 10 order by C asc;
-- 索引失效,type=ALL, Using filesort
select * from test where A > 10 order by C asc;
-- 符合左前缀原则,使用索引 key_len=102
select * from test where A = 1000 and B in ('123', '456') order by B asc;
select * from test where A = 1000 and B in ('123', '456') order by A asc, B asc, C asc;
-- 比较条件导致排序索引失效 key_len=4, Using filesort
select * from test where A < 1000 and B in ('123', '456') order by B asc;
-- 范围条件导致排序索引失效 key_len=102, Using filesort
select * from test where A in (10, 1000) and B in ('123', '456') order by B asc;
-- 当出现文件排序时,可对排序的列或者选择的列做处理,令其符合左前缀原则
select * from test where A = 10 order by B asc, C asc;
select A, B, C from test where A > 10 order by A asc, B asc, C asc;
select * from test where A < 1000 and B in ('123', '456') order by A asc, B asc;
select * from test where A in (10, 1000) and B in ('123', '456') order by A asc, B asc;
group by
语句在利用索引扫描时分为松散扫描和紧凑扫描两种情况,其中松散扫描是指只利用索引扫描就能实现满足条件的分组排序,无需扫描所有索引或者建立临时表来获取结果,效率会高很多。
下面是官网给出能用上松散扫描的情况:
select A, B from test group by A, B;
select distinct A, B from test group by A;
select A, B from test where A < 1000 group by A, B;
select max(A), min(A), B, C from test where A > 1000 group by A, B;
select B from test where A < 10000 group by A, B;
select A, B from test where C = '123' group by A, B;
当 group by
不能使用索引扫描完成时,在使用临时表分组时内部会进行 order by
排序再分组,这时可以使用 order by null
来取消内部排序。
-- Using where; Using temporary; Using filesort
select A, B from test where C = '123' group by A, B, D;
-- Using where; Using temporary
select A, B from test where C = '123' group by A, B, D order by null;
有时候会碰到调优器选择错误索引反而更慢的情况,除了可以用 use index
、force index
、ignore index
来指定索引外,还可以给不想用索引的列加上运算来略过,如 A + 0 = 1000
、concat(C, '') = '123'
。
alter table test add index idx_c(C) using btree;
-- key=idx_c, Using index condition; Using temporary; Using filesort
select A, B from test where C = '123' group by A, B;
-- key=idx_test, Using where; Using index
select A, B from test where concat(C, '') = '123' group by A, B;