MySQL 组合索引使用

2019-06-18

关于组合索引的使用，作个记录。

组合索引是指一个索引中包含多个列，当查询条件包含多个列时，需要注意左前缀原则，即查询的条件需要严格按照索引列的顺序，否则将不会完全使用到索引。

测试基础表：

create table `test` (
    `id` int(11) unsigned not null auto_increment,
    `A` int(11) not null default 0,
    `B` varchar(32) not null default '',
    `C` varchar(255) not null default 0,
    `D` int(11) not null default 0,
    primary key (`id`),
    key `idx_test` (`A`, `B`, `C`) using btree
) engine=innodb default charset=utf8;

-- 大概生成 60W 测试数据
insert into test(A, B, C, D) values(rand() * 1000000, substring(md5(rand() * 100), 1, rand() * 20), concat(substring(md5(rand()), 1, rand() * 20), ' ', substring(md5(rand()), 1, rand() * 10)), rand() * 1000000);
insert into test(A, B, C, D) select A, B, C, D from test;

下面是各种组合条件的情况：

-- 使用索引 key_len=4
select * from test where A < 10;

-- 由于数据区分度几乎为零，调优器会选择全表扫描
select * from test where A > 10;

-- 由于选取的列都在索引中，会直接从索引中取数据，不扫表 key_len=4
select A, C from test where A > 10;

-- 使用组合索引 key_len=869
select * from test where A = 10 and B = 'title' and C = '123';

-- 使用组合索引 key_len=869
select * from test where A = 10 and B in ('title', 'subtitle') and C in ('123', '456');

-- 当使用比较条件时，后面的索引会失效 key_len=4
select * from test where A < 10 and B = 'title' and C = '123';

-- 索引失效
select * from test where A <> 10;

-- 使用索引 key_len=102
select * from test where A = 10 and B not in ('title', '123');

-- not in 条件会使后面的列失效 key_len=102
select * from test where A = 10 and B not in ('title', '123') and c = '456';

下面是各种组合条件加排序的情况：

-- 由于有未索引的列，组合索引失效 Using filesort
select * from test order by A;

-- 所有列都属于索引，使用索引 key_len=869
select A, C from test order by A;

-- 符合左前缀原则，使用索引 key_len=869
select A, C from test order by A asc, B asc;

-- 排序方向不一致，但是列都属于索引，索引使用 key_len=869，但是 Using filesort
-- 可以建立 idx_test(A asc, B desc, C aes) 索引来解决
select A, C from test order by A asc, B desc;

-- 不符合左前缀原则，但是列都属于索引，索引使用 key_len=869，但是 Using filesort
select A, C from test order by A asc, C asc;

-- 符合左前缀原则，A 列使用索引，B 列排序 key_len=4
select * from test where A = 10 order by B asc;

-- 不完全符合左前缀原则，A 使用索引，排序失效 key_len=4, Using filesort
select * from test where A = 10 order by C asc;

-- 索引失效，type=ALL, Using filesort
select * from test where A > 10 order by C asc;

-- 符合左前缀原则，使用索引 key_len=102
select * from test where A = 1000 and B in ('123', '456') order by B asc;
select * from test where A = 1000 and B in ('123', '456') order by A asc, B asc, C asc;

-- 比较条件导致排序索引失效 key_len=4, Using filesort
select * from test where A < 1000 and B in ('123', '456') order by B asc;

-- 范围条件导致排序索引失效 key_len=102, Using filesort
select * from test where A in (10, 1000) and B in ('123', '456') order by B asc;

-- 当出现文件排序时，可对排序的列或者选择的列做处理，令其符合左前缀原则
select * from test where A = 10 order by B asc, C asc;
select A, B, C from test where A > 10 order by A asc, B asc, C asc;
select * from test where A < 1000 and B in ('123', '456') order by A asc, B asc;
select * from test where A in (10, 1000) and B in ('123', '456') order by A asc, B asc;

group by 语句在利用索引扫描时分为松散扫描和紧凑扫描两种情况，其中松散扫描是指只利用索引扫描就能实现满足条件的分组排序，无需扫描所有索引或者建立临时表来获取结果，效率会高很多。

下面是官网给出能用上松散扫描的情况：

select A, B from test group by A, B;
select distinct A, B from test group by A;
select A, B from test where A < 1000 group by A, B;
select max(A), min(A), B, C from test where A > 1000 group by A, B;
select B from test where A < 10000 group by A, B;
select A, B from test where C = '123' group by A, B;

当 group by 不能使用索引扫描完成时，在使用临时表分组时内部会进行 order by 排序再分组，这时可以使用 order by null 来取消内部排序。

-- Using where; Using temporary; Using filesort
select A, B from test where C = '123' group by A, B, D;

-- Using where; Using temporary
select A, B from test where C = '123' group by A, B, D order by null;

有时候会碰到调优器选择错误索引反而更慢的情况，除了可以用 use index、force index、ignore index 来指定索引外，还可以给不想用索引的列加上运算来略过，如 A + 0 = 1000、concat(C, '') = '123'。

alter table test add index idx_c(C) using btree;

-- key=idx_c, Using index condition; Using temporary; Using filesort
select A, B from test where C = '123' group by A, B;

-- key=idx_test, Using where; Using index
select A, B from test where concat(C, '') = '123' group by A, B;

参考

mysql复合索引、普通索引总结

建索引时注意字段选择性&范围查询注意组合索引的字段顺序

mysql优化之 group by索引松散扫描和紧凑扫描