百万级别数据Excel导出优化
前提
一些基本原理
数据导出方法(参数,输出流[OutputStream]){
1. 通过参数查询需要导出的结果集
2. 把结果集序列化为字节序列
3. 通过输出流写入结果集字节序列
4. 关闭输出流
}
@Data
public static class Parameter{
private OffsetDateTime paymentDateTimeStart;
private OffsetDateTime paymentDateTimeEnd;
}
public void export(Parameter parameter, OutputStream os) throws IOException {
List result =
orderDao.query(parameter.getPaymentDateTimeStart(), parameter.getPaymentDateTimeEnd()).stream()
.map(order -> {
OrderDTO dto = new OrderDTO();
......
return dto;
}).collect(Collectors.toList());
byte[] bytes = toBytes(result);
os.write(bytes);
os.close();
}

SELECT * FROM tableX WHERE id > #{lastBatchMaxId} [其他条件] ORDER BY id [ASC|DESC](这里一般选用ASC排序) LIMIT ${size}
public void export(Parameter parameter, OutputStream os) throws IOException {
long lastBatchMaxId = 0L;
for (;;){
List orders = orderDao.query([SELECT * FROM t_order WHERE id > #{lastBatchMaxId}
AND payment_time >= #{parameter.paymentDateTimeStart} AND payment_time <= #{parameter.paymentDateTimeEnd} ORDER BY id ASC LIMIT ${LIMIT}]);
if (orders.isEmpty()){
break;
}
List result =
orderDao.query([SELECT * FROM t_order]).stream()
.map(order -> {
OrderDTO dto = new OrderDTO();
......
return dto;
}).collect(Collectors.toList());
byte[] bytes = toBytes(result);
os.write(bytes);
os.flush();
lastBatchMaxId = orders.stream().map(Order::getId).max(Long::compareTo).orElse(Long.MAX_VALUE);
}
os.close();
}
❝
这里的滚动翻页方案远比LIMIT offset,size效率高,因为此方案每次查询都是最终的结果集,而一般的分页方案使用的LIMIT offset,size需要先查询,后截断。
❞
仿真案例
DROP TABLE IF EXISTS `t_order`;
CREATE TABLE `t_order`
(
`id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY COMMENT '主键',
`creator` VARCHAR(16) NOT NULL DEFAULT 'admin' COMMENT '创建人',
`editor` VARCHAR(16) NOT NULL DEFAULT 'admin' COMMENT '修改人',
`create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`edit_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`version` BIGINT NOT NULL DEFAULT 1 COMMENT '版本号',
`deleted` TINYINT NOT NULL DEFAULT 0 COMMENT '软删除标识',
`order_id` VARCHAR(32) NOT NULL COMMENT '订单ID',
`amount` DECIMAL(10, 2) NOT NULL DEFAULT 0 COMMENT '订单金额',
`payment_time` DATETIME NOT NULL DEFAULT '1970-01-01 00:00:00' COMMENT '支付时间',
`order_status` TINYINT NOT NULL DEFAULT 0 COMMENT '订单状态,0:处理中,1:支付成功,2:支付失败',
UNIQUE uniq_order_id (`order_id`),
INDEX idx_payment_time (`payment_time`)
) COMMENT '订单表';
org.springframework.boot
spring-boot-starter-web
org.springframework.boot
spring-boot-starter-jdbc
mysql
mysql-connector-java
8.0.18
com.alibaba
easyexcel
2.2.6
public class OrderServiceTest {
private static final Random OR = new Random();
private static final Random AR = new Random();
private static final Random DR = new Random();
@Test
public void testGenerateTestOrderSql() throws Exception {
HikariConfig config = new HikariConfig();
config.setUsername("root");
config.setPassword("root");
config.setJdbcUrl("jdbc:mysql://localhost:3306/local?serverTimezone=Asia/Shanghai&useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false");
config.setDriverClassName(Driver.class.getName());
HikariDataSource hikariDataSource = new HikariDataSource(config);
JdbcTemplate jdbcTemplate = new JdbcTemplate(hikariDataSource);
for (int d = 0; d < 100; d++) {
String item = "('%s','%d','2020-07-%d 00:00:00','%d')";
StringBuilder sql = new StringBuilder("INSERT INTO t_order(order_id,amount,payment_time,order_status) VALUES ");
for (int i = 0; i < 20_000; i++) {
sql.append(String.format(item, UUID.randomUUID().toString().replace("-", ""),
AR.nextInt(100000) + 1, DR.nextInt(31) + 1, OR.nextInt(3))).append(",");
}
jdbcTemplate.update(sql.substring(0, sql.lastIndexOf(",")));
}
hikariDataSource.close();
}
}
@RequiredArgsConstructor
@Repository
public class OrderDao {
private final JdbcTemplate jdbcTemplate;
public List queryByScrollingPagination(long lastBatchMaxId,
int limit,
LocalDateTime paymentDateTimeStart,
LocalDateTime paymentDateTimeEnd) {
return jdbcTemplate.query("SELECT * FROM t_order WHERE id > ? AND payment_time >= ? AND payment_time <= ? " +
"ORDER BY id ASC LIMIT ?",
p -> {
p.setLong(1, lastBatchMaxId);
p.setTimestamp(2, Timestamp.valueOf(paymentDateTimeStart));
p.setTimestamp(3, Timestamp.valueOf(paymentDateTimeEnd));
p.setInt(4, limit);
},
rs -> {
List orders = new ArrayList<>();
while (rs.next()) {
Order order = new Order();
order.setId(rs.getLong("id"));
order.setCreator(rs.getString("creator"));
order.setEditor(rs.getString("editor"));
order.setCreateTime(OffsetDateTime.ofInstant(rs.getTimestamp("create_time").toInstant(), ZoneId.systemDefault()));
order.setEditTime(OffsetDateTime.ofInstant(rs.getTimestamp("edit_time").toInstant(), ZoneId.systemDefault()));
order.setVersion(rs.getLong("version"));
order.setDeleted(rs.getInt("deleted"));
order.setOrderId(rs.getString("order_id"));
order.setAmount(rs.getBigDecimal("amount"));
order.setPaymentTime(OffsetDateTime.ofInstant(rs.getTimestamp("payment_time").toInstant(), ZoneId.systemDefault()));
order.setOrderStatus(rs.getInt("order_status"));
orders.add(order);
}
return orders;
});
}
}
@Data
public class OrderDTO {
@ExcelIgnore
private Long id;
@ExcelProperty(value = "订单号", order = 1)
private String orderId;
@ExcelProperty(value = "金额", order = 2)
private BigDecimal amount;
@ExcelProperty(value = "支付时间", order = 3)
private String paymentTime;
@ExcelProperty(value = "订单状态", order = 4)
private String orderStatus;
}
@Service
@RequiredArgsConstructor
public class OrderService {
private final OrderDao orderDao;
private static final DateTimeFormatter F = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
public List queryByScrollingPagination(String paymentDateTimeStart,
String paymentDateTimeEnd,
long lastBatchMaxId,
int limit) {
LocalDateTime start = LocalDateTime.parse(paymentDateTimeStart, F);
LocalDateTime end = LocalDateTime.parse(paymentDateTimeEnd, F);
return orderDao.queryByScrollingPagination(lastBatchMaxId, limit, start, end).stream().map(order -> {
OrderDTO dto = new OrderDTO();
dto.setId(order.getId());
dto.setAmount(order.getAmount());
dto.setOrderId(order.getOrderId());
dto.setPaymentTime(order.getPaymentTime().format(F));
dto.setOrderStatus(OrderStatus.fromStatus(order.getOrderStatus()).getDescription());
return dto;
}).collect(Collectors.toList());
}
}
@RequiredArgsConstructor
@RestController
@RequestMapping(path = "/order")
public class OrderController {
private final OrderService orderService;
@GetMapping(path = "/export")
public void export(@RequestParam(name = "paymentDateTimeStart") String paymentDateTimeStart,
@RequestParam(name = "paymentDateTimeEnd") String paymentDateTimeEnd,
HttpServletResponse response) throws Exception {
String fileName = URLEncoder.encode(String.format("%s-(%s).xlsx", "订单支付数据", UUID.randomUUID().toString()),
StandardCharsets.UTF_8.toString());
response.setContentType("application/force-download");
response.setHeader("Content-Disposition", "attachment;filename=" + fileName);
ExcelWriter writer = new ExcelWriterBuilder()
.autoCloseStream(true)
.file(response.getOutputStream())
.head(OrderDTO.class)
.build();
// xlsx文件上上限是104W行左右,这里如果超过104W需要分Sheet
WriteSheet writeSheet = new WriteSheet();
writeSheet.setSheetName("target");
long lastBatchMaxId = 0L;
int limit = 500;
for (; ; ) {
List list = orderService.queryByScrollingPagination(paymentDateTimeStart, paymentDateTimeEnd, lastBatchMaxId, limit);
if (list.isEmpty()) {
writer.finish();
break;
} else {
lastBatchMaxId = list.stream().map(OrderDTO::getId).max(Long::compareTo).orElse(Long.MAX_VALUE);
writer.write(list, writeSheet);
}
}
}
}
导出数据耗时:29733 ms,start:2020-07-01 00:00:00,end:2020-07-16 00:00:00

小结
:https://github.com/zjcscut/spring-boot-guide/tree/master/ch10086-excel-export
