【百万数据导出EasyPOI、MySQL】

mysql 数据库 java 2023-08-18 19:08:40 342人浏览泡泡鱼

摘要

0、600多万数据用EasyPOI从Mysql导出为什么是600多万？因为本来想整5000万来着，然后等了半天才600多万，不想等了，应该也够了。具体就是： 1、分页查询 2、使用线程池、异步任务并发处理每批次的数据 3、写入到多个sh

0、600多万数据用EasyPOI从Mysql导出

为什么是600多万？因为本来想整5000万来着，然后等了半天才600多万，不想等了，应该也够了。
具体就是：
1、分页查询
2、使用线程池、异步任务并发处理每批次的数据
3、写入到多个sheet（因为每个sheet 上限100万左右，就没有合并）（当然也可以写入到多个excel然后合并为一个Excel，思路大差不差的）

1、创建数据库、存储过程（字段按需更改）

DELIMITER $$CREATE PROCEDURE generate_test_data()BEGIN  DECLARE i INT DEFAULT 1;  DECLARE n INT DEFAULT 50000000; # 生成多少条数据  DECLARE randnum INT;    DROP TABLE IF EXISTS test_table;    CREATE TABLE test_table (    id INT PRIMARY KEY AUTO_INCREMENT,    name VARCHAR(255),    email VARCHAR(255),    age INT,    address VARCHAR(255)  );    WHILE i <= n DO    SET randnum = FLOOR(RAND() * 10000);    INSERT INTO test_table (name, email, age, address)     VALUES       (CONCAT('user', i), CONCAT('user', i, '@example.com'), randnum, CONCAT('address', i));    SET i = i + 1;  END WHILE;END$$DELIMITER ;

2、执行存储过程

CALL generate_test_data();

3、等了半天生成了600多w，想着也够了（有耐心的可以多等等，这三个在600多万数据下居然执行时间差不多少，也有可能是我数据表的字段比较少）

在这里插入图片描述

4、application.yml配置数据源

spring:  datasource:    url: jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&useSSL=false    username: root    passWord: 123321    driver-class-name: com.mysql.cj.jdbc.Driver

5、TestTableMapper.xml

<?xml version="1.0" encoding="UTF-8"?><!-- TestTableMapper.xml --><!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "Http://mybatis.org/dtd/mybatis-3-mapper.dtd"><mapper namespace="com.lfsun.dao.TestTableDAO">    <!--  分页查询数据  -->    <select id="pageQuery" resultType="com.lfsun.model.entity.TestTableDO">        SELECT id, name, age, email, address FROM test_table        WHERE 1=1        <if test="id != null">            AND id = #{id}        </if>        <if test="name != null">            AND name = #{name}        </if>        <if test="age != null">            AND age = #{age}        </if>        <if test="email != null">            AND email = #{email}        </if>        <if test="address != null">            AND address = #{address}        </if>        ORDER BY id DESC        <if test="pageIndex != null and pageSize != null">            LIMIT #{pageIndex},#{pageSize}        </if>    </select>    <!--  数据count  -->    <select id="pageQueryCount" resultType="java.lang.Long">        SELECT COUNT(1) as total FROM test_table        WHERE 1=1        <if test="id != null">            AND id = #{id}        </if>        <if test="name != null">            AND name = #{name}        </if>        <if test="age != null">            AND age = #{age}        </if>        <if test="email != null">            AND email = #{email}        </if>        <if test="address != null">            AND address = #{address}        </if>    </select></mapper>

6、TestTableDAO.java

package com.lfsun.dao;import com.lfsun.model.entity.TestTableDO;import org.apache.ibatis.annotations.Mapper;import org.apache.ibatis.annotations.Param;import java.util.List;import java.util.Map;@Mapperpublic interface TestTableDAO {         List<TestTableDO> pageQuery(Map param);        Long pageQueryCount(Map param);}

7、service

package com.lfsun.api.service;import com.alibaba.fastJSON.jsONObject;import com.lfsun.common.util.CommonResult;import java.io.IOException;public interface TestTableService {    void exportData() throws Exception;}

8、实现类

package com.lfsun.service.main;import com.alibaba.excel.EasyExcel;import com.alibaba.excel.EasyExcelFactory;import com.alibaba.excel.ExcelWriter;import com.alibaba.excel.write.metadata.WriteSheet;import com.alibaba.fastjson.JSON;import com.alibaba.fastjson.JSONObject;import com.lfsun.api.service.TestTableService;import com.lfsun.common.util.CommonAssert;import com.lfsun.common.util.CommonResult;import com.lfsun.common.util.PaginationDO;import com.lfsun.common.util.PaginationResult;import com.lfsun.dao.TestTableDAO;import com.lfsun.model.dto.TestTableDTO;import com.lfsun.model.entity.TestTableDO;import org.apache.commons.collections.CollectionUtils;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import org.springframework.stereotype.Service;import javax.annotation.Resource;import java.util.ArrayList;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.concurrent.*;import java.util.concurrent.atomic.AtomicInteger;@Servicepublic class TestTableServiceImpl implements TestTableService {    private static final Logger LOGGER = LoggerFactory.getLogger(TestTableServiceImpl.class);    @Resource    private TestTableDAO testTableDAO;        @Override    public void exportData() throws Exception {        int pageSize = 100000; // 每批查询的数据量        int start = 0; // 查询的起始位置        boolean hasNext = true; // 是否还有下一批数据        List<Future<Void>> futureList = new ArrayList<>(); // 保存异步任务的Future对象        ExecutorService executor = Executors.newFixedThreadPool(10); // 创建线程池，最多10个线程并发执行        AtomicInteger count = new AtomicInteger();        WriteSheet build = EasyExcelFactory.writerSheet().build();        String[] sheetNames = new String[(int) (testTableDAO.pageQueryCount(new HashMap()) / pageSize)]; // Sheet 名称数组        WriteSheet[] sheets = new WriteSheet[sheetNames.length]; // WriteSheet 对象数组        for (int i = 0; i < sheetNames.length; i++) {            sheets[i] = EasyExcelFactory.writerSheet(i, "sheet" + i).build(); // 创建 WriteSheet 对象，并指定 sheetName        }        ExcelWriter writer = EasyExcel.write("output.xlsx").build(); // 创建 ExcelWriter 对象        while (hasNext) {            List<TestTableDO> dataList = testTableDAO.pageQuery(getPageMap(pageSize, start)); // 查询数据            if (dataList.isEmpty()) {                hasNext = false; // 没有数据了，结束查询            } else {                // 使用异步任务并发处理每批次的数据                Future<Void> future = executor.submit(() -> {                    try {                        int index = count.getAndIncrement() % sheetNames.length; // 计算当前数据应该写入的 sheet 的下标                        synchronized (writer) {writer.write(dataList, sheets[index], EasyExcelFactory.writerTable(index).build()); // 将数据写入到对应的 sheet 中                        }                    } catch (Exception e) {                        e.printStackTrace();                    }                    return null;                });                futureList.add(future); // 保存异步任务的 Future 对象                start += pageSize; // 更新查询的起始位置            }            LOGGER.info("当前 {}", start);        }        // 等待所有异步任务完成        for (Future<Void> future : futureList) {            future.get(); // 等待异步任务完成        }        writer.finish(); // 写入完成，关闭 ExcelWriter 对象        executor.shutdown(); // 关闭线程池    }        private static Map<String, Long> getPageMap(long pageSize, long start) {        Map<String, Long> pageMap = new HashMap<>(2);        pageMap.put("pageIndex", start);        pageMap.put("pageSize", pageSize);        return pageMap;    }}

9、controller

package com.lfsun.main.controller;import cn.hutool.core.thread.AsyncUtil;import com.alibaba.fastjson.JSONObject;import com.lfsun.api.service.TestTableService;import com.lfsun.common.util.AsyncUtils;import com.lfsun.common.util.CommonTemplate;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import org.springframework.WEB.bind.annotation.*;import javax.annotation.Resource;import javax.servlet.http.httpservletResponse;import java.io.IOException;import java.util.concurrent.CompletableFuture;@CrossOrigin@RestController@RequestMapping(value = "/testTable/")public class TestTableController {    private static final Logger LOGGER = LoggerFactory.getLogger(TestTableController.class);    @Resource    private TestTableService testTableService;    @RequestMapping("/export1")    @ResponseBody    public void exportData(HttpServletResponse response) throws Exception {        testTableService.exportData();    }}

DO 实体类忘贴了，这里补上：

package com.lfsun.model.entity;public class TestTableDO {        private Long id;        private String name;        private Integer age;        private String email;        private String address;    public Long getId() {        return id;    }    public void setId(Long id) {        this.id = id;    }    public String getName() {        return name;    }    public void setName(String name) {        this.name = name;    }    public Integer getAge() {        return age;    }    public void setAge(Integer age) {        this.age = age;    }    public String getEmail() {        return email;    }    public void setEmail(String email) {        this.email = email;    }    public String getAddress() {        return address;    }    public void setAddress(String address) {        this.address = address;    }}

10、针对导出测试（这里以下载到本地为例）：http://localhost:8888/testTable/export1

在这里插入图片描述
可见 100w耗时5s：

500w耗时：58.29 - 57.12 = 1.17 一分钟十七秒(因为用的是 limit，所以越靠后就会越慢：在 SQL 中，使用 LIMIT 关键字可以限制返回的行数，以提高查询效率。然而，当 LIMIT 接近查询结果集的末尾时，查询的速度可能会变慢。

这是因为在查询结果集中的每一行都需要按照指定的排序规则进行排序，然后根据 LIMIT 的限制选择要返回的行。因此，当 LIMIT 接近查询结果集的末尾时，需要对越来越多的数据进行排序操作，导致查询速度变慢。另外，如果查询中包含多个排序规则，查询的效率可能会更低，因为需要对每个排序规则进行排序操作。因此，为了提高查询效率，应该尽可能地减少排序规则的数量，并且在使用 LIMIT 时应尽可能地将其放在查询语句的开头。此外，如果查询中包含大量的数据，则可能需要使用分页技术来提高查询效率，而不是使用 LIMIT 来限制返回的行数。通过使用分页技术，可以根据需要逐步加载数据，从而避免一次性加载大量数据导致查询速度变慢的问题。)

2023-03-26 10:58:29.883 [http-NIO-8888-exec-1] INFO  c.l.s.main.TestTableServiceImpl: 当前 5000000

11、6528802条数据最终耗时 2.6分钟

500w时候 1分17s，可见差距
在这里插入图片描述

（可选）12、但是多人导出可能会出现的问题，实现类的export 方法需要做以下修改

@Override    public void exportData() throws Exception {        int pageSize = 100000; // 每批查询的数据量        int start = 0; // 查询的起始位置        boolean hasNext = true; // 是否还有下一批数据        ExecutorService executor = Executors.newFixedThreadPool(10); // 创建线程池，最多10个线程并发执行        CountDownLatch countDownLatch = new CountDownLatch(1); // 等待所有异步任务完成的计数器        ConcurrentMap<Integer, List<TestTableDO>> dataMap = new ConcurrentHashMap<>(); // 保存查询到的数据        AtomicInteger sheetIndex = new AtomicInteger(); // 当前写入的Sheet的下标        while (hasNext) {            List<TestTableDO> dataList = testTableDAO.pageQuery(getPageMap(pageSize, start)); // 查询数据            if (dataList.isEmpty()) {                hasNext = false; // 没有数据了，结束查询            } else {                // 使用异步任务并发处理每批次的数据                executor.execute(() -> {                    int index = sheetIndex.getAndIncrement(); // 计算当前数据应该写入的 sheet 的下标                    dataMap.put(index, dataList); // 将数据放入线程安全的ConcurrentHashMap中                    if (index == 0) {                        countDownLatch.countDown(); // 通知主线程开始写入数据                    }                });                start += pageSize; // 更新查询的起始位置            }            LOGGER.info("当前 {}", start);        }        countDownLatch.await(); // 等待异步任务完成        ExcelWriter writer = EasyExcel.write("output.xlsx").build(); // 创建 ExcelWriter 对象        for (int i = 0; i < sheetIndex.get(); i++) {            WriteSheet sheet = EasyExcelFactory.writerSheet(i, "sheet" + i).build(); // 创建 WriteSheet 对象，并指定 sheetName            List<TestTableDO> dataList = dataMap.get(i); // 获取当前Sheet的数据            writer.write(dataList, sheet, EasyExcelFactory.writerTable(i).build()); // 将数据写入到对应的 sheet 中        }        writer.finish(); // 写入完成，关闭 ExcelWriter 对象        executor.shutdown(); // 关闭线程池    }