Bagaimana memilih 1 juta catatan dari database, menulis ke Excel dan tidak crash dengan OutOfMemoryError

Baru-baru ini, saya diberi tugas menulis layanan yang hanya akan menangani satu, tetapi tugas yang sangat luas - mengumpulkan sejumlah besar data dari database, menggabungkan dan mengisi semuanya di Excel sesuai dengan templat tertentu. Dalam proses mencari solusi terbaik, beberapa pendekatan dicoba, dan masalah yang berkaitan dengan memori dan kinerja diselesaikan. Pada artikel ini, saya ingin berbagi dengan Anda poin utama dan tahapan pelaksanaan tugas ini.





1. Pernyataan masalah

Karena fakta bahwa saya tidak dapat membocorkan detail spesifikasi teknis, entitas, algoritme pengumpulan data, dll. Saya harus menemukan sesuatu yang serupa:





Jadi, bayangkan kita memiliki obrolan online dengan aktivitas tinggi, dan pelanggan ingin mengunduh semua pesan yang diperkaya dengan data pengguna untuk tanggal tertentu di Excel. Lebih dari 1 juta pesan dapat diakumulasikan per hari.





Kami memiliki 3 tabel:





  1. pengguna. Menyimpan nama pengguna dan beberapa peringkat (tidak peduli dari mana asalnya dan bagaimana cara menghitungnya)





  2. Pesan. Menyimpan data pesan - Nama Pengguna, DateTime, Teks pesan.





  3. Tugas. Tugas menghasilkan laporan, yang dibuat oleh pelanggan. Menyimpan ID, Status tugas (selesai atau tidak), dan dua parameter: Tanggal mulai pesan, Tanggal akhir pesan.





Kolom akan menjadi sebagai berikut:





Excel 4 1) message_date. 2) name. 3) rating. 4) text. 1 . excel, .





2. ,

, , – Spring + Hibernate . Oracle, .





spring-boot-starter-data-jpa, Spring Data, Hibernate JPA, .





        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jpa</artifactId>
            <version>2.4.5</version>
        </dependency>
      
      



spring-boot-starter-test





        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
      
      







        <dependency>
            <groupId>com.oracle.database.jdbc</groupId>
            <artifactId>ojdbc10</artifactId>
            <version>19.10.0.0</version>
        </dependency>
      
      



. , TASK, β€œCREATED” , , . , , . Spring Data . :





package com.report.generator.demo.config;

import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
import org.springframework.scheduling.TaskScheduler;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;

@Configuration
@EnableScheduling
@EnableAsync
@EnableJpaRepositories(basePackages = "com.report.generator.demo.repository")
@PropertySource({"classpath:application.properties"})
@ConditionalOnProperty(
    value = "app.scheduling.enable", havingValue = "true", matchIfMissing = true
)
public class DemoConfig {
    private static final int CORE_POOL_SIZE = 2;

    @Bean(name = "taskScheduler")
    public TaskScheduler getTaskScheduler() {
        ThreadPoolTaskScheduler scheduler = new ThreadPoolTaskScheduler();
        scheduler.setPoolSize(CORE_POOL_SIZE);
        scheduler.initialize();
        return scheduler;
    }
}

      
      



@Scheduled , Task , , .





    @Async("taskScheduler")
    @Scheduled(fixedDelay = 60000)
    public void scheduledTask() {
        log.info("scheduledTask is started");
        Task task = getTask();
        if (Objects.isNull(task)) {
            log.info("task not found");
            return;
        }
        log.info("task found");
        generate(task);
    }
      
      



, GitHub.





3.

.. Hibernate . entity MessageData (id, name, rating, messageDate, test). – List<Message> :





List<Message> findAllByMessageDateBetween(Instant dateFrom, Instant dateTo);
      
      



MessageData . , List . , . 30 OutOfMemoryError .





List, – id , , , . . , . , , ScrollableResults. . , , , . .





– . .. , , , 200+ . java , . view. , , hibernate entity.





, 1 10 . . , , . – 1 , 10 , – 10 , 1 , . , view , . , , , . , , , 1 . .





, – . , entity. , , sys_refcursor, 300 , :





create function message_ref(
    date_from timestamp,
    date_to timestamp
) return sys_refcursor as
    ret_cursor sys_refcursor;
begin
    open ret_cursor for
        select m.id,
               u.name,
               u.rating,
               m.message_date,
               m.text
        from message m
                 left join users u on m.user_id = u.id
        where m.message_date between date_from and date_to;
    return ret_cursor;
end message_ref;
      
      



? @NamedNativeQuery. : "{ ? = call message_ref(?, ?) }", callable = true , , cacheMode = CacheModeType.IGNORE , . . , , resultClass = MessageData.class entity. MessageData :





package com.report.generator.demo.repository.entity;

import lombok.Data;
import org.hibernate.annotations.CacheModeType;
import org.hibernate.annotations.NamedNativeQuery;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import java.io.Serializable;
import java.time.Instant;

import static com.report.generator.demo.repository.entity.MessageData.MESSAGE_REF_QUERY_NAME;

@Data
@Entity
@NamedNativeQuery(
    name = MESSAGE_REF_QUERY_NAME,
    query = "{ ? = call message_ref(?, ?) }",
    callable = true,
    cacheMode = CacheModeType.IGNORE,
    resultClass = MessageData.class
)
public class MessageData implements Serializable {

    public static final String MESSAGE_REF_QUERY_NAME = "MessageData.callMessageRef";

    private static final long serialVersionUID = -6780765638993961105L;

    @Id
    private long id;

    @Column
    private String name;

    @Column
    private int rating;

    @Column(name = "MESSAGE_DATE")
    private Instant messageDate;

    @Column
    private String text;
}
      
      



StatelessSession. : namedQuery hibernate CacheMode UnsupportedOperationException. :





            query.setHint(JPA_SHARED_CACHE_STORE_MODE, null);
            query.setHint(JPA_SHARED_CACHE_RETRIEVE_MODE, null);
      
      



:





 @Transactional
    void generate(Task task) {
        log.info("generating report is started");
        try (
            StatelessSession statelessSession = sessionFactory.openStatelessSession()
        ) {
            ReportExcelStreamWriter writer = new ReportExcelStreamWriter();
            Query<MessageData> query = statelessSession.createNamedQuery(MESSAGE_REF_QUERY_NAME, MessageData.class);
            query.setParameter(1, task.getDateFrom());
            query.setParameter(2, task.getDateTo());
            query.setHint(JPA_SHARED_CACHE_STORE_MODE, null);
            query.setHint(JPA_SHARED_CACHE_RETRIEVE_MODE, null);
            ScrollableResults results = query.scroll(ScrollMode.FORWARD_ONLY);
            int index = 0;
            while (results.next()) {
                index++;
                writer.createRow(index, (MessageData) results.get(0));
                if (index % 100000 == 0) {
                    log.info("progress {} rows", index);
                }
            }
            writer.writeWorkbook();
            task.setStatus(DONE.toString());
            log.info("task {} complete", task);
        } catch (Exception e) {
            task.setStatus(FAIL.toString());
            e.printStackTrace();
            log.error("an error occurred with message {}. While executing the task {}", e.getMessage(), task);
        } finally {
            taskRepository.save(task);
        }
    }
      
      



4. Excel

– excel , . – org.apache.poi. :





        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.0.0</version>
        </dependency>
      
      



XSSFWorkbook XSSFSheet, row . , :





package com.report.generator.demo.service;

import com.report.generator.demo.repository.entity.MessageData;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.FileOutputStream;
import java.io.IOException;
import java.time.Instant;

public class ReportExcelWriter {

    private final XSSFWorkbook wb;
    private final XSSFSheet sheet;

    public ReportExcelWriter() {
        this.wb = new XSSFWorkbook();
        this.sheet = wb.createSheet();
        createTitle();
    }

    public void createRow(int index, MessageData data) {
        XSSFRow row = sheet.createRow(index);
        setCellValue(row.createCell(0), data.getMessageDate());
        setCellValue(row.createCell(1), data.getName());
        setCellValue(row.createCell(2), data.getRating());
        setCellValue(row.createCell(3), data.getText());
    }

    public void writeWorkbook() throws IOException {
        FileOutputStream fileOut = new FileOutputStream(Instant.now().getEpochSecond() + ".xlsx");
        wb.write(fileOut);
        fileOut.close();
    }

    private void createTitle() {
        XSSFRow rowTitle = sheet.createRow(0);
        setCellValue(rowTitle.createCell(0), "Date");
        setCellValue(rowTitle.createCell(1), "Name");
        setCellValue(rowTitle.createCell(2), "Rating");
        setCellValue(rowTitle.createCell(3), "Text");
    }

    private void setCellValue(XSSFCell cell, String value) {
        cell.setCellValue(value);
    }

    private void setCellValue(XSSFCell cell, long value) {
        cell.setCellValue(value);
    }

    private void setCellValue(XSSFCell cell, Instant value) {
        cell.setCellValue(value.toString());
    }
}

      
      



. 3 1 excel. OutOfMemoryError. :





2Gb, OutOfMemoryError 30% .





excel , List, , stream, - google . I/O Stream excel, , . org.apache.poi , streaming. excel. streaming :





package com.report.generator.demo.service;

import com.report.generator.demo.repository.entity.MessageData;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;

import java.io.FileOutputStream;
import java.io.IOException;
import java.time.Instant;

public class ReportExcelStreamWriter {

    private final SXSSFWorkbook wb;
    private final SXSSFSheet sheet;

    public ReportExcelStreamWriter() {
        this.wb = new SXSSFWorkbook();
        this.sheet = wb.createSheet();
        createTitle();
    }

    public void createRow(int index, MessageData data) {
        SXSSFRow row = sheet.createRow(index);
        setCellValue(row.createCell(0), data.getMessageDate());
        setCellValue(row.createCell(1), data.getName());
        setCellValue(row.createCell(2), data.getRating());
        setCellValue(row.createCell(3), data.getText());
    }

    public void writeWorkbook() throws IOException {
        FileOutputStream fileOut = new FileOutputStream(Instant.now().getEpochSecond() + ".xlsx");
        wb.write(fileOut);
        fileOut.close();
    }

    private void createTitle() {
        SXSSFRow rowTitle = sheet.createRow(0);
        setCellValue(rowTitle.createCell(0), "Date");
        setCellValue(rowTitle.createCell(1), "Name");
        setCellValue(rowTitle.createCell(2), "Rating");
        setCellValue(rowTitle.createCell(3), "Text");
    }

    private void setCellValue(SXSSFCell cell, String value) {
        cell.setCellValue(value);
    }

    private void setCellValue(SXSSFCell cell, long value) {
        cell.setCellValue(value);
    }

    private void setCellValue(SXSSFCell cell, Instant value) {
        cell.setCellValue(value.toString());
    }
}

      
      



:





, , OutOfMemoryError.





5.

, StatelessSession, ScrollableResults org.apache.poi streaming. , jdbc, , , . . 1 . , . . . DemoApplicationTests. GitHub.








All Articles