逐行读取两个文件并进行嵌套循环连接

文件A.txt

1|369001|O|186600.18
2|780017|O|66219.63
3|1233140|F|270741.97
4|1367761|O|41714.38
5|444848|F|122444.33
6|556222|F|50883.96

文件B.txt

1|7759468|384484
1|3365454|365455
1|3184989|184990
2|5308487|58508
3|214849|89850
3|951772|326776
3|1468981|93984
4|4401735|276760
5|5428465|428466
5|6196340|196341
5|1876509|1513
6|6981773|106787

读取两个文件 A.txt 和 B.txt。两个文件的第一列为主键,当主键相等时进行连接,并将结果写入输出文件,类似于数据库中的 join 连接。
A,B主键都有序,A主键重复
方案一
读取B.txt一行,然后扫描整个A.txt 主键相等时,连接

 /**
  * Scheme one: joins A.txt and B.txt on the integer key held in the first
  * '|'-separated field. Every matching pair is written to output.txt as the
  * A line immediately followed by the B line and a newline.
  *
  * Both inputs are expected to be sorted ascending by key. Each file is read
  * forward exactly once; B is never rewound for a later A line.
  *
  * Known limitation, kept deliberately because it is the behavior this scheme
  * illustrates: the B line that stops the inner loop has already been consumed
  * from the reader, so it is never compared with the next A line and any match
  * it would have produced is lost.
  *
  * @param args unused
  * @throws IOException declared for compatibility; I/O failures are caught and
  *                     reported on standard output
  */
 public static void main(String[] args) throws IOException {

        String dir = "D:" + File.separator + "bigfile" + File.separator;
        // Only the outermost wrappers are tracked: closing a BufferedReader or
        // BufferedWriter also closes the streams it wraps.
        BufferedReader A_br = null;
        BufferedReader B_br = null;
        BufferedWriter output_bw = null;
        try {
            A_br = new BufferedReader(new InputStreamReader(new FileInputStream(dir + "A.txt")));
            B_br = new BufferedReader(new InputStreamReader(new FileInputStream(dir + "B.txt")));
            output_bw = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(new File(dir + "output.txt")), "UTF-8"));

            String A_line;
            String B_line;
            while ((A_line = A_br.readLine()) != null) {
                // The key of the current A line does not change while B is scanned.
                int A_key = Integer.parseInt(A_line.split("\\|", 2)[0]);
                while ((B_line = B_br.readLine()) != null) {
                    int B_key = Integer.parseInt(B_line.split("\\|", 2)[0]);
                    if (A_key == B_key) {
                        output_bw.write(A_line + B_line + "\n");
                    }
                    if (A_key < B_key) {
                        // B has moved past this key; go on to the next A line.
                        break;
                    }
                }
            }
            // Flush inside the try so a failed write is reported by the catch below.
            output_bw.flush();
        } catch (FileNotFoundException e) {
            System.out.println("找不到指定文件");
        } catch (IOException e) {
            System.out.println("读取文件失败");
        } finally {
            // A stream may still be null when an earlier open failed, and one failed
            // close must not prevent the remaining streams from being closed.
            for (java.io.Closeable resource : new java.io.Closeable[] {output_bw, B_br, A_br}) {
                if (resource == null) {
                    continue;
                }
                try {
                    resource.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

问题:读取B.txt的第一行时没有问题,但是扫描完A.txt之后,再读取B.txt的下一行时,A.txt已经读到文件末尾,无法重复读取。
所以需要寻求解决方法(例如把A.txt放进内存,以便重复读取)。
方案二
投机取巧型
A.txt有序
先读取A.txt一行,再扫描B.txt,当主键相等时进行连接到同一行,不相等时候终止循环

 /**
  * Scheme two: reads A.txt one line at a time and, for each A line, keeps
  * reading B.txt while the B key (first '|'-separated field, parsed as an int)
  * is not greater than the A key. Lines with equal keys are written to
  * output.txt as the A line immediately followed by the B line and a newline.
  *
  * Both inputs are expected to be sorted ascending by key, and each file is
  * read forward exactly once.
  *
  * Known limitation, kept deliberately because it is the behavior this scheme
  * illustrates: detecting that the keys no longer match requires reading one
  * more B line, and that line is discarded when the inner loop breaks, so a
  * match between it and the next A line is lost.
  *
  * @param args unused
  * @throws IOException declared for compatibility; I/O failures are caught and
  *                     reported on standard output
  */
 public static void main(String[] args) throws IOException {

        String dir = "D:" + File.separator + "bigfile" + File.separator;
        // Only the outermost wrappers are tracked: closing a BufferedReader or
        // BufferedWriter also closes the streams it wraps.
        BufferedReader A_br = null;
        BufferedReader B_br = null;
        BufferedWriter output_bw = null;
        try {
            A_br = new BufferedReader(new InputStreamReader(new FileInputStream(dir + "A.txt")));
            B_br = new BufferedReader(new InputStreamReader(new FileInputStream(dir + "B.txt")));
            output_bw = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(new File(dir + "output.txt")), "UTF-8"));

            String A_line;
            String B_line;
            while ((A_line = A_br.readLine()) != null) {
                // Parse the A key once per A line instead of once per B line.
                int A_key = Integer.parseInt(A_line.split("\\|", 2)[0]);
                while ((B_line = B_br.readLine()) != null) {
                    int B_key = Integer.parseInt(B_line.split("\\|", 2)[0]);
                    if (A_key < B_key) {
                        // B has moved past this key; go on to the next A line.
                        break;
                    }
                    if (A_key == B_key) {
                        output_bw.write(A_line + B_line + "\n");
                    }
                }
            }
            // Flush inside the try so a failed write is reported by the catch below.
            output_bw.flush();
        } catch (FileNotFoundException e) {
            System.out.println("找不到指定文件");
        } catch (IOException e) {
            System.out.println("读取文件失败");
        } finally {
            // A stream may still be null when an earlier open failed, and one failed
            // close must not prevent the remaining streams from being closed.
            for (java.io.Closeable resource : new java.io.Closeable[] {output_bw, B_br, A_br}) {
                if (resource == null) {
                    continue;
                }
                try {
                    resource.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

问题:A读取一行没有问题;接着逐行读取B,当主键相等时连接并写文件。但是必须多读取B的下一行才能判断主键已经不相等,而这一行已经被读走,不会再与A的下一行进行比较,因此会造成数据丢失。

https://blog.csdn.net/tomoya_chen/article/details/68958274

依题意,A文件主键重复,B文件主键不重复
先读取B文件,将B文件的内容存入Map对象,即 Map<B文件主键, B文件内容>
遍历A文件,根据A文件主键检查Map对象中是否有匹配的B文件,如果存在则拼接内容并写入文件,否则continue;
如果存在主键重复的情况,可以对前述方案进行改进:比如对A文件,将文件内容存入Map对象,即 Map<A文件主键, List<A文件内容>>,
List里存放该主键对应的所有行内容

下面的代码并非最好的答案:它的前提是A、B主键都有序,且A主键重复,所以B表在前,A表在后。Map方案虽然在数据量很小时可以解决问题,但是当数据量很大时会造成内存溢出。最好的解答方式应当在A、B无序、并且数据量超过内存的情况下仍然能够处理,这需要进行存储优化,并先进行排序。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

/**
 * Merge-joins two '|'-delimited table files on the integer key stored in the
 * first field of every line and writes the joined rows to a result file.
 *
 * Preconditions: both inputs are sorted ascending by key, and keys in the
 * orders file are unique (the lineitem file may repeat a key). Each file is
 * read sequentially exactly once, so memory use does not grow with file size.
 */
public class TestDemo {

    /** Orders file used when no path is given on the command line. */
    private static final String DEFAULT_ORDERS_PATH = "/root/test/2.17.3/dbgen/orders.tbl";
    /** Lineitem file used when no path is given on the command line. */
    private static final String DEFAULT_LINEITEM_PATH = "/root/test/2.17.3/dbgen/lineitem.tbl";
    /** Result file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/root/test/result.tbl";

    /**
     * Runs the join and prints the elapsed time in whole seconds.
     *
     * @param args optional paths: {@code [orders] [lineitem] [output]}; any
     *             missing argument falls back to the built-in default path
     * @throws IOException declared for compatibility; I/O failures are caught
     *                     and reported on standard output
     * @throws IllegalArgumentException if an input line has no '|' delimiter
     * @throws NumberFormatException if a key is not a valid integer
     */
    public static void main(String[] args) throws IOException {
        long startTime = System.currentTimeMillis();
        String ordersPath = argOrDefault(args, 0, DEFAULT_ORDERS_PATH);
        String lineitemPath = argOrDefault(args, 1, DEFAULT_LINEITEM_PATH);
        String outputPath = argOrDefault(args, 2, DEFAULT_OUTPUT_PATH);

        BufferedReader orders_br = null;
        BufferedReader lineitem_br = null;
        BufferedWriter output_bw = null;
        try {
            // input file
            orders_br = new BufferedReader(new InputStreamReader(new FileInputStream(ordersPath)));
            lineitem_br = new BufferedReader(new InputStreamReader(new FileInputStream(lineitemPath)));
            // output file
            output_bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputPath))));
            mergeJoin(orders_br, lineitem_br, output_bw);
            // Flush inside the try so a failed write is reported by the catch below.
            output_bw.flush();
        } catch (FileNotFoundException e) {
            System.out.println("can not find file");
        } catch (IOException e) {
            System.out.println("read file failure");
            e.printStackTrace();
        } finally {
            // Runs on every path, so nothing leaks when the join fails half-way.
            closeQuietly(output_bw);
            closeQuietly(lineitem_br);
            closeQuietly(orders_br);
        }
        long endTime = System.currentTimeMillis();
        long runTime = endTime - startTime;
        System.out.println("Demo running time:" + runTime / 1000 + " second ");
    }

    /**
     * Streams both sorted inputs once and writes {@code ordersLine + lineitemLine}
     * followed by a newline for every pair of lines with equal keys.
     */
    private static void mergeJoin(BufferedReader orders, BufferedReader lineitems, BufferedWriter out)
            throws IOException {
        // The current lineitem line is carried across orders lines: the line that
        // ends one order's group is the first candidate for the next order.
        String lineitemLine = lineitems.readLine();
        String ordersLine;
        while (lineitemLine != null && (ordersLine = orders.readLine()) != null) {
            long orderKey = keyOf(ordersLine);
            while (lineitemLine != null) {
                long lineitemKey = keyOf(lineitemLine);
                if (lineitemKey > orderKey) {
                    // Belongs to a later order; keep it for the next iteration.
                    break;
                }
                if (lineitemKey == orderKey) {
                    out.write(ordersLine + lineitemLine + "\n");
                }
                // Matched lines are consumed. Lines with a smaller key have no
                // order at all and are skipped, otherwise the join would stall on
                // them and drop every later match.
                lineitemLine = lineitems.readLine();
            }
        }
    }

    /** Parses the integer key that precedes the first '|' of a line. */
    private static long keyOf(String line) {
        int delimiter = line.indexOf('|');
        if (delimiter < 0) {
            throw new IllegalArgumentException("missing '|' key delimiter in line: " + line);
        }
        return Long.parseLong(line.substring(0, delimiter));
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Closes a resource if it was opened, reporting but not propagating failures. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}