Java: removing values that repeat across a list of CSV files
Requirement:
/**
* before:
* file A1.csv {1,2,3,4,5}
* file A2.csv {2,3,9,10,11}
* file B1.csv {5,12,13,14,15}
* file B2.csv {16,14,15,4,9,20,30}
* A1.csv A2.csv B1.csv B2.csv must not share any value (no value may appear in more than one file)
*
* after:
* file A1.csv {1,4}
* file A2.csv {2,3,10,11}
* file B1.csv {12,13}
* file B2.csv {16,9,20,30}
*/
tangxin@tangxin:~/csvrepeat$ ls
A1.csv A2.csv B1.csv B2.csv
CSVUtilVersion2.java
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.util.StringUtils; import java.io.*;
import java.lang.reflect.Array;
import java.util.*; /**
* before:
* file A1.csv {1,2,3,4,5}
* file A2.csv {2,3,9,10,11}
* file B1.csv {5,12,13,14,15}
* file B2.csv {16,14,15,4,9,20,30}
* A1.csv A2.csv B1.csv B2.csv must not share any value (no value may appear in more than one file)
*
* after:
* file A1.csv {1,4}
* file A2.csv {2,3,10,11}
* file B1.csv {12,13}
* file B2.csv {16,9,20,30}
*/
@Slf4j
public class CSVUtilVersion2 { private static final String CSV_PATH = "/home/tangxin/csvrepeat/";
private static final boolean CREATE_SWITCH = true; /**
* read single column data list
* @param path
* @return
*/
public static List<String> ids(String path) {
List<String> result = new ArrayList<>();
File csv = new File(path); // CSV文件路径
LineIterator it = null;
try {
it = FileUtils.lineIterator(csv);
while (it.hasNext()) {
String line = it.nextLine();
if (line.trim().contains("ID")) {
continue;
}
String[] arr = line.split(",");
String ID = arr[0];
ID = ID.replaceAll("\"", "").trim();
if (!StringUtils.isEmpty(ID)) {
result.add(ID);
}
}
} catch (Exception e) {
log.error("读取ID csv文件失败:{}", e.getMessage());
} finally {
LineIterator.closeQuietly(it);
}
return result;
} /**
* from src delete oth
* @param src
* @param oth
* @return
*/
public static List removeAll(List src, List oth) {
LinkedList result = new LinkedList(src);
HashSet othHash = new HashSet(oth);
Iterator iter = result.iterator();
while (iter.hasNext()) {
if (othHash.contains(iter.next())) {
iter.remove();
}
}
return result;
} /**
* -Xms1g -Xmx1g -XX:PermSize=128m -XX:SurvivorRatio=2 -XX:+UseParallelGC
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception { //∑=1+2+3+...+(n-1) group LinkedList<String> fileList = new LinkedList<>();
fileList.add("A1.csv");
fileList.add("A2.csv");
fileList.add("B1.csv");
fileList.add("B2.csv");
// fileList.add("C1.csv"); DescartesRepeat(fileList); ded(fileList); } private static void DescartesRepeat(LinkedList<String> fileList) {
Set<String> repeatList = new HashSet<>(); Set<String> groupSet = new HashSet<>(); Set<String> goONList = new HashSet<>(); //A1->A2,B1,B2
for (int i = 0; i < fileList.size(); i++) { String itemI = fileList.get(i); for (int j = 0; j < fileList.size(); j++) { String itemJ = fileList.get(j); if (!itemI.equals(itemJ)) { String groupR1 = itemI + "->" + itemJ;
String groupR2 = itemJ + "->" + itemI; if (groupSet.contains(groupR1) || groupSet.contains(groupR2)){
continue;
} groupSet.add(groupR1); String repeatT = repeat(CSV_PATH + itemI, CSV_PATH + itemJ);
if(!StringUtils.isEmpty(repeatT)){
repeatList.add(repeatT);
//System.out.println(groupR1+"->"+repeatT);
} } }
} if (CollectionUtils.isNotEmpty(repeatList)) {
// System.out.println(repeatList);
for (String repeatItem : repeatList) {
Iterator<String> iterator = fileList.iterator();
while (iterator.hasNext()) {
String oldItem = iterator.next(); String oldS = oldItem.replace(".csv", "").replace("-new","");
String repeatS = repeatItem.replace(".csv","").replace("-new","");
if (repeatS.contains(oldS)) {
iterator.remove();
goONList.add(repeatItem);
}
}
}
fileList.addAll(goONList);
System.out.println(fileList);
DescartesRepeat(fileList);
}
} public static void ded(List<String> args) { //保证指定csv列表每组都不能有重复数据
for (int i = 0; i < args.size(); i++) {
// if(i>0){
// continue;
// } String source = CSV_PATH + args.get(i); for (int j = 0; j < args.size(); j++) { if (i == j) {
continue;
} String target = CSV_PATH + args.get(j);
intersection(source, target);
} } } public static void intersection(String sourcePath, String targetPath) {
List<String> ids1 = ids(sourcePath);
List<String> ids2 = ids(targetPath);
List<String> inter = (List<String>) CollectionUtils.intersection(ids1, ids2);
System.out.println(sourcePath + "和" + targetPath + "的重复数据大小" + inter.size());
} public static String repeat(String source, String target){
//cdd fund xyd List<String> ids1 = ids(source);
List<String> ids2 = ids(target); // System.out.println(source + "集合大小" + ids1.size());
// System.out.println(target + "集合大小" + ids2.size()); List<String> inter = (List<String>) CollectionUtils.intersection(ids1, ids2); // System.out.println("去重数据大小:" + inter.size()); if (inter != null && inter.size() > 0) { if (ids1.size() > ids2.size()) {
return repeatInner(source, ids1, inter);
} else if (ids2.size() > ids1.size()) {
return repeatInner(target, ids2, inter);
} else {
return repeatInner(source, ids1, inter);
} } return "";
} private static String repeatInner(String source, List<String> ids, List<String> inter) {
String newPath = source.replace(".csv", "-new.csv");
List<String> ids1new = removeAll(ids, inter);
createCSV(ids1new, newPath);
return newPath.replace(CSV_PATH,"");
} /**
* 创建CSV文件
*/
public static void createCSV(List<String> list, String fileName) { if(!CREATE_SWITCH){
// System.out.println("创建csv开关关闭");
return;
}else{
// System.out.println("创建csv开关开启");
} // 表格头
Object[] head = {"ID"};
List<Object> headList = Arrays.asList(head); //数据
List<List<Object>> dataList = new ArrayList<>();
List<Object> rowList = null;
for (int i = 0; i < list.size(); i++) {
rowList = new ArrayList<>();
rowList.add(list.get(i));
dataList.add(rowList);
} File csvFile;
BufferedWriter csvWtriter = null;
try {
csvFile = new File(fileName);
File parent = csvFile.getParentFile();
if (parent != null && !parent.exists()) {
parent.mkdirs();
}
csvFile.createNewFile(); // GB2312使正确读取分隔符","
csvWtriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile), "GB2312"), 1024); // 写入文件头部
writeRow(headList, csvWtriter); // 写入文件内容
for (List<Object> row : dataList) {
writeRow(row, csvWtriter);
}
csvWtriter.flush();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
csvWtriter.close();
} catch (IOException e) {
e.printStackTrace();
}
}
} /**
* 写一行数据
*
* @param row 数据列表
* @param csvWriter
* @throws IOException
*/
private static void writeRow(List<Object> row, BufferedWriter csvWriter) throws IOException {
for (Object data : row) {
StringBuffer sb = new StringBuffer();
String rowStr = sb.append("\"").append(data).append("\",").toString();
csvWriter.write(rowStr);
}
csvWriter.newLine();
} }
tangxin@tangxin:~/csvrepeat$ ls
A1.csv A1-new.csv A1-new-new.csv A2.csv A2-new.csv B1.csv B2.csv B2-new.csv B2-new-new.csv
java csv list cant not repeat的更多相关文章
- Java CSV操作(导出和导入)
Java CSV操作(导出和导入) CSV是逗号分隔文件(Comma Separated Values)的首字母英文缩写,是一种用来存储数据的纯文本格式,通常用于电子表格或数据库软件.在 CSV文件 ...
- java csv - 读写及其操作.
今天帮同学处理数据, 主要是从1w多条记录中随机获取8k条, 然后再从8k条记录中随机获取2k条记录. 最后将2k条记录中随机分成10组,使得每组的记录都不重复. 下面将我的代码都贴上来, 好以后处理 ...
- java csv 文件 操作类
一个CSV文件操作类,功能比较齐全: package tool; import java.io.BufferedReader; import java.io.BufferedWriter; impor ...
- 【Java】Java CSV操作代码
CSV是逗号分隔文件(Comma Separated Values)的首字母英文缩写,是一种用来存储数据的纯文本格式,通常用于电子表格或数据库软件.在 CSV文件中,数据“栏”以逗号分隔,可允许程序通 ...
- 【Java/Csv/Regex】用正则表达式去劈分带引号的csv文件行,得到想要的行数据
csv文件是用引号分隔的文本行,为了完善内容人们又用引号把每个区块的内容又包了起来,于是形成下面的文件: "1","2","3"," ...
- 【Java/csv】一个CSV文件解析类(转载)
/*下文写得不错,值得学习**/ import java.io.BufferedReader; import java.io.FileReader; import java.util.ArrayLis ...
- Java csv
CsvWriter csvWriter = new CsvWriter("data2019052803.csv", ',', Charset.forName("UTF-8 ...
- java csv文件写入
List<String> list_code = null; 方案1 控制字符集: BufferedWriter bw=new BufferedWriter(new OutputStrea ...
- 统一的Json组件和csv下载组件
java-web-common java-web-common Json组件 目标和用途 规范Json接口格式 Controller中一律返回Java object,组件将自动转换数据格式,满足Jso ...
随机推荐
- c# 动态编译继承接口
c#里面的动态编译我就不讲了,主要的都有了.如果不熟悉我推荐博文 https://www.cnblogs.com/maguoyong/articles/5553827.html 标准的动态编译 这里主 ...
- rest_framework -- mixins&generics
上面的mixins.generics都是rest_framework里的模块,我们可以继承其中的某些类,达到代码量减少的效果,这里充分体现出了面向对象的继承 一.mixins模块 mixins : f ...
- js/jquery 禁用点击事件
前言 工作中经常遇到这种情况:验证邮箱页面的重新发送需要在3分钟后才可以点击触发请求,所以在这之前需要禁用他的点击. 网上查了后有以下几种实现方法 1.css禁用鼠标点击事件 .disabled { ...
- P1330 封锁阳光大学 DFS+染色
题目链接:https://www.luogu.org/problemnew/show/P1330 这个题有意思,如果能想到染色,就会很简单,但若想不到就很麻烦 要想把一条边封锁,就必须且只能占据这条边 ...
- springboot properties文件中的数据通过@Value()形式注入
首先在resources目录下新建一个properties文件,如下图 在photoPath.properties中写入内容,key=value的形式,如下图 在你需要引用properties的类头部 ...
- DevOps - 版本控制 - Git
配置 .gitignore 配置 .gitignore 配置文件用于配置不需要加入版本管理的文件,配置好该文件可以为我们的版本管理带来很大的便利. 有些时候,你必须把某些文件放到Git工作目录中,但又 ...
- vue $set修改对象
在vue开发中,当生成vue实例后,再次给数据赋值时,有时候并不会自动更新到视图上去: eg:<!DOCTYPE html> <html> <head> <m ...
- Sencha Visual Studio(IDE插件)
Sencha Visual Studio(IDE插件) 首先从官网上下载Visual Studio插件,注意不是VSCode编辑器,下载完后安装打开Visual Studio提示你去注册,输入你的se ...
- 【Nginx】Nginx配置REWRITE隐藏index.php
只需要在server里面加上 if (!-e $request_filename) { rewrite ^/(.*)$ /index.php/$1 last; break; }
- while,格式化输出
1. while循环: while 条件: 代码块(循环体) num=1 while num<=5: print(num) num+=1 break:结束循环;停止当前本层循环 continue ...