Skip to content

Commit 7e956fb

Browse files
committed
Add Create SVM_Rank DataSet code
1 parent 5e9e893 commit 7e956fb

11 files changed

+728
-350
lines changed

AllPath.txt

100755100644
Lines changed: 402 additions & 268 deletions
Large diffs are not rendered by default.

PathCount.txt

100755100644
Lines changed: 67 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,67 @@
1-
[1, *, 5] 4
2-
[4, 6, 2] 4
3-
[6, 2, 5, 3] 4
4-
[4, *, 2, 3] 4
5-
[6, 2, 5] 4
6-
[4, 3] 4
7-
[1, 2] 4
8-
[6, *, 2] 4
9-
[4, *, 2] 4
10-
[6, 5] 4
11-
[4, 2] 4
12-
[4, *, 5, 3] 4
13-
[4, 6, 1, 2] 4
14-
[1, 2, 3] 4
15-
[6, *, *, 3] 4
16-
[2, 3] 4
17-
[4, 6, 3] 4
18-
[6, 2, *, 3] 4
19-
[6, 1, *, 5] 4
20-
[6, 1] 4
21-
[4, 6] 4
22-
[4, 6, *, 5] 4
23-
[4, *, 1, 2] 4
24-
[4, 2, 5] 4
25-
[6, *, 5] 4
26-
[6, 1, 2, 5] 4
27-
[1, 2, 5, 3] 4
28-
[4, 6, 2, 5] 4
29-
[4, *, 5] 4
30-
[4, *, 1] 4
31-
[4, *, *, 5] 4
32-
[1, *, 3] 4
33-
[2, *, 3] 4
34-
[4, 6, 5, 3] 4
35-
[6, *, 2, 3] 4
36-
[6, 1, 2] 4
37-
[4, *, 2, 5] 4
38-
[1, *, 5, 3] 4
39-
[1, 2, *, 3] 4
40-
[6, 2] 4
41-
[4, 5] 4
42-
[2, 5, 3] 4
43-
[4, *, *, 2] 4
44-
[4, 6, *, 2] 4
45-
[4, 2, 5, 3] 4
46-
[4, 5, 3] 4
47-
[6, 5, 3] 4
48-
[4, 6, 1] 4
49-
[6, *, *, 5] 4
50-
[1, *, *, 3] 4
51-
[6, *, 5, 3] 4
52-
[1, 2, 5] 4
53-
[2, 5] 4
54-
[6, 1, *, 3] 4
55-
[4, 6, 5] 4
56-
[6, 3] 4
57-
[5, 3] 4
58-
[4, 2, 3] 4
59-
[4, 6, *, 3] 4
60-
[6, 2, 3] 4
61-
[6, 1, 2, 3] 4
62-
[4, *, 3] 4
63-
[4, 6, 2, 3] 4
64-
[6, *, 2, 5] 4
65-
[4, 2, *, 3] 4
66-
[6, *, 3] 4
67-
[4, *, *, 3] 4
1+
4,*,2,3 6
2+
4,*,*,5 4
3+
1,2,5 6
4+
1,2,*,3 6
5+
4,*,*,3 8
6+
4,*,*,2 6
7+
6,5,3 6
8+
6,1,2 6
9+
6,*,5,3 4
10+
4,2,*,3 6
11+
4,6,*,5 4
12+
4,6,*,2 6
13+
6,1,2,3 6
14+
4,2,5,3 6
15+
4,6,*,3 6
16+
6,1,2,5 4
17+
4,6,2,5 4
18+
6,1,*,3 6
19+
4,6,2,3 6
20+
6,1,*,5 4
21+
4,2,3 8
22+
6,*,5 4
23+
6,*,*,5 4
24+
4,6,1 6
25+
4,2,5 6
26+
6,*,*,3 6
27+
4,6,3 4
28+
1,*,5,3 6
29+
4,6,2 6
30+
6,*,3 6
31+
4,6,5 6
32+
6,*,2 6
33+
2,3 8
34+
4,2 8
35+
6,1 6
36+
4,3 8
37+
2,5 6
38+
6,2,*,3 4
39+
6,2 6
40+
1,2,5,3 6
41+
6,3 4
42+
4,5 8
43+
4,6 8
44+
4,*,5,3 8
45+
6,5 6
46+
4,*,1,2 6
47+
4,*,1 6
48+
4,*,3 8
49+
6,2,5 4
50+
4,*,2 6
51+
6,*,2,5 4
52+
4,*,5 8
53+
6,*,2,3 6
54+
6,2,3 6
55+
2,5,3 6
56+
2,*,3 6
57+
4,6,5,3 6
58+
4,6,1,2 6
59+
1,*,3 8
60+
4,5,3 8
61+
1,2 8
62+
6,2,5,3 4
63+
1,2,3 8
64+
5,3 8
65+
4,*,2,5 4
66+
1,*,5 6
67+
1,*,*,3 6

dotFile/Demo.resizeArray1.dot

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ digraph G {
77
6 [label="Object.getClass" shape=box style=rounded startLine=7 endLine=7]
88
7 [label="C:#Users#22166#Desktop#wjwase#testtest.java" shape=rounded startLine=-1 endLine=-1]
99
1 -> 2 [label=""];
10-
2 -> 5 [label=""];
1110
2 -> 3 [label=""];
1211
4 -> 3 [label=""];
1312
4 -> 5 [label=""];

dotFile/Demo.resizeArray2.dot

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
digraph G {
2+
1 [label="Class.getComponentType" shape=box style=rounded startLine=8 endLine=8]
3+
2 [label="Array.newInstance" shape=box style=rounded startLine=9 endLine=9]
4+
3 [label="CONTROL.IF" shape=diamond startLine=11 endLine=13]
5+
4 [label="Array.getLength" shape=box style=rounded startLine=6 endLine=6]
6+
5 [label="Math.min" shape=box style=rounded startLine=10 endLine=10]
7+
6 [label="Object.getClass" shape=box style=rounded startLine=7 endLine=7]
8+
7 [label="C:#Users#22166#Desktop#wjwase#testtest.java" shape=rounded startLine=-1 endLine=-1]
9+
1 -> 2 [label=""];
10+
2 -> 5 [label=""];
11+
2 -> 3 [label=""];
12+
4 -> 3 [label=""];
13+
4 -> 5 [label=""];
14+
4 -> 2 [label=""];
15+
4 -> 6 [label=""];
16+
5 -> 3 [label=""];
17+
6 -> 5 [label=""];
18+
6 -> 1 [label=""];
19+
6 -> 2 [label=""];
20+
}

dotFile/Demo.resizeArray3.dot

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
digraph G {
2+
1 [label="Class.getComponentType" shape=box style=rounded startLine=8 endLine=8]
3+
2 [label="Array.newInstance" shape=box style=rounded startLine=9 endLine=9]
4+
3 [label="CONTROL.IF" shape=diamond startLine=11 endLine=13]
5+
4 [label="Array.getLength" shape=box style=rounded startLine=6 endLine=6]
6+
5 [label="Math.min" shape=box style=rounded startLine=10 endLine=10]
7+
6 [label="Object.getClass" shape=box style=rounded startLine=7 endLine=7]
8+
7 [label="C:#Users#22166#Desktop#wjwase#testtest.java" shape=rounded startLine=-1 endLine=-1]
9+
1 -> 2 [label=""];
10+
2 -> 5 [label=""];
11+
2 -> 3 [label=""];
12+
4 -> 3 [label=""];
13+
4 -> 5 [label=""];
14+
4 -> 2 [label=""];
15+
4 -> 6 [label=""];
16+
5 -> 3 [label=""];
17+
}

src/CreateDataSet.java

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import java.io.*;
2+
import java.util.ArrayList;
3+
import java.util.HashMap;
4+
import java.util.List;
5+
import java.util.Map;
6+
7+
/**
8+
* 输入:hole由正确答案填充后的groum,以及top10结果集合,且正确答案为结果集合中的第一个元素
9+
*/
10+
public class CreateDataSet {
11+
12+
static HashMap<String, Integer> pathCounter;
13+
static Map<String, String> API2Index;
14+
15+
/**
16+
* 读取api2index的Map
17+
* @param file
18+
* @throws IOException
19+
*/
20+
private static void getPathCounter(File file) throws IOException {
21+
pathCounter = new HashMap<>();
22+
FileReader fileReader = new FileReader(file);
23+
BufferedReader bufferedReader = new BufferedReader(fileReader);
24+
String row;
25+
String path; Integer num;
26+
while ((row = bufferedReader.readLine()) != null) {
27+
path = row.split(" ")[0];
28+
num = new Integer(row.split(" ")[1]);
29+
pathCounter.put(path, num);
30+
}
31+
bufferedReader.close();
32+
fileReader.close();
33+
}
34+
35+
/**
36+
* 创建适用于SVM_Rank的数据集
37+
* 对单个Groum进行处理
38+
* 在做top-10结果替换的时候,做path数目的查询
39+
* @param groum hole由正确答案填充后的groum
40+
* @param results top10结果集合, results第一个为正确答案
41+
*/
42+
public static void create(Groum groum, List<String> results, int qId, File file, PrintWriter pW, Map<String, String> A2I) throws IOException {
43+
getPathCounter(file);
44+
API2Index = A2I;
45+
Map<String, GroumNode> nodeMap = groum.getNodeMap();
46+
GroumNode groumNode = null;
47+
List<String> startList = null;
48+
String rows = null;
49+
for (String id : nodeMap.keySet()) {
50+
groumNode = nodeMap.get(id);
51+
if (groumNode.getOriginalApi().equals(results.get(0))) {
52+
startList = new ArrayList<>();
53+
startList.add(id);
54+
List<List<String>> outList = GetPath.getAllPath(groum, startList, 4); // 此处所得到的path表示是使用Map中的API的index构成
55+
rows = createRows(outList, results, groumNode.getApi(), qId);
56+
writeFile(pW, rows);
57+
}
58+
}
59+
}
60+
61+
/**
62+
* 将String变为List,并得到填充hole的Path(aPath)以及除去hole以外的剩余API组成的Path(bPath)
63+
* @param list
64+
* @param id
65+
* @param result
66+
* @return
67+
*/
68+
private static List<String> convertListToStringAndGetAPathBPath(List<String> list, String id, String result) {
69+
List<String> ret = new ArrayList<String>();
70+
StringBuilder aPath = new StringBuilder();
71+
StringBuilder bPath = new StringBuilder();
72+
boolean ifStarted = false;
73+
boolean ifMeetHole = false;
74+
for (String item : list) {
75+
if (item.equals(id)) {
76+
aPath.append(result);
77+
aPath.append(",");
78+
}
79+
else {
80+
aPath.append(item);
81+
bPath.append(item);
82+
aPath.append(",");
83+
bPath.append(",");
84+
}
85+
}
86+
if (aPath.charAt(aPath.length() - 1) == ',') aPath.deleteCharAt(aPath.length() - 1);
87+
if (bPath.charAt(bPath.length() - 1) == ',') bPath.deleteCharAt(bPath.length() - 1);
88+
ret.add(aPath.toString());
89+
ret.add(bPath.toString());
90+
return ret;
91+
}
92+
93+
/**
94+
* 构建形式为"3 qid:1 1:1 2:1 3:0 4:0.2 5:0"每行数据元
95+
* @param outList 使用hole抽取的Path
96+
* @param results top10结果集
97+
* @param id 正确答案在Map中的Id
98+
* @param qId 数据元中的qid
99+
*/
100+
private static String createRows(List<List<String>> outList, List<String> results, String id, int qId) {
101+
StringBuilder rows = new StringBuilder();
102+
List<String> midRes = null;
103+
double feature = 0;
104+
int featureId = 1;
105+
boolean ifFirst = true;
106+
String resultId;
107+
for (String result : results) {
108+
if (ifFirst) {
109+
rows.append("2"); // 正确答案的rank值为2
110+
ifFirst = false;
111+
}
112+
else {
113+
rows.append("1"); // 非正确答案的rank值均为1
114+
}
115+
rows.append(" ");rows.append("qid:");rows.append(qId);rows.append(" ");
116+
featureId = 1;
117+
boolean ifFirstFeature = true;
118+
for (List<String> path : outList) {
119+
resultId = API2Index.get(result);
120+
midRes = convertListToStringAndGetAPathBPath(path, id, resultId);
121+
if (midRes.get(1).length() == 1) continue;
122+
System.out.println(midRes.get(1));
123+
System.out.println(midRes.get(0));
124+
feature = (double)pathCounter.getOrDefault(midRes.get(1), 0) / pathCounter.getOrDefault(midRes.get(0), 1); // 此处对a值设定最小为1
125+
if (ifFirstFeature) ifFirstFeature = false;
126+
else rows.append(" ");
127+
rows.append(featureId);rows.append(":");rows.append(feature);
128+
++featureId;
129+
}
130+
rows.append("\n");
131+
}
132+
return rows.toString();
133+
}
134+
135+
/**
136+
* 将一个Groum的数据集写入文件
137+
* @param pW
138+
* @param rows
139+
*/
140+
private static void writeFile(PrintWriter pW, String rows) {
141+
pW.write(rows);
142+
}
143+
}

src/GetPath.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@ private static void removeDuplicate(List<List<String>> target) {
105105
target.clear();
106106
target.addAll(set);
107107
}
108+
109+
/**
110+
* 将图上的id转为Map中的id
111+
* @param pathResult
112+
* @param source
113+
*/
108114
public static void convert(List<List<String>> pathResult, List<List<String>> source) {
109115
Map<String, GroumNode> nodeMap = targetGraph.getNodeMap();
110116
for (List<String> sourcePath : source) {

src/GroumNode.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
public class GroumNode implements Serializable {
66

7-
private String id;
8-
private String api;
9-
private String originalApi;
7+
private String id; // 图上节点id
8+
private String api; // API在Map中的index
9+
private String originalApi; // API名字
1010
private int startLine;
1111
private int endLine;
1212
private List<GroumNode> children = new ArrayList<>();

src/Main.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
public class Main {
88
public static void main(String[] args) throws IOException {
99
Map<String, String> API2Index;
10-
File api2index = new File("/home/x/mydisk/RecRank/apiNew.txt");
11-
File dotFile = new File("/home/x/mydisk/Gralan/wjwase/datafile2");
12-
File allPathFile = new File("/home/x/mydisk/RecRank/allPathFile.txt");
13-
File countPathFile = new File("/home/x/mydisk/RecRank/countPathFile.txt");
10+
File api2index = new File("/Users/zhangmingrui/Desktop/RecRankJavaEmersion/APIIndexMap.txt");
11+
File dotFile = new File("/Users/zhangmingrui/Desktop/RecRankJavaEmersion/dotFile");
12+
File allPathFile = new File("/Users/zhangmingrui/Desktop/RecRankJavaEmersion/AllPath.txt");
13+
File countPathFile = new File("/Users/zhangmingrui/Desktop/RecRankJavaEmersion/PathCount.txt");
1414
System.out.println("Read API2Index...");
1515
API2Index = ReadAPI2Index.getAPI2IndexMap(api2index);
1616
System.out.println("Finish Read API2Index");

0 commit comments

Comments
 (0)