source: trunk/CrossPare/decent/epsilon/query/addLabels.eol @ 115

Last change on this file since 115 was 32, checked in by ftrautsch, 10 years ago

integrating decent into crosspare

File size: 6.1 KB
RevLine 
[32]1import "../libraries/arff/common.eol";
2import "../libraries/decent/common.eol";
3import "../libraries/decent/logging.eol";
4
// Announce the run and remember when it started; the elapsed time is logged
// at the end of the script.
"Running addlabels".log(1);
var start = Native("java.lang.System").currentTimeMillis();


// Separator that getValues(s) puts in front of every value when an instance
// is rendered as one line of text (used by checkForCompleteness).
var s = ",";

// Regular expression applied to the "Artifact.Name" value of each instance;
// checkForCompleteness skips the instances whose name matches it.
//TODO: move to common
//TODO: remove once established at earlier steps
var nestedAnonymousClassFilter = "\"[\\w]+\\$[\\d]+.*\"";

// NOTE(review): not read by any statement visible in this file (the operation
// parameter of the same name shadows it) - confirm before removing.
var threshold = 100;

// Target attribute name -> threshold. The initial value is only a placeholder:
// setConfidenceThresholds overwrites it before any label is assigned.
//TODO: export options
var targetAttributes = new Map;
targetAttributes.put("Artifact.Target.BugFix.AverageWeight",0.1);

// Further target attributes, currently disabled.
/*
targetAttributes.put("Artifact.Target.Refactoring.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.Fix.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.IssueCount.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.IssueReference.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.UsersPerIssue.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.CommentsPerIssue.AverageWeight",0.1);

targetAttributes.put("Artifact.Target.BugFix.Shared.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.Refactoring.Shared.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.Fix.Shared.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.IssueReference.Shared.AverageWeight",0.1);

targetAttributes.put("Artifact.Target.BugFix.Churn.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.Refactoring.Churn.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.Fix.Churn.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.IssueReference.Churn.AverageWeight",0.1);

targetAttributes.put("Artifact.Target.BugFix.Size.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.Refactoring.Size.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.Fix.Size.AverageWeight",0.1);
targetAttributes.put("Artifact.Target.IssueReference.Size.AverageWeight",0.1);
*/

// File behind the ARFFx model, derived from the resource URI by stripping a
// leading "file:" scheme.
// The getModelFile() variant below does not work with binary resources.
//var modelFile = new Native("java.io.File") (ARFFx.getModelFile());
// NOTE(review): the URI string is used as a path verbatim, so any URI escaping
// (e.g. of spaces) would end up in the file name - confirm this is acceptable.
var modelFile = new Native("java.io.File") (ARFFx.getModelImpl().getURI().toString().replaceAll("^file:",""));

//var CONFIDENCE = "CONFIDENCE".getARFFAttribute();
//var LABEL = "LABEL".getARFFAttribute();
51//ARTIFACTS
// Label every ARFFx model that contains at least one instance.
for (arffx in ARFFx!Model.allInstances().select(x|x.data.size() > 0)) {
	// Work on a per-model copy of the configured targets.
	// checkTargetAttributes removes the targets a model does not declare and
	// setConfidenceThresholds overwrites the thresholds; doing that on the
	// shared map would silently drop a target for every later model as soon as
	// one model lacks it.
	var modelTargets = new Map;
	modelTargets.putAll(targetAttributes);

	arffx.checkTargetAttributes(modelTargets);
	arffx.checkForCompleteness();

	//these will be recalculated for the bags..
	arffx.setConfidenceThresholds(modelTargets);
	arffx.assignClassAndConfidence(modelTargets);
}


// Log the elapsed time of the whole run.
var end = Native("java.lang.System").currentTimeMillis();
var duration = end - start;
("Duration: "+duration.toMinutes().round(5)).log(1);
66
67
// Removes from targetAttributes every key that is not the name of one of this
// model's attributes. The map passed in is modified in place.
operation ARFFx!Model checkTargetAttributes(targetAttributes : Map) {
	var declaredNames = self.attributes.collect(attribute|attribute.name);
	// reject() builds a separate collection, so removing entries from the map
	// inside the loop does not disturb the iteration.
	var undeclared = targetAttributes.keySet().reject(key|declaredNames.includes(key));
	for (key in undeclared) {
		targetAttributes.remove(key);
	}
}
74
// For every target attribute in the map, assigns label and confidence to every
// instance of this model, using the map value as the threshold.
// Visits each instance once per target attribute - slow?
operation ARFFx!Model assignClassAndConfidence(targetAttributes : Map) {
	for (attributeName in targetAttributes.keySet()) {
		var cutoff = targetAttributes.get(attributeName);
		for (instance in self.data) {
			instance.assignClassAndConfidence(attributeName, cutoff);
		}
	}
}
83
// Recomputes the threshold of every target attribute from this model's data:
// half of the mean of the attribute's values, rounded with round(4).
// The map passed in is updated in place (key -> new threshold). A target for
// which the model holds no values keeps its current threshold.
operation ARFFx!Model setConfidenceThresholds(targetAttributes : Map) {
	//self.name.println();
	//TODO: store as meta data
	//TODO: store mean divisor as meta-data
	for (a in targetAttributes.keySet()) {
		//("  "+a +" -> "+ targetAttributes.get(a)).println();
		// Resolve the attribute once per target instead of once per instance.
		var attribute = a.getARFFAttribute(self);
		var v = self.data.collect(x|x.getValue(attribute).asDouble());
		//v = v.normalizeMinMax(0.asDouble(), 1.asDouble());
		//("  "+v.getMin()+" : "+v.getMax()+" : "+v.getMean()+" : "+v.getVariance()+" : "+v.getStandardDeviation()).println();
		// Without values there is no mean to derive a threshold from.
		if (not v.isEmpty()) {
			var t = (v.getMean()/2).round(4);
			//("  "+a +" -> "+ t).println();
			// Overwrites the value of an existing key only, so the key set
			// being iterated keeps its structure.
			targetAttributes.put(a,t);
			//("  Non-zero:\t\t"+v.select(x|x <> 0).size()).println();
			//("  Above threshold:\t"+v.select(x|x > t).size()).println();
		}
	}
	//"  updated".println();
	//("    "+targetAttributes).println();
}
102
// Variant for a collection of ARFFx models: recomputes the threshold of every
// target attribute from the values of all models in the collection taken
// together (half of the overall mean, rounded with round(4)).
// The map passed in is updated in place. A target for which no values were
// collected (empty collection or models without data) keeps its threshold.
operation Collection setConfidenceThresholds(targetAttributes : Map) {
	//TODO: store as meta data
	//TODO: store mean divisor as meta-data
	for (a in targetAttributes.keySet()) {
		var v = new Sequence();
		for (arffx in self) {
			// Resolve the attribute once per model instead of once per instance.
			var attribute = a.getARFFAttribute(arffx);
			v.addAll(arffx.data.collect(x|x.getValue(attribute).asDouble()));
		}
		//v = v.normalizeMinMax(0.asDouble(), 1.asDouble());
		//("  "+v.getMin()+" : "+v.getMax()+" : "+v.getMean()+" : "+v.getVariance()+" : "+v.getStandardDeviation()).println();
		// Without values there is no mean to derive a threshold from.
		if (not v.isEmpty()) {
			var t = (v.getMean()/2).round(4);
			targetAttributes.put(a,t);
			//("  Non-zero:\t\t"+v.select(x|x <> 0).size()).println();
			//("  Above threshold:\t"+v.select(x|x > t).size()).println();
		}
	}
}
119
120
// Logs every instance whose number of values differs from the number of
// attributes of this model, followed by its values and the attributes it lacks.
// Instances whose "Artifact.Name" value matches the script-level
// nestedAnonymousClassFilter are skipped. The reported line number starts at 2
// for the first instance that is not skipped and counts only such instances.
// Uses the script-level separator s to render the values.
operation ARFFx!Model checkForCompleteness() {
	var line = 1;
	var NameAttribute = "Artifact.Name".getARFFAttribute(self);
	for (i in self.data.select(x|not x.getValue(NameAttribute).matches(nestedAnonymousClassFilter))) {
	//for (i in self.data) {
		line = line+1;
		if (i.values.size() <> self.attributes.size()) {
			(self.name+" : Line "+line+" : Value and attribute counts do not match : " + i.values.size() +" vs "+ self.attributes.size()).log(1);
			// getValues puts the separator in front of every value, so the
			// rendered text starts with one. Strip it only when it is there:
			// an instance without any values renders as "" and an
			// unconditional substring(1) would throw on exactly the kind of
			// instance this check is meant to report.
			var rendered = i.getValues(s);
			if (rendered.startsWith(s)) {
				rendered = rendered.substring(s.length());
			}
			rendered.log(1);
			i.printMissingAttributes();
		}
	}
}
134
// Logs the name of every attribute of the containing model for which this
// instance holds no value.
operation ARFFx!Instance printMissingAttributes() {
	var unassigned = self.eContainer().attributes.reject(attribute|self.values.exists(value|value.ofAttribute = attribute));
	for (attribute in unassigned) {
		("  Missing attribute: "+attribute.name).log(1);
	}
}
142
143
// Derives label and confidence of this instance for one target attribute and
// stores them through updateValue under "LABEL.<baseAttribute>" and
// "CONFIDENCE.<baseAttribute>".
//   label      : "true" when the value is strictly above threshold, else "false"
//   confidence : "low" when the value lies strictly between 0.09*threshold and
//                1.01*threshold, else "high"
// NOTE(review): a return type is declared but nothing is returned.
operation ARFFx!Instance assignClassAndConfidence(baseAttribute : String, threshold : Real) : OrderedSet {
	//TODO: add attributes to filter
	var rawValue = self.getValue(baseAttribute.getARFFAttribute(self.eContainer()));
	var numeric = rawValue.asDouble();

	var label = "false";
	if (numeric > threshold) {
		label = "true";
	}

	//TODO: also export as parameters
	// NOTE(review): the lower factor 0.09 next to the upper factor 1.01 looks
	// like it may have been meant as 0.99 - confirm the intended band.
	var confidence = "high";
	if (numeric > 0.09*threshold and numeric < 1.01*threshold) {
		confidence = "low";
	}

	self.updateValue(confidence, "CONFIDENCE."+baseAttribute);
	self.updateValue(label, "LABEL."+baseAttribute);
}
162
// Renders the values of this instance as one string: the content of every
// value, each preceded by the separator s (so a non-empty result starts with
// the separator). Every occurrence of "NaN" in the result is replaced by "0.0".
operation ARFFx!Instance getValues(s : String) : String {
	var rendered = "";
	for (value in self.values) {
		rendered = rendered + s + value.content;
		//rendered = rendered + s + value.ofAttribute.name+"="+value.content;
	}
	// The former trailing substring(0) returned the string unchanged and is
	// therefore omitted.
	return rendered.replace("NaN","0.0");
}
Note: See TracBrowser for help on using the repository browser.