1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
package spark.scheduler
import collection.mutable.ArrayBuffer
// Information about a specific input split. Wraps both the old (mapred) and
// new (mapreduce) Hadoop InputSplit APIs so callers need not distinguish them.
/**
 * Describes one input split: the InputFormat that produced it, a single host where
 * the split's data resides, the originating path, the split length in bytes, and the
 * underlying Hadoop split object itself.
 *
 * @param inputFormatClazz the InputFormat class that generated this split
 * @param hostLocation     one host on which this split's data is stored
 * @param path             input path this split belongs to
 * @param length           split length in bytes
 * @param underlyingSplit  the wrapped Hadoop split (old mapred or new mapreduce API)
 */
class SplitInfo(val inputFormatClazz: Class[_], val hostLocation: String, val path: String,
    val length: Long, val underlyingSplit: Any) {

  override def toString(): String = {
    s"SplitInfo ${super.toString} .. inputFormatClazz $inputFormatClazz" +
      s", hostLocation : $hostLocation, path : $path" +
      s", length : $length, underlyingSplit $underlyingSplit"
  }

  override def hashCode(): Int = {
    // Same 31-based combination as the classic hand-rolled hash; length is reduced to
    // its low 31 bits (overflow is irrelevant for a hash code). underlyingSplit is
    // deliberately excluded: many Hadoop splits lack a meaningful hashCode.
    val tail = Seq(hostLocation.hashCode, path.hashCode, (length & 0x7fffffff).toInt)
    tail.foldLeft(inputFormatClazz.hashCode)((h, part) => h * 31 + part)
  }

  // This is practically useless since most of the Split impl's dont seem to implement equals :-(
  // So unless there is identity equality between underlyingSplits, it will always fail even if it
  // is pointing to same block.
  override def equals(other: Any): Boolean = other match {
    case that: SplitInfo =>
      this.hostLocation == that.hostLocation &&
        this.inputFormatClazz == that.inputFormatClazz &&
        this.path == that.path &&
        this.length == that.length &&
        // other split specific checks (like start for FileSplit)
        this.underlyingSplit == that.underlyingSplit
    case _ => false
  }
}
/**
 * Factory helpers that expand a Hadoop input split into one [[SplitInfo]] per host
 * that stores a replica of the split's data.
 */
object SplitInfo {

  /**
   * Builds one SplitInfo per location of an old-API (mapred) split.
   *
   * @param inputFormatClazz the InputFormat class that generated the split
   * @param path             input path the split belongs to
   * @param mapredSplit      the old-API Hadoop split to expand
   * @return one SplitInfo per host returned by `getLocations` (empty if none)
   */
  def toSplitInfo(inputFormatClazz: Class[_], path: String,
      mapredSplit: org.apache.hadoop.mapred.InputSplit): Seq[SplitInfo] = {
    // Hoisted so getLength is queried once, not once per replica host.
    val length = mapredSplit.getLength
    mapredSplit.getLocations.map { host =>
      new SplitInfo(inputFormatClazz, host, path, length, mapredSplit)
    }.toSeq
  }

  /**
   * Builds one SplitInfo per location of a new-API (mapreduce) split.
   *
   * @param inputFormatClazz the InputFormat class that generated the split
   * @param path             input path the split belongs to
   * @param mapreduceSplit   the new-API Hadoop split to expand
   * @return one SplitInfo per host returned by `getLocations` (empty if none)
   */
  def toSplitInfo(inputFormatClazz: Class[_], path: String,
      mapreduceSplit: org.apache.hadoop.mapreduce.InputSplit): Seq[SplitInfo] = {
    val length = mapreduceSplit.getLength
    mapreduceSplit.getLocations.map { host =>
      new SplitInfo(inputFormatClazz, host, path, length, mapreduceSplit)
    }.toSeq
  }
}
|