# Vertex of the actor/movie graph.
#   name - label of the vertex (an actor name or a movie key)
#   sn   - neighbour set used while the graph is being loaded, so that a
#          repeated edge is stored only once
#   n    - neighbour vector filled from `sn` after loading; this is the field
#          the traversal in `centrality_mean` iterates over
type Node
    name::UTF8String
    sn::Set{Node}
    n::Vector{Node}
    Node(name) = new(name, Set{Node}(), Node[])
end

# A graph is a lookup table from vertex name to vertex.
typealias Graph Dict{UTF8String, Node}

# Return the node registered under `name`; if there is none yet, create one,
# store it in `G` and return it.
function get(G::Graph, name)
    if has(G, name)
        return G[name]
    end
    G[name] = Node(name)
end

# Mean breadth-first distance from `start_node` to every node reachable from
# it. The start node itself is part of the average, with distance 0.
#
#   G          - the graph; `start_node` must be one of its keys
#   start_node - name of the node the traversal starts from
#   next,nnext - caller-supplied scratch vectors (current and following
#                frontier). Both are emptied on entry and overwritten, which
#                lets a caller reuse them across many calls.
function centrality_mean(G::Graph, start_node, next::Vector{Node}, nnext::Vector{Node})
    dists = Dict{Node,Uint64}()
    del_all(next)
    del_all(nnext)
    push(next, G[start_node])
    cdist = 0
    while !isempty(next)
        del_all(nnext)
        for n = next
            # A node can be queued several times; only its first visit counts.
            if !has(dists, n)
                dists[n] = cdist
                for neigh in n.n
                    push(nnext, neigh)
                end
            end
        end
        cdist += 1
        # Swap the roles of the two buffers for the following level.
        next, nnext = nnext, next
    end
    mean(values(dists))
end

# Load "imdb-1.tsv" and build the graph from it.
#
# Each usable line holds at least three tab-separated fields: the first is the
# actor name, the second and third are joined with "_" to form the movie key.
# Lines with fewer than three fields (e.g. a blank line at the end of the
# file) are skipped rather than aborting the whole load.
#
# Returns the tuple (graph, sorted vector of actor names).
function read_graph()
    G = Graph()
    actors = Set()
    open("imdb-1.tsv", "r") do io
        while !eof(io)
            k = split(strip(readline(io)), "\t")
            if length(k) < 3
                continue
            end
            actor, movie = k[1], k[2]*"_"*k[3]
            ac, mn = get(G, actor), get(G, movie)
            add(actors, actor)
            add(ac.sn, mn)
            add(mn.sn, ac)
        end
    end
    # Freeze every neighbour set into a vector and release the set's contents.
    for (k,n) = G
        n.n = elements(n.sn)
        del_all(n.sn)
    end
    G, sort!(elements(actors))
end

# Compute the mean distance for the first 100 actors (in sorted name order),
# printing each one as it is computed, then print the 20 smallest results as
# (mean distance, actor) pairs. Both limits shrink automatically when the
# input holds fewer actors.
function main()
    G, actors = read_graph()
    d = Dict{UTF8String, Float64}()
    n, nn = Node[], Node[]
    for a in actors[1:min(100, length(actors))]
        d[a] = centrality_mean(G, a, n, nn)
        print("$a: ", d[a], "\n")
    end
    # Tuples sort by their first component, i.e. by mean distance.
    vals = sort!([(v,k) for (k,v) in d])
    for i = 1:min(20, length(vals))
        print("$i: ", vals[i], "\n")
    end
    # Single-actor example:
    # print(centrality_mean(G, "Hoffman, Dustin", n, nn), "\n")
end

@time main()