SQL版本的Dijkstra最短路径算法

浏览数：29 / 时间：2015年06月12日

受这篇文章《SQL,NoSQL以及数据库的实质》结尾处题目的启发，我尝试写了一个SQL版本的Dijkstra最短路径算法。算法描述如下：

前提假设:

Hive支持Stored Procedure

或者

Mysql支持Insert into、insert overwrite、create table as select操作

数据结构:

Table meta_distances(

src int,

dst int,

distance int

)

Table known_nodes(

node int,

distance int

)

Table unknown_nodes(

node int,

distance int

)

求节点1到其他所有节点的距离

初始，各表包含的数据:

meta_distances:

各个节点之间的距离

known_nodes:

(1,0)

unknown_nodes:

(2, MaxInt)

(3, MaxInt)

(4, MaxInt)

…...

(n, MaxInt)

pivot_node=1

Create Procedure Dijkstra(IN pivot_node int)
     Begin
          declare unknown_nodes_count int default 1
          While unknown_nodes_count>0 do
               
               drop table tmp_distance_a
               //计算unknown_nodes中每一个node若经过pivot_node，与源点的距离
               create table tmp_distance_a
               as
               select
                    dst,
                    distance+(select distance from known_nodes where node=pivot_node)
               from
                    (select
                         distinct
                         meta_distances.*
                    from
                         meta_distances as f1,
                         unknown_nodes as f2
                    where
                         f1.dst=f2.node
                         and
                         src=pivot_node
                    )as tmp
                    
              //更新unknown_nodes的距离信息
               insert overwrite table unknown_nodes
               select
                    f2.node
                    IF(f1.node is null,
                         f2.distance,
                         IF(f1.distance>f2.distance,
                              f2.distance,
                              f1.distance
                         )
                    )
               from
                    tmp_distance_a as f1
               right outer join
                    unknown_nodes as f2
               on
                    f1.dst=f2.node

               //挑选出最小的node，放入known_nodes中
               insert into table known_nodes
               select
                    node,
                    distance
               from
                    unknown_nodes
               where
                    distance=min(distance)
              
               //挑选出最小的node，最为下一个pivot_node
               select
                    node
                    into pivot_node
               from
                    unknown_nodes
               where
                    distance=min(distance)

               //从unknown_nodes中删除最小node
               delete
               from
                    unknown_nodes
               where
                    distance=min(distance)
               
               //计算unknown_nodes中剩余node的数量
               select
                    count(*)
                    into unknown_nodes_count
               from
                    unknown_nodes

          End While
     End