# Allenfenqu/srg_kmeans.py

import numpy as np
import pandas as pd
from TOPSIS import TOPSIS
import random
# Number of regions requested from the initial K-Means seeding step.
Initial_partitions = 60

# Load the pre-processed link table and keep it as a plain NumPy array.
links = pd.read_csv('links_processed.csv').to_numpy()
# for chuu in range(1, 17):
#     for dic in range(1):
#         tic()
#         chu = 10
#         zhong = 2


# Label every link endpoint with an intersection number (1, 2, 3, ...).
# Several links can start or end at the same intersection, so identical
# coordinate pairs must receive the same label.
#
# The first two columns of `links` are stacked on top of columns 2-3, giving
# one coordinate pair per row (all first endpoints, then all second
# endpoints).  A third, zero-filled column receives the label.
node = np.concatenate((links[:, :2], links[:, 2:4]), axis=0)
node = np.hstack((node, np.zeros((len(node), 1))))

# `noi` is the next unused label.  A dict keyed by the coordinate pair
# replaces the former rescan of all earlier rows for every row: a pair seen
# before reuses the label of its first occurrence, a new pair gets `noi`.
noi = 1
first_label = {}
for i in range(node.shape[0]):
    key = (node[i, 0], node[i, 1])
    if key not in first_label:
        first_label[key] = noi
        noi += 1
    node[i, 2] = first_label[key]

# Fold the labels back into one row per link:
# column 0 = label of the first endpoint, column 1 = label of the second.
half = len(node) // 2
node = np.concatenate((node[:half, 2].reshape(-1, 1), node[half:, 2].reshape(-1, 1)),
                      axis=1)

np.save('node.npy', node)

# Three zero-filled columns are appended to `links` before the column
# selection below.
# NOTE(review): the original author's remark says this padding was added only
# so that `yanlinks` could be built, and that `yanlinks` is "not right" as a
# result -- verify the column mapping against the CSV schema.
links = np.hstack((links, np.zeros((len(links), 3))))

# yanlinks layout: [node labels (2) | links columns 5, 6, 7, 4, 0, 1, 2, 3 | 4 zero columns]
reordered_columns = links[:, [5, 6, 7, 4, 0, 1, 2, 3]]
trailing_zeros = np.zeros((len(links), 4))
yanlinks = np.concatenate((node, reordered_columns, trailing_zeros), axis=1)

# Column 4 is overwritten with a 1-based link id.
yanlinks[:, 4] = np.arange(1, len(yanlinks) + 1)

# 1-based ids of all links, and a symmetric link-to-link adjacency matrix.
road = np.arange(1, node.shape[0] + 1)
link_count = len(road)
adjacency = np.zeros((link_count, link_count))

# Two links are adjacent when they share at least one endpoint label.
# Only links with a larger row index are looked up for each link; the
# symmetric entry is written at the same time.
for i in range(link_count):
    first_end, second_end = node[i, 0], node[i, 1]
    shares_endpoint = (
        (node[:, 0] == first_end)
        | (node[:, 1] == first_end)
        | (node[:, 0] == second_end)
        | (node[:, 1] == second_end)
    )
    later_links = np.where(shares_endpoint[i + 1:])[0] + (i + 1)
    if later_links.size > 0:
        adjacency[i, later_links] = 1
        adjacency[later_links, i] = 1

# Number of neighbours per link, and the links that have none.
row_sums = adjacency.sum(axis=1)
zero_row_indices = np.where(row_sums == 0)[0]

from sklearn.cluster import KMeans

N = Initial_partitions  # number of clusters

# Cluster the links by columns 6 and 7 of `yanlinks` (described by the
# original author as longitude and latitude) into N groups.
#
# The model is fitted exactly once so that the labels and the cluster centres
# come from the same fit.  Fitting twice (as before) produced labels and
# centres from two independent random initialisations that need not agree.
kmeans_model = KMeans(n_clusters=N).fit(yanlinks[:, [6, 7]])
Cluster_Label = kmeans_model.labels_
idx = kmeans_model.cluster_centers_  # shape (N, 2): one centre per row

# dis[k, j] = distance from cluster centre k to link j.  The reshape to (N, 1)
# broadcasts against the per-link column, giving one row per centre.
# NOTE(review): 111000 is presumably metres per degree -- confirm.
dis = 111000 * np.sqrt(
    (yanlinks[:, 6] - idx[:, 0].reshape(N, 1)) ** 2 + (yanlinks[:, 7] - idx[:, 1].reshape(N, 1)) ** 2)

# For every cluster centre: mm is the distance to its closest link (shape
# (N, 1)) and nn is the row index of that link (shape (N,)).
mm, nn = np.min(dis, axis=1, keepdims=True), np.argmin(dis, axis=1)

# Per-link attribute taken from column 4 of `links` (the original author
# describes it as the road length).
data = links[:, 4]
m, n = data.shape[0], 1  # m links, n attribute columns

# pattern[:, 0] = attribute value, pattern[:, 1] = region index (-1 = unassigned).
pattern = np.zeros((m, n + 1))
pattern[:, 0] = data
pattern[:, 1] = -1

# center[k, 1] = row index of the seed link of region k,
# center[k, 0] = attribute value of that seed link.
center = np.zeros((N, n + 1))
for x, seed_link in enumerate(nn):
    center[x, 1] = seed_link
    center[x, 0] = data[seed_link]
    # Seed the region; if two regions share a seed link the later one wins.
    pattern[seed_link, 1] = x

# Accumulators allocated here; nothing in the visible code reads them.
distance = np.zeros(N)
num = np.zeros(N)
new_center = np.zeros((N, n))

# Seeded region growing.
#
# In every pass each region x tries to absorb exactly one unassigned link that
# touches it.  Candidates are scored on two criteria (variance of column 0 of
# `pattern` if the candidate joined the region, and distance of the candidate
# to the region's K-Means centre) and the project helper TOPSIS picks one.
#
# The loop stops when every link is assigned OR when a full pass assigns
# nothing.  The second condition is required: links that are not connected to
# any region can never become candidates, and without it the loop would spin
# forever.  Such links keep the value -1 here (0 after the final shift).
unassigned_links = 2  # sentinel > 0 so the first pass runs (also the first value printed)
while unassigned_links > 0:
    print(unassigned_links)
    grown_this_pass = 0

    for x in range(Initial_partitions):
        members = np.where(pattern[:, 1] == x)[0]
        if members.size == 0:
            # Region has no links (e.g. its seed was taken by another region).
            continue

        # Links adjacent to at least one member and not yet in any region.
        touching = np.where(np.sum(adjacency[members, :], axis=0) > 0)[0]
        already_assigned = np.where(pattern[:, 1] > -1)[0]
        candidates = np.setdiff1d(touching, already_assigned)
        if candidates.size == 0:
            continue

        member_values = pattern[members, 0].tolist()
        road_evaluation = np.zeros((len(candidates), 2))
        for row, candidate in enumerate(candidates):
            # Criterion 0: sample variance of the region with the candidate added.
            road_evaluation[row, 0] = np.var(member_values + [pattern[candidate, 0]], ddof=1)
            # Criterion 1: distance from the candidate (looked up by its
            # 1-based id in yanlinks column 4) to the centre of region x.
            candidate_xy = yanlinks[yanlinks[:, 4] == candidate + 1, 6:8]
            road_evaluation[row, 1] = 111000 * np.sqrt(np.sum((candidate_xy - idx[x, :]) ** 2))

        try:
            if road_evaluation.shape[0] > 1:
                # Only the second return value is used, as a position into
                # `candidates`.
                # NOTE(review): confirm TOPSIS returns a 0-based integer index
                # here -- an earlier variant of this line used `n - 1`.
                _, best = TOPSIS(road_evaluation)
            else:
                best = 0
            pattern[candidates[best], 1] = x
        except Exception:
            # Best effort, as before: a failed selection skips this region for
            # the current pass.  KeyboardInterrupt is no longer swallowed.
            continue
        grown_this_pass += 1

    unassigned_links = np.sum(pattern[:, 1] == -1)
    if grown_this_pass == 0:
        # No region could grow; the remaining links are unreachable.
        break

# Region indices were 0-based; shift them to 1-based.  Links that are still
# unassigned move from -1 to 0.
pattern[:, 1] = pattern[:, 1] + 1


# Copy results into yanlinks: column 3 from links column 9, column 10 from the
# 1-based region number of every link.
# NOTE(review): links[:, 9] requires `links` to have at least 10 columns after
# the zero padding -- confirm against the CSV schema.
yanlinks[:, 3] = links[:, 9]
yanlinks[:, 10] = pattern[:, 1]

# Re-read the link table and drop the rows whose region number is 0, then
# store the reduced table under a new name.
data_path = r''
df2 = pd.read_csv(data_path + 'links_processed.csv')

zero_rows = yanlinks[:, 10] == 0
deleted_rows_indices = np.nonzero(zero_rows)[0]
df2 = df2.drop(deleted_rows_indices, errors='ignore')
df2.to_csv(data_path + 'links_test1.csv', index=False)

# Keep only the links whose region number is neither 0 nor -1.
has_region = ~zero_rows & (yanlinks[:, 10] != -1)
yanlinks = yanlinks[has_region]

# Distinct endpoint labels of the remaining links, and a zeroed matrix of that
# size (nothing in the visible code fills or reads it afterwards).
road = np.unique(np.concatenate((yanlinks[:, 1], yanlinks[:, 0]), axis=0))
adjacency = np.zeros((len(road), len(road)))

# `node` still has one row per ORIGINAL link, while `yanlinks` may have lost
# rows.  Column 4 of yanlinks holds the original 1-based link id, so it is the
# only safe way to map a remaining link back to its row in `node`.
link_total = node.shape[0]
kept_idx = yanlinks[:, 4].astype(int) - 1  # 0-based original row of every remaining link
is_kept = np.zeros(link_total, dtype=bool)
is_kept[kept_idx] = True

# adregion[i, j] = 1 when remaining links i and j share an endpoint label.
# The matrix is indexed by original link row, so it is sized by the original
# link count; rows and columns of removed links stay zero.
adregion = np.zeros((link_total, link_total))
for i in kept_idx:
    first_end, second_end = node[i, 0], node[i, 1]
    shares_endpoint = (
        (node[:, 0] == first_end)
        | (node[:, 1] == first_end)
        | (node[:, 0] == second_end)
        | (node[:, 1] == second_end)
    )
    shares_endpoint &= is_kept       # ignore links that were removed
    shares_endpoint[:i + 1] = False  # only later links; symmetry is filled below
    later_links = np.where(shares_endpoint)[0]
    if later_links.size > 0:
        adregion[i, later_links] = 1
        adregion[later_links, i] = 1

# Weight every column by the region number of its link, so that row i lists
# the regions of the links adjacent to link i.
adregion[:, kept_idx] = adregion[:, kept_idx] * yanlinks[:, 10].astype(int)

# subregion_adj[p, q] counts how strongly regions p+1 and q+1 are connected.
subregion_adj = np.zeros((Initial_partitions, Initial_partitions))

# For every link, collect the distinct non-zero region numbers found in its
# row of adregion.  When more than one region appears, every pair of those
# regions is counted as adjacent; each unordered pair adds 2 to both symmetric
# entries.
for neighbour_regions in adregion:
    present = np.unique(neighbour_regions)
    present = present[present != 0]
    if present.shape[0] <= 1:
        continue
    region_rows = [int(region) - 1 for region in present]
    for first in range(len(region_rows)):
        for second in range(first + 1, len(region_rows)):
            p, q = region_rows[first], region_rows[second]
            subregion_adj[p, q] += 2
            subregion_adj[q, p] += 2

# Keep an unthresholded copy before weak adjacencies are removed.
directed_adjacency_matrix = subregion_adj.copy()

# Adjacencies weaker than (smallest column maximum - 2) are ignored; whatever
# remains is reduced to a 0/1 indicator.
min_value = np.max(subregion_adj, axis=0).min() - 2
weak = subregion_adj < min_value
subregion_adj[weak] = 0
subregion_adj[subregion_adj > 1] = 1
directed_adjacency_matrix[directed_adjacency_matrix > 1] = 1
# Region numbers that actually occur among the remaining links.
unique_values, unique_indices = np.unique(yanlinks[:, 10], return_index=True)

# Asb: average similarity between each region and its strongly adjacent
# regions, based on the mean and variance of yanlinks column 5.
# NOTE(review): the original comments call column 5 a speed, while it is
# filled from links column 4, described elsewhere as a length -- confirm.
Asb = 0
for i in unique_values:
    wu = np.where(subregion_adj[int(i) - 1, :] == 1)  # tuple of index arrays
    wu_1 = wu[0]
    num_elements = len(wu_1)  # number of adjacent regions (the denominator)
    if num_elements == 0:
        # A region without any strongly adjacent neighbour contributes
        # nothing; dividing by zero here would raise ZeroDivisionError.
        continue

    # Mean and variance of the main region (constant over the inner loop).
    selected_values1 = yanlinks[yanlinks[:, 10] == i][:, 5]
    average1 = np.mean(selected_values1)
    variance1 = np.var(selected_values1)
    smrjj = 2 * variance1

    smrjj_divide_smrjj_ = 0
    for j in wu_1:
        # Mean and variance of one adjacent region (j is 0-based).
        selected_values = yanlinks[yanlinks[:, 10] == j + 1][:, 5]
        average = np.mean(selected_values)
        variance = np.var(selected_values)

        # NOTE(review): smrjj_ is 0 when both regions are constant and equal;
        # NumPy then yields nan/inf with a warning rather than raising.
        smrjj_ = variance + variance1 + (average - average1) ** 2
        smrjj_divide_smrjj_ += smrjj / smrjj_

    Asb += smrjj_divide_smrjj_ / num_elements

# The sum is averaged over the requested number of regions, not over the
# number of regions that actually occur.
Asb = Asb / Initial_partitions
print('Asb=', Asb)

# Tvb: sum over all occurring regions of the variance of yanlinks column 5
# within the region.
region_variances = [np.var(yanlinks[yanlinks[:, 10] == region][:, 5]) for region in unique_values]
Tvb = sum(region_variances)

print('Tvb=', Tvb)