import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# Number of initial partitions (KMeans clusters) the road network is split into
Initial_partitions = 60
# Load the data
df = pd.read_csv('links_processed.csv', usecols=[0, 1, 2, 3, 4])
df.columns = ['start_lat', 'start_long', 'end_lat', 'end_long', 'speed']

# Compute the midpoint of each road link
df['center_lat'] = ((df['start_lat'] + df['end_lat']) / 2).round(7)
df['center_long'] = ((df['start_long'] + df['end_long']) / 2).round(7)

# Extract the features used for clustering
features = df[['center_lat', 'center_long']]

# Standardize the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Run KMeans to split the network into Initial_partitions regions
kmeans = KMeans(n_clusters=Initial_partitions, n_init=10)
kmeans.fit(scaled_features)

# Attach the cluster labels to the original data (labels are shifted to start at 1)
df['cluster'] = kmeans.labels_
df['cluster'] = df['cluster'] + 1
df = df.to_numpy()
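
# Optional sanity check (not part of the original pipeline): a rough look at how
# cleanly the chosen Initial_partitions separates the link midpoints, using
# scikit-learn's silhouette score. The metric choice, the sub-sample size and the
# decision to keep 60 clusters are assumptions, not requirements of the method.
from sklearn.metrics import silhouette_score

sil = silhouette_score(scaled_features, kmeans.labels_,
                       sample_size=min(5000, len(scaled_features)), random_state=0)
print('silhouette score for', Initial_partitions, 'clusters:', sil)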

links = pd.read_csv('links_processed.csv')
links = links.to_numpy()
# Stack the start coordinates on top of the end coordinates along axis 0,
# so each row of node is one (lat, long) endpoint of a link
node = np.concatenate((links[:, :2], links[:, 2:4]), axis=0)

# Number the link endpoints, i.e. label the intersections: an intersection can be
# the start or end of several links, so identical coordinates get the same ID
noi = 1
# Append a column of zeros to node to hold the intersection IDs
node = np.hstack((node, np.zeros((len(node), 1))))

for i in range(node.shape[0]):  # iterate over every endpoint row of node

    # Earlier rows that share this row's latitude / longitude
    a = np.where(node[:i, 0] == node[i, 0])[0]
    b = np.where(node[:i, 1] == node[i, 1])[0]
    c = np.intersect1d(a, b)  # indices of earlier rows with identical coordinates
    if c.size > 0:
        x = c.shape[0]
        y = 1
    else:
        x, y = 0, 1
    if x > 0 and y > 0:
        # Intersection seen before: reuse the ID of its first occurrence
        node[i, 2] = node[min(c), 2]
    else:
        # New intersection: assign the next ID
        node[i, 2] = noi
        noi += 1
# Fold the ID column back into two columns: start-node ID and end-node ID per link
node = np.concatenate((node[:int(len(node) / 2), 2].reshape(-1, 1),
                       node[int(len(node) / 2):, 2].reshape(-1, 1)), axis=1)
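
# The labelling loop above is O(n^2). A hedged, vectorized alternative (an
# illustration only, not used by the rest of this script): np.unique over the
# coordinate rows gives every distinct (lat, long) pair one index. The resulting
# IDs differ from the loop's numbering (np.unique sorts and starts at 0) but
# identify the same intersections.
coords = np.concatenate((links[:, :2], links[:, 2:4]), axis=0)
_, inv = np.unique(coords, axis=0, return_inverse=True)
inv = inv.ravel()  # keep the inverse indices 1-D regardless of NumPy version
alt_node = np.stack((inv[:len(links)] + 1, inv[len(links):] + 1), axis=1)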

# Pad links with three extra zero columns so yanlinks can be assembled below;
# note the padded columns are placeholders, so those yanlinks columns are not
# meaningful until they are filled in later
links = np.hstack((links, np.zeros((len(links), 1))))
links = np.hstack((links, np.zeros((len(links), 1))))
links = np.hstack((links, np.zeros((len(links), 1))))
yanlinks = np.concatenate((node, links[:, [5, 6, 7, 4, 0, 1, 2, 3]], np.zeros((len(links), 4))), axis=1)
yanlinks[:, 4] = np.arange(1, len(yanlinks) + 1)  # sequential link ID

road = np.arange(1, node.shape[0] + 1)
adjacency = np.zeros((len(road), len(road)))

# Initialize the partitions

for i in range(len(road)):
    temp1 = np.where(node[:, 0] == node[i, 0])[0]  # links whose start node equals link i's start node
    temp2 = np.where(node[:, 1] == node[i, 0])[0]  # links whose end node equals link i's start node
    temp3 = np.where(node[:, 0] == node[i, 1])[0]  # links whose start node equals link i's end node
    temp4 = np.where(node[:, 1] == node[i, 1])[0]  # links whose end node equals link i's end node
    temp = np.unique(np.intersect1d(np.arange(i + 1, node.shape[0]), np.concatenate((temp1, temp2, temp3, temp4))))
    if len(temp) > 0:
        adjacency[i, temp] = 1
        adjacency[temp, i] = 1
row_sums = np.sum(adjacency, axis=1)

# Indices of all-zero rows (links with no recorded neighbour)
zero_row_indices = np.where(row_sums == 0)[0]


yanlinks[:, 3] = links[:, 9]
yanlinks[:, 10] = df[:, 7]  # cluster ID assigned by the KMeans step
yanlinks = yanlinks[yanlinks[:, 10] != 0]
yanlinks = yanlinks[yanlinks[:, 10] != -1, :]

road = np.unique(np.concatenate((yanlinks[:, 1], yanlinks[:, 0]), axis=0))

adjacency = np.zeros((len(road), len(road)))
adregion = np.zeros((int(np.max(yanlinks[:, 4])), int(np.max(yanlinks[:, 4]))))

for i in range(len(yanlinks[:, 0])):
    temp1 = np.where(node[:, 0] == node[i, 0])[0]
    temp2 = np.where(node[:, 1] == node[i, 0])[0]
    temp3 = np.where(node[:, 0] == node[i, 1])[0]
    temp4 = np.where(node[:, 1] == node[i, 1])[0]
    temp = np.unique(np.intersect1d(np.arange(i + 1, node.shape[0]), np.concatenate((temp1, temp2, temp3, temp4))))
    if len(temp) > 0:
        adregion[i, temp] = 1
        adregion[temp, i] = 1
# adregion records the link-to-link adjacency relations
np.save('adregion.npy', adregion)
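
# Round-trip check (illustrative, not in the original pipeline): the matrix saved
# above can be reloaded anywhere with np.load under the same file name.
adregion_loaded = np.load('adregion.npy')
assert np.array_equal(adregion_loaded, adregion)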

# Weight each column of adregion by the region (cluster) ID of that link,
# so non-zero entries carry the neighbouring link's region number
for i in range(len(yanlinks[:, 1])):
    # print(adregion[:, int(yanlinks[i, 4])])
    # print(int(yanlinks[i, 10]))
    adregion[:, int(yanlinks[i, 4]) - 1] = adregion[:, int(yanlinks[i, 4]) - 1] * int(yanlinks[i, 10])

subregion_adj = np.zeros((Initial_partitions, Initial_partitions))

# Count how often each pair of regions occurs together in a link's neighbourhood
# (the frequency is used to separate strongly from weakly correlated sub-regions)

for i in range(len(adregion[:, 1])):

    # Distinct non-zero region IDs among the neighbours of link i
    a = adregion[i, :]
    a = np.unique(a)
    a = a[a != 0]

    if a.size > 0:
        x = 1
        y = a.shape[0]
    else:
        x, y = 0, 1
    if y > 1:
        for j in range(len(a)):
            for u in range(len(a)):
                if j != u:
                    # subregion_adj holds the sub-region adjacency; larger values
                    # indicate a stronger correlation between the two regions
                    subregion_adj[int(a[j]) - 1, int(a[u]) - 1] += 1
                    subregion_adj[int(a[u]) - 1, int(a[j]) - 1] += 1
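
# Optional consistency check (an added illustration, not in the original code):
# the pairwise counting above increments both (j, u) and (u, j) and never j == u,
# so the count matrix should be symmetric with an all-zero diagonal.
assert np.allclose(subregion_adj, subregion_adj.T)
assert not np.any(np.diag(subregion_adj))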

# Keep a copy of the counts before thresholding
directed_adjacency_matrix = subregion_adj.copy()
# Ignore weakly correlated region pairs: counts below min_value are dropped,
# the remaining entries are binarized
min_value = np.min(np.max(subregion_adj, axis=0)) - 2
subregion_adj[subregion_adj < min_value] = 0
subregion_adj[subregion_adj > 1] = 1
directed_adjacency_matrix[directed_adjacency_matrix > 1] = 1

unique_values, unique_indices = np.unique(yanlinks[:, 10], return_index=True)

Asb = 0  # average similarity between adjacent sub-regions
for i in unique_values:
    wu = np.where(subregion_adj[int(i) - 1, :] == 1)  # wu is a tuple of index arrays
    smrjj_divide_smrjj_ = 0
    # 0726

    wu_1 = wu[0]

    for j in wu_1:

        selected_values_list = [yanlinks[yanlinks[:, 10] == j + 1][:, 5]]

        # Speed mean and variance of one region adjacent to the main region
        selected_values = np.array(selected_values_list)
        average = np.mean(selected_values)
        variance = np.var(selected_values)

        # Speed mean and variance of the main region
        selected_values1 = yanlinks[yanlinks[:, 10] == i][:, 5]
        average1 = np.mean(selected_values1)
        variance1 = np.var(selected_values1)

        smrjj = 2 * variance1  # similarity of the main region with itself (the j-j case)
        smrjj_ = variance + variance1 + (average - average1) ** 2
        smrjj_divide_smrjj_one = smrjj / smrjj_
        smrjj_divide_smrjj_ += smrjj_divide_smrjj_one

    num_elements = len(wu[0])  # number of strongly adjacent regions (denominator NE; assumed non-zero)
    Asb_one = smrjj_divide_smrjj_ / num_elements
    Asb += Asb_one
Asb = Asb / Initial_partitions
print('Asb=', Asb)
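
# In formula form, what the loop above computes (using the code's own quantities):
#
#   Asb = (1 / Initial_partitions) * sum_i (1 / NE_i) *
#         sum_{j in N(i)} 2 * Var_i / (Var_j + Var_i + (mean_j - mean_i) ** 2)
#
# where i runs over the region IDs in unique_values, N(i) are the regions marked
# as strongly adjacent to i in subregion_adj, NE_i = |N(i)|, and mean/Var are the
# mean and variance of the link speeds (column 5 of yanlinks) in each region.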

Tvb = 0
for i in unique_values:
    selected_values = yanlinks[yanlinks[:, 10] == i][:, 5]
    variance = np.var(selected_values)
    Tvb += variance

print('Tvb=', Tvb)
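
# Equivalently: Tvb = sum_i Var_i, the total within-region variance of link
# speeds over all region IDs in unique_values; lower values mean the regions
# are internally more homogeneous in speed.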
# np.save('subregion_adj.npy', subregion_adj)
# np.save('yanlinks.npy', yanlinks)