# File listing header: 192 lines, 12 KiB, Python
import pdfplumber
|
||
import pandas as pd
|
||
import numpy as np
|
||
|
||
# Open the password-protected source PDF; the parsing sections of this script
# read their tables from this handle.
# NOTE(review): the password is hard-coded here — consider moving it to
# configuration or an environment variable.
pdf = pdfplumber.open(
    '军事体育五项标准.pdf',
    password='456456',
)
|
||
|
||
# Pages holding the table for each indicator, as indexes into pdf.pages.
# Page index 4 is not referenced by any of these lists.

# Men's tables
manBodyShape_Page_list = [0, 1]
# Women's body-shape table
womanBodyShape_Page_list = [2, 3]
# Men's event tables (one page each)
manBodyChinUpsPushUps_list = [5]
manSitUps_list = [6]
man60mSerpentineRun_list = [7]
man3000mRun_list = [8]
# Women's event tables (one page each)
womanBodyArmDrapePushUps_list = [9]
womanSitUps_list = [10]
woman60mSerpentineRun_list = [11]
woman3000mRun_list = [12]
|
||
|
||
# Parse manBodyShape
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
manBodyShape_comparisonTable_list = []
for page_no in manBodyShape_Page_list:
    for raw_row in pdf.pages[page_no].extract_table():
        manBodyShape_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its first two cells, with the
# first label overwritten by a fixed string.
manBodyShape_columnName_list = manBodyShape_comparisonTable_list[0][2:]
manBodyShape_columnName_list[0] = '0~24岁'

# Data rows are the rows that contain none of the marker cells below.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
manBodyShape_comparisonTable_ar = np.array([
    row for row in manBodyShape_comparisonTable_list
    if not any(marker in row for marker in ('公式', '说明', '24岁以下'))
])

# First column -> float row labels (spaces removed before conversion).
manBodyShape_rowName_ar = np.array(
    [label.replace(' ', '') for label in manBodyShape_comparisonTable_ar[:, 0]]
).astype(float)

# Every remaining cell is split on "~" and each piece converted to float.
manBodyShape_data_ar = np.array([
    [
        [float(bound) for bound in cell.replace(' ', '').split("~")]
        for cell in row
    ]
    for row in manBodyShape_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
# NOTE(review): each cell assigned here is a sequence of floats rather than a
# scalar — confirm the Excel writer in use serializes that as intended.
manBodyShape_df = pd.DataFrame(index=manBodyShape_rowName_ar, columns=manBodyShape_columnName_list)
for index, col in enumerate(manBodyShape_columnName_list):
    manBodyShape_df[col] = list(manBodyShape_data_ar[:, index])
manBodyShape_df.to_excel('manBodyShape.xlsx')
|
||
|
||
# Parse womanBodyShape
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
womanBodyShape_comparisonTable_list = []
for page_no in womanBodyShape_Page_list:
    for raw_row in pdf.pages[page_no].extract_table():
        womanBodyShape_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its first two cells, with the
# first label overwritten by a fixed string.
womanBodyShape_columnName_list = womanBodyShape_comparisonTable_list[0][2:]
womanBodyShape_columnName_list[0] = '0~24岁'

# Data rows are the rows that contain none of the marker cells below.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
womanBodyShape_comparisonTable_ar = np.array([
    row for row in womanBodyShape_comparisonTable_list
    if not any(marker in row for marker in ('公式', '说明', '24岁以下'))
])

# First column -> float row labels (spaces removed before conversion).
womanBodyShape_rowName_ar = np.array(
    [label.replace(' ', '') for label in womanBodyShape_comparisonTable_ar[:, 0]]
).astype(float)

# Every remaining cell is split on "~" and each piece converted to float.
womanBodyShape_data_ar = np.array([
    [
        [float(bound) for bound in cell.replace(' ', '').split("~")]
        for cell in row
    ]
    for row in womanBodyShape_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
# NOTE(review): each cell assigned here is a sequence of floats rather than a
# scalar — confirm the Excel writer in use serializes that as intended.
womanBodyShape_df = pd.DataFrame(index=womanBodyShape_rowName_ar, columns=womanBodyShape_columnName_list)
for index, col in enumerate(womanBodyShape_columnName_list):
    womanBodyShape_df[col] = list(womanBodyShape_data_ar[:, index])
womanBodyShape_df.to_excel('womanBodyShape.xlsx')
|
||
|
||
# Parse manBodyChinUps & PushUps
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
manBodyChinUpsPushUps_comparisonTable_list = []
for page_no in manBodyChinUpsPushUps_list:
    for raw_row in pdf.pages[page_no].extract_table():
        manBodyChinUpsPushUps_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
manBodyChinUpsPushUps_columnName_list = manBodyChinUpsPushUps_comparisonTable_list[0][1:]
manBodyChinUpsPushUps_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
manBodyChinUpsPushUps_comparisonTable_ar = np.array([
    row for row in manBodyChinUpsPushUps_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
manBodyChinUpsPushUps_rowName_ar = np.array(
    [label.replace(' ', '') for label in manBodyChinUpsPushUps_comparisonTable_ar[:, 0]]
).astype(int)

# Remaining columns -> integers (spaces removed before conversion).
manBodyChinUpsPushUps_data_ar = np.array([
    [int(cell.replace(' ', '')) for cell in row]
    for row in manBodyChinUpsPushUps_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
manBodyChinUpsPushUps_df = pd.DataFrame(
    index=manBodyChinUpsPushUps_rowName_ar,
    columns=manBodyChinUpsPushUps_columnName_list,
)
for index, col in enumerate(manBodyChinUpsPushUps_columnName_list):
    manBodyChinUpsPushUps_df[col] = list(manBodyChinUpsPushUps_data_ar[:, index])
manBodyChinUpsPushUps_df.to_excel('manBodyChinUpsPushUps.xlsx')
|
||
|
||
# Parse manSitUps
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
manSitUps_comparisonTable_list = []
for page_no in manSitUps_list:
    for raw_row in pdf.pages[page_no].extract_table():
        manSitUps_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
manSitUps_columnName_list = manSitUps_comparisonTable_list[0][1:]
manSitUps_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
manSitUps_comparisonTable_ar = np.array([
    row for row in manSitUps_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
manSitUps_rowName_ar = np.array(
    [label.replace(' ', '') for label in manSitUps_comparisonTable_ar[:, 0]]
).astype(int)

# Remaining columns -> integers (spaces removed before conversion).
manSitUps_data_ar = np.array([
    [int(cell.replace(' ', '')) for cell in row]
    for row in manSitUps_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
manSitUps_df = pd.DataFrame(index=manSitUps_rowName_ar, columns=manSitUps_columnName_list)
for index, col in enumerate(manSitUps_columnName_list):
    manSitUps_df[col] = list(manSitUps_data_ar[:, index])
manSitUps_df.to_excel('manSitUps.xlsx')
|
||
|
||
# Parse man60mSerpentineRun
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
man60mSerpentineRun_comparisonTable_list = []
for page_no in man60mSerpentineRun_list:
    for raw_row in pdf.pages[page_no].extract_table():
        man60mSerpentineRun_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
man60mSerpentineRun_columnName_list = man60mSerpentineRun_comparisonTable_list[0][1:]
man60mSerpentineRun_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
man60mSerpentineRun_comparisonTable_ar = np.array([
    row for row in man60mSerpentineRun_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
man60mSerpentineRun_rowName_ar = np.array(
    [label.replace(' ', '') for label in man60mSerpentineRun_comparisonTable_ar[:, 0]]
).astype(int)

# Each cell is split on "″": the left part is taken as an integer and the
# right part as an integer scaled by 0.1 (presumably seconds and tenths —
# verify against the PDF).
man60mSerpentineRun_data_ar = np.array([
    [
        int(parts[0]) + int(parts[1]) * 0.1
        for parts in (cell.split("″") for cell in row)
    ]
    for row in man60mSerpentineRun_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
man60mSerpentineRun_df = pd.DataFrame(
    index=man60mSerpentineRun_rowName_ar,
    columns=man60mSerpentineRun_columnName_list,
)
for index, col in enumerate(man60mSerpentineRun_columnName_list):
    man60mSerpentineRun_df[col] = list(man60mSerpentineRun_data_ar[:, index])
man60mSerpentineRun_df.to_excel('man60mSerpentineRun.xlsx')
|
||
|
||
# Parse man3000mRun
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
man3000mRun_comparisonTable_list = []
for page_no in man3000mRun_list:
    for raw_row in pdf.pages[page_no].extract_table():
        man3000mRun_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
man3000mRun_columnName_list = man3000mRun_comparisonTable_list[0][1:]
man3000mRun_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
man3000mRun_comparisonTable_ar = np.array([
    row for row in man3000mRun_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
man3000mRun_rowName_ar = np.array(
    [label.replace(' ', '') for label in man3000mRun_comparisonTable_ar[:, 0]]
).astype(int)

# Each cell is split on "′": left part * 60 plus the right part with its last
# character removed (presumably minutes′seconds″ -> total seconds — verify).
man3000mRun_data_ar = np.array([
    [
        int(parts[0]) * 60 + int(parts[1][:-1])
        for parts in (cell.split("′") for cell in row)
    ]
    for row in man3000mRun_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
man3000mRun_df = pd.DataFrame(index=man3000mRun_rowName_ar, columns=man3000mRun_columnName_list)
for index, col in enumerate(man3000mRun_columnName_list):
    man3000mRun_df[col] = list(man3000mRun_data_ar[:, index])
man3000mRun_df.to_excel('man3000mRun.xlsx')
|
||
|
||
# Parse womanBodyArmDrape & PushUps
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
womanBodyArmDrapePushUps_comparisonTable_list = []
for page_no in womanBodyArmDrapePushUps_list:
    for raw_row in pdf.pages[page_no].extract_table():
        womanBodyArmDrapePushUps_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
womanBodyArmDrapePushUps_columnName_list = womanBodyArmDrapePushUps_comparisonTable_list[0][1:]
womanBodyArmDrapePushUps_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
womanBodyArmDrapePushUps_comparisonTable_ar = np.array([
    row for row in womanBodyArmDrapePushUps_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
womanBodyArmDrapePushUps_rowName_ar = np.array(
    [label.replace(' ', '') for label in womanBodyArmDrapePushUps_comparisonTable_ar[:, 0]]
).astype(int)

# Columns 1..6: cells containing "′" become left * 60 + right (last character
# removed); cells without it become the integer left after removing the last
# character (presumably durations in seconds — verify against the PDF).
womanArmDrape_data_ar = np.array([
    [
        int(parts[0]) * 60 + int(parts[1][:-1]) if len(parts) > 1 else int(parts[0][:-1])
        for parts in (cell.split("′") for cell in row)
    ]
    for row in womanBodyArmDrapePushUps_comparisonTable_ar[:, 1:7]
])

# Columns 7 onward: plain integers (spaces removed before conversion).
womanPushUps_data_ar = np.array([
    [int(cell.replace(' ', '')) for cell in row]
    for row in womanBodyArmDrapePushUps_comparisonTable_ar[:, 7:]
])

# Join the two halves side by side so column positions line up with the labels.
womanBodyArmDrapePushUps_data_ar = np.hstack((womanArmDrape_data_ar, womanPushUps_data_ar))

# Start from an empty frame and fill it one column at a time.
womanBodyArmDrapePushUps_df = pd.DataFrame(
    index=womanBodyArmDrapePushUps_rowName_ar,
    columns=womanBodyArmDrapePushUps_columnName_list,
)
for index, col in enumerate(womanBodyArmDrapePushUps_columnName_list):
    womanBodyArmDrapePushUps_df[col] = list(womanBodyArmDrapePushUps_data_ar[:, index])
womanBodyArmDrapePushUps_df.to_excel('womanBodyArmDrapePushUps.xlsx')
|
||
|
||
# Parse womanSitUps
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
womanSitUps_comparisonTable_list = []
for page_no in womanSitUps_list:
    for raw_row in pdf.pages[page_no].extract_table():
        womanSitUps_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
womanSitUps_columnName_list = womanSitUps_comparisonTable_list[0][1:]
womanSitUps_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
womanSitUps_comparisonTable_ar = np.array([
    row for row in womanSitUps_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
womanSitUps_rowName_ar = np.array(
    [label.replace(' ', '') for label in womanSitUps_comparisonTable_ar[:, 0]]
).astype(int)

# Remaining columns -> integers (spaces removed before conversion).
womanSitUps_data_ar = np.array([
    [int(cell.replace(' ', '')) for cell in row]
    for row in womanSitUps_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
womanSitUps_df = pd.DataFrame(index=womanSitUps_rowName_ar, columns=womanSitUps_columnName_list)
for index, col in enumerate(womanSitUps_columnName_list):
    womanSitUps_df[col] = list(womanSitUps_data_ar[:, index])
womanSitUps_df.to_excel('womanSitUps.xlsx')
|
||
|
||
# Parse woman60mSerpentineRun
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
woman60mSerpentineRun_comparisonTable_list = []
for page_no in woman60mSerpentineRun_list:
    for raw_row in pdf.pages[page_no].extract_table():
        woman60mSerpentineRun_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
woman60mSerpentineRun_columnName_list = woman60mSerpentineRun_comparisonTable_list[0][1:]
woman60mSerpentineRun_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
woman60mSerpentineRun_comparisonTable_ar = np.array([
    row for row in woman60mSerpentineRun_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
woman60mSerpentineRun_rowName_ar = np.array(
    [label.replace(' ', '') for label in woman60mSerpentineRun_comparisonTable_ar[:, 0]]
).astype(int)

# Each cell is split on "″": the left part is taken as an integer and the
# right part as an integer scaled by 0.1 (presumably seconds and tenths —
# verify against the PDF).
woman60mSerpentineRun_data_ar = np.array([
    [
        int(parts[0]) + int(parts[1]) * 0.1
        for parts in (cell.split("″") for cell in row)
    ]
    for row in woman60mSerpentineRun_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
woman60mSerpentineRun_df = pd.DataFrame(
    index=woman60mSerpentineRun_rowName_ar,
    columns=woman60mSerpentineRun_columnName_list,
)
for index, col in enumerate(woman60mSerpentineRun_columnName_list):
    woman60mSerpentineRun_df[col] = list(woman60mSerpentineRun_data_ar[:, index])
woman60mSerpentineRun_df.to_excel('woman60mSerpentineRun.xlsx')
|
||
|
||
# Parse woman3000mRun
# Read the table extracted from each listed page and drop falsy cells
# (None / empty string) from every row.
woman3000mRun_comparisonTable_list = []
for page_no in woman3000mRun_list:
    for raw_row in pdf.pages[page_no].extract_table():
        woman3000mRun_comparisonTable_list.append([cell for cell in raw_row if cell])

# Column labels: the first extracted row without its leading cell, with the
# first label overwritten by a fixed string.
woman3000mRun_columnName_list = woman3000mRun_comparisonTable_list[0][1:]
woman3000mRun_columnName_list[0] = "0~24"

# Data rows are the rows holding neither a '25~27' cell nor a '备注' cell.
# Assumes all kept rows have the same number of cells so the array is 2-D —
# TODO confirm against the PDF.
woman3000mRun_comparisonTable_ar = np.array([
    row for row in woman3000mRun_comparisonTable_list
    if not ('25~27' in row or '备注' in row)
])

# First column -> integer row labels (spaces removed before conversion).
woman3000mRun_rowName_ar = np.array(
    [label.replace(' ', '') for label in woman3000mRun_comparisonTable_ar[:, 0]]
).astype(int)

# Each cell is split on "′": left part * 60 plus the right part with its last
# character removed (presumably minutes′seconds″ -> total seconds — verify).
woman3000mRun_data_ar = np.array([
    [
        int(parts[0]) * 60 + int(parts[1][:-1])
        for parts in (cell.split("′") for cell in row)
    ]
    for row in woman3000mRun_comparisonTable_ar[:, 1:]
])

# Start from an empty frame and fill it one column at a time.
woman3000mRun_df = pd.DataFrame(index=woman3000mRun_rowName_ar, columns=woman3000mRun_columnName_list)
for index, col in enumerate(woman3000mRun_columnName_list):
    woman3000mRun_df[col] = list(woman3000mRun_data_ar[:, index])
woman3000mRun_df.to_excel('woman3000mRun.xlsx')

# This is the last section that reads from the PDF; close it explicitly so the
# underlying file handle is released instead of being left open until exit.
pdf.close()
|