概述
- 从网站爬取地震信息
- 把爬取到的数据做相应处理
- 把处理好的数据绘制生成html
准备工作
准备数据
- 当前实现是从一个现有的xlsx文件(爬好的数据)里读取数据
- 该文件会放在代码同级目录下
1 2 3 4 5 |
def GetDatas(path=".//地震信息.xlsx"):
    """Load the crawled earthquake records from an Excel workbook.

    Args:
        path: Location of the workbook. Defaults to ``地震信息.xlsx`` in the
            current directory.

    Returns:
        A DataFrame whose columns are renamed to ``id``, ``date``, ``lon``,
        ``lat``, ``depth``, ``type``, ``level``, ``loc`` and ``incident``.

    Raises:
        ValueError: If the sheet does not have exactly nine columns.
    """
    # Hand the path straight to read_excel so pandas opens *and closes* the
    # workbook itself; the previous explicit ExcelFile handle was never closed.
    obj = pandas.read_excel(path)
    obj.columns = ["id", "date", "lon", "lat", "depth", "type", "level", "loc", "incident"]
    return obj
经纬度处理
- 就是传入经纬度,调用高德开放平台提供的逆地理编码接口来获取对应的位置(省、市)信息
- 下面是注册,并申请接口的教程
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
def LonLat2Pos(data, key='2f85f6282750c643366eef4c22bd9306', timeout=10):
    """Attach ``city`` and ``province`` columns by reverse-geocoding each row.

    One request per row is sent to the AMap ``regeo`` web service.

    Args:
        data: DataFrame with ``lon`` and ``lat`` columns. It is not modified.
        key: AMap Web-service key. The default is the key that used to be
            hard-coded in the request; prefer passing your own.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A copy of ``data`` with two extra columns, ``city`` and ``province``.
        When the service returns an empty city, the province is used instead.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
        RuntimeError: If a response carries no ``regeocode`` payload.
    """
    cp_data = data.copy()
    citys = []
    provinces = []
    url = 'https://restapi.amap.com/v3/geocode/regeo?'
    for location in cp_data[['lon', 'lat']].values:
        parm = {
            'location': "{0[0]},{0[1]}".format(location),
            'key': key,
            'extensions': 'base',
            'batch': 'false',
            'roadlevel': 0,
        }
        # Without a timeout a single stalled connection blocks the whole run.
        req = requests.get(url, params=parm, timeout=timeout)
        req.raise_for_status()
        payload = req.json()
        if 'regeocode' not in payload:
            # Surface the service's own error body instead of a bare KeyError.
            raise RuntimeError(
                "AMap regeo request failed for {}: {}".format(parm['location'], payload)
            )
        address = payload['regeocode']['addressComponent']
        province = address['province']
        # An empty city (empty string or empty list) falls back to the province.
        city = address['city'] or province
        citys.append(city)
        provinces.append(province)
    cp_data['city'] = citys
    cp_data['province'] = provinces
    return cp_data
- 把处理后的数据保存到本地
- 原因是处理的过程比较漫长,所以把一次成功的处理结果保存起来,下次直接读会很快
1 2 |
# Run the slow reverse-geocoding pass once and cache its result on disk so
# later runs can read the CSV instead of calling the web service again.
data_pre = LonLat2Pos(data)
data_pre.to_csv(path_or_buf=".//data_after.csv", index=False)
申请高德个人开发者
- 到高德开放平台注册个人账号
- 个人身份验证,选择支付宝验证会比较快
- 注册好,登录进去
- 点击应用管理
- 点击我的应用
- 点击创建新应用(起个名字比如Test,选个类型比如教育),点击新建
- 在Test右边点击添加
- 起个Key名称(比如经纬度转换相关),点Web服务,勾选协议,点提交
- 记住刚才创建的Key(经纬度转换相关)的Key值(一长串数字与英文字符混杂的字符串)
- 网络请求测试
1 2 3 4 5 6 7 8 9 10 11 |
# Smoke test for the AMap reverse-geocoding endpoint using one fixed coordinate.
url = 'https://restapi.amap.com/v3/geocode/regeo?'
loc = "{0[0]},{0[1]}".format([87.83, 31.78])
parm = {
    'location': loc,
    'key': '上面让记住的那个Key',  # placeholder: substitute the Key created above
    'extensions': 'base',
    'batch': 'false',
    'roadlevel': 0,
}
# A timeout keeps the check from hanging forever if the service never answers,
# and raise_for_status turns an HTTP error into an explicit exception.
req = requests.get(url, params=parm, timeout=10)
req.raise_for_status()
test_data = req.json()['regeocode']
实现部分
读取数据
- 读取保存了完整信息的data_after
1 2 3 |
def GetAfterDatas(path=".//data_after.csv"):
    """Read the cached, already reverse-geocoded records from a CSV file.

    Args:
        path: CSV file to read. Defaults to ``data_after.csv`` in the current
            directory, the file the first run writes.

    Returns:
        The file contents as a DataFrame.
    """
    return pandas.read_csv(path)
城市信息处理
1 2 3 4 5 6 |
def ChangeCityInfo(info):
    """Normalise unresolved ``province`` / ``city`` values to ``"其他"``.

    Two placeholder values are treated as unresolved: the literal string
    ``"[]"`` and ``"中华人民共和国"``.

    Args:
        info: DataFrame with ``province`` and ``city`` columns. It is updated
            in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    placeholders = {"[]": "其他", "中华人民共和国": "其他"}
    for column in ("province", "city"):
        # Assign the result back rather than calling replace(inplace=True) on
        # the column selection: under pandas Copy-on-Write that form only
        # changes a temporary and leaves ``info`` untouched.
        # The city column now also maps "中华人民共和国" to "其他"; the earlier
        # code passed those two arguments in the wrong order, making it a no-op.
        info[column] = info[column].replace(placeholders)
    return info
筛选4.5级及以上
1 2 |
def QueryDatas(data, min_level=4.5):
    """Keep sufficiently strong, located earthquakes and group them by province.

    Args:
        data: DataFrame with ``level`` and ``province`` columns.
        min_level: Smallest magnitude to keep (inclusive). The default of 4.5
            matches the "M>=4.5" wording of the chart titles; the threshold
            used to be hard-coded as 0.0, which filtered nothing by magnitude.

    Returns:
        A pandas GroupBy over ``province`` containing the rows whose level is
        at least ``min_level`` and whose province is not ``"其他"``.
    """
    keep = (data["level"] >= min_level) & (data["province"] != "其他")
    return data[keep].groupby("province")
html
- 词云图
1 2 3 4 5 6 7 8 |
def WCloud(data):
    """Render a word cloud of per-group row counts to an HTML file.

    Args:
        data: Grouped object whose ``size()`` yields one count per group
            (the script passes the result of ``QueryDatas``).

    Returns:
        None. The chart is written to ``熊越---强震次数词云.html``.
    """
    counts = data.size()
    words = list(counts.items())
    # NOTE(review): the title and the file name spell the author tag
    # differently (雄越 / 熊越) — confirm which one is intended.
    cloud = WordCloud()
    cloud.add("", words, word_size_range=[20,55], shape=SymbolType.DIAMOND)
    cloud.set_global_opts(
        title_opts=optsss.TitleOpts(title="2010-2022年各省份M>=4.5地震次数(雄越)"),
        # The largest group count is the upper bound of the colour scale.
        visualmap_opts=optsss.VisualMapOpts(max_=int(counts.max())),
    )
    cloud.render("熊越---强震次数词云.html")
- 地图
1 2 3 4 5 6 7 8 9 10 |
def Graph_Map(data):
    """Render a China map coloured by per-group row counts to an HTML file.

    Args:
        data: Grouped object whose ``size()`` yields one count per group
            (the script passes the result of ``QueryDatas``).

    Returns:
        None. The chart is written to ``雄越---强震次数地图.html``.
    """
    counts = data.size()
    # NOTE(review): the keys are passed to the map unchanged. If the installed
    # "china" map expects short province names, convert them first — confirm.
    pairs = list(counts.items())
    # NOTE(review): the title and the file name spell the author tag
    # differently (熊越 / 雄越) — confirm which one is intended.
    chart = Map()
    chart.add("强震次数", pairs, "china",is_map_symbol_show=True,is_roam=True)
    chart.set_global_opts(
        title_opts=optsss.TitleOpts(title="2010-2022年各省份M>=4.5地震次数(熊越)"),
        # The largest group count is the upper bound of the piecewise scale.
        visualmap_opts=optsss.VisualMapOpts(max_=int(counts.max()), is_piecewise=True),
    )
    chart.render("雄越---强震次数地图.html")
完整代码
第一次运行
- 准备数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
"""First run: load the crawled workbook, reverse-geocode every record through
the AMap web service and cache the result as ``data_after.csv``."""
from pyecharts.charts import Map
from pyecharts.charts import WordCloud
from pyecharts import options as optsss
from pyecharts.globals import SymbolType, ThemeType
import pandas
import requests

# Full administrative name -> short name. Built once at import time instead of
# on every province_to_short() call.
_P2S = {
    "北京市": "北京",
    "天津市": "天津",
    "重庆市": "重庆",
    "上海市": "上海",
    "河北省": "河北",
    "山西省": "山西",
    "辽宁省": "辽宁",
    "吉林省": "吉林",
    "黑龙江省": "黑龙江",
    "江苏省": "江苏",
    "浙江省": "浙江",
    "安徽省": "安徽",
    "福建省": "福建",
    "江西省": "江西",
    "山东省": "山东",
    "河南省": "河南",
    "湖北省": "湖北",
    "湖南省": "湖南",
    "广东省": "广东",
    "海南省": "海南",
    "四川省": "四川",
    "贵州省": "贵州",
    "云南省": "云南",
    "陕西省": "陕西",
    "甘肃省": "甘肃",
    "青海省": "青海",
    "台湾省": "台湾",
    "内蒙古自治区": "内蒙古",
    "广西壮族自治区": "广西",
    "宁夏回族自治区": "宁夏",
    "新疆维吾尔自治区": "新疆",
    "西藏自治区": "西藏",
    "香港特别行政区": "香港",
    "澳门特别行政区": "澳门",
}


def province_to_short(province):
    """Return the short name of a province, or None if it is not in the table."""
    return _P2S.get(province)


def GetDatas(path=".//地震信息.xlsx"):
    """Load the crawled earthquake records from an Excel workbook.

    Args:
        path: Location of the workbook.

    Returns:
        A DataFrame with columns ``id``, ``date``, ``lon``, ``lat``,
        ``depth``, ``type``, ``level``, ``loc`` and ``incident``.
    """
    # read_excel opens and closes the file itself; the previous explicit
    # ExcelFile handle was never closed.
    obj = pandas.read_excel(path)
    obj.columns = ["id", "date", "lon", "lat", "depth", "type", "level", "loc", "incident"]
    return obj


def GetAfterDatas(path=".//data_after.csv"):
    """Read the cached, already reverse-geocoded records from a CSV file."""
    return pandas.read_csv(path)


def ChangeCityInfo(info):
    """Normalise unresolved ``province`` / ``city`` values to ``"其他"``.

    ``info`` is updated in place and also returned.
    """
    placeholders = {"[]": "其他", "中华人民共和国": "其他"}
    for column in ("province", "city"):
        # Assign back instead of replace(inplace=True) on a column selection,
        # which has no effect under pandas Copy-on-Write. The city column now
        # also maps "中华人民共和国"; the earlier call had its arguments swapped.
        info[column] = info[column].replace(placeholders)
    return info


def QueryDatas(data, min_level=4.5):
    """Group rows with ``level >= min_level`` and a known province by province.

    The default of 4.5 matches the "M>=4.5" chart titles; the threshold used
    to be hard-coded as 0.0.
    """
    keep = (data["level"] >= min_level) & (data["province"] != "其他")
    return data[keep].groupby("province")


def WCloud(data):
    """Render a word cloud of per-province counts to ``熊越---强震次数词云.html``."""
    counts = data.size()
    # NOTE(review): title and file name spell the author tag differently
    # (雄越 / 熊越) — confirm which one is intended.
    (WordCloud()
     .add("", list(counts.items()), word_size_range=[20, 55], shape=SymbolType.DIAMOND)
     .set_global_opts(
         title_opts=optsss.TitleOpts(title="2010-2022年各省份M>=4.5地震次数(雄越)"),
         visualmap_opts=optsss.VisualMapOpts(max_=int(counts.max())))
     .render("熊越---强震次数词云.html"))


def LonLat2Pos(data, key='2f85f6282750c643366eef4c22bd9306233', timeout=10):
    """Attach ``city`` and ``province`` columns by reverse-geocoding each row.

    Args:
        data: DataFrame with ``lon`` and ``lat`` columns. It is not modified.
        key: AMap Web-service key. NOTE(review): this default differs from the
            key shown in the other listings of this article — confirm which
            one is valid, and prefer passing your own.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A copy of ``data`` with ``city`` and ``province`` columns added.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
        RuntimeError: If a response carries no ``regeocode`` payload.
    """
    cp_data = data.copy()
    citys = []
    provinces = []
    url = 'https://restapi.amap.com/v3/geocode/regeo?'
    for location in cp_data[['lon', 'lat']].values:
        parm = {
            'location': "{0[0]},{0[1]}".format(location),
            'key': key,
            'extensions': 'base',
            'batch': 'false',
            'roadlevel': 0,
        }
        # Without a timeout a single stalled connection blocks the whole run.
        req = requests.get(url, params=parm, timeout=timeout)
        req.raise_for_status()
        payload = req.json()
        if 'regeocode' not in payload:
            raise RuntimeError(
                "AMap regeo request failed for {}: {}".format(parm['location'], payload)
            )
        address = payload['regeocode']['addressComponent']
        province = address['province']
        # An empty city falls back to the province.
        city = address['city'] or province
        citys.append(city)
        provinces.append(province)
    cp_data['city'] = citys
    cp_data['province'] = provinces
    return cp_data


def Graph_Map(data):
    """Render a China map of per-province counts to ``雄越---强震次数地图.html``."""
    counts = data.size()
    # NOTE(review): keys are passed to the map unchanged; if the installed
    # "china" map expects short names, map them through province_to_short.
    (Map()
     .add("强震次数", list(counts.items()), "china", is_map_symbol_show=True, is_roam=True)
     .set_global_opts(
         title_opts=optsss.TitleOpts(title="2010-2022年各省份M>=4.5地震次数(熊越)"),
         visualmap_opts=optsss.VisualMapOpts(max_=int(counts.max()), is_piecewise=True))
     .render("雄越---强震次数地图.html"))


if __name__ == '__main__':
    # 1 - load the raw data
    data = GetDatas()
    # 2 - reverse-geocode the coordinates and cache the result
    data_pre = LonLat2Pos(data)
    data_pre.to_csv(".//data_after.csv", index=False)
第二次运行
- 利用第一次运行后得到的数据,生成html文件
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
"""Second run: read the cached ``data_after.csv`` produced by the first run
and render the word-cloud and map HTML files."""
from pyecharts.charts import Map
from pyecharts.charts import WordCloud
from pyecharts import options as optsss
from pyecharts.globals import SymbolType, ThemeType
import pandas
import requests

# Full administrative name -> short name. Built once at import time instead of
# on every province_to_short() call.
_P2S = {
    "北京市": "北京",
    "天津市": "天津",
    "重庆市": "重庆",
    "上海市": "上海",
    "河北省": "河北",
    "山西省": "山西",
    "辽宁省": "辽宁",
    "吉林省": "吉林",
    "黑龙江省": "黑龙江",
    "江苏省": "江苏",
    "浙江省": "浙江",
    "安徽省": "安徽",
    "福建省": "福建",
    "江西省": "江西",
    "山东省": "山东",
    "河南省": "河南",
    "湖北省": "湖北",
    "湖南省": "湖南",
    "广东省": "广东",
    "海南省": "海南",
    "四川省": "四川",
    "贵州省": "贵州",
    "云南省": "云南",
    "陕西省": "陕西",
    "甘肃省": "甘肃",
    "青海省": "青海",
    "台湾省": "台湾",
    "内蒙古自治区": "内蒙古",
    "广西壮族自治区": "广西",
    "宁夏回族自治区": "宁夏",
    "新疆维吾尔自治区": "新疆",
    "西藏自治区": "西藏",
    "香港特别行政区": "香港",
    "澳门特别行政区": "澳门",
}


def province_to_short(province):
    """Return the short name of a province, or None if it is not in the table."""
    return _P2S.get(province)


def GetDatas(path=".//地震信息.xlsx"):
    """Load the crawled earthquake records from an Excel workbook.

    Args:
        path: Location of the workbook.

    Returns:
        A DataFrame with columns ``id``, ``date``, ``lon``, ``lat``,
        ``depth``, ``type``, ``level``, ``loc`` and ``incident``.
    """
    # read_excel opens and closes the file itself; the previous explicit
    # ExcelFile handle was never closed.
    obj = pandas.read_excel(path)
    obj.columns = ["id", "date", "lon", "lat", "depth", "type", "level", "loc", "incident"]
    return obj


def GetAfterDatas(path=".//data_after.csv"):
    """Read the cached, already reverse-geocoded records from a CSV file."""
    return pandas.read_csv(path)


def ChangeCityInfo(info):
    """Normalise unresolved ``province`` / ``city`` values to ``"其他"``.

    ``info`` is updated in place and also returned.
    """
    placeholders = {"[]": "其他", "中华人民共和国": "其他"}
    for column in ("province", "city"):
        # Assign back instead of replace(inplace=True) on a column selection,
        # which has no effect under pandas Copy-on-Write. The city column now
        # also maps "中华人民共和国"; the earlier call had its arguments swapped.
        info[column] = info[column].replace(placeholders)
    return info


def QueryDatas(data, min_level=4.5):
    """Group rows with ``level >= min_level`` and a known province by province.

    The default of 4.5 matches the "M>=4.5" chart titles; the threshold used
    to be hard-coded as 0.0.
    """
    keep = (data["level"] >= min_level) & (data["province"] != "其他")
    return data[keep].groupby("province")


def WCloud(data):
    """Render a word cloud of per-province counts to ``熊越---强震次数词云.html``."""
    counts = data.size()
    # NOTE(review): title and file name spell the author tag differently
    # (雄越 / 熊越) — confirm which one is intended.
    (WordCloud()
     .add("", list(counts.items()), word_size_range=[20, 55], shape=SymbolType.DIAMOND)
     .set_global_opts(
         title_opts=optsss.TitleOpts(title="2010-2022年各省份M>=4.5地震次数(雄越)"),
         visualmap_opts=optsss.VisualMapOpts(max_=int(counts.max())))
     .render("熊越---强震次数词云.html"))


def LonLat2Pos(data, key='2f85f6282750c643366eef4c22bd9306', timeout=10):
    """Attach ``city`` and ``province`` columns by reverse-geocoding each row.

    Args:
        data: DataFrame with ``lon`` and ``lat`` columns. It is not modified.
        key: AMap Web-service key. The default is the key that used to be
            hard-coded in the request; prefer passing your own.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A copy of ``data`` with ``city`` and ``province`` columns added.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
        RuntimeError: If a response carries no ``regeocode`` payload.
    """
    cp_data = data.copy()
    citys = []
    provinces = []
    url = 'https://restapi.amap.com/v3/geocode/regeo?'
    for location in cp_data[['lon', 'lat']].values:
        parm = {
            'location': "{0[0]},{0[1]}".format(location),
            'key': key,
            'extensions': 'base',
            'batch': 'false',
            'roadlevel': 0,
        }
        # Without a timeout a single stalled connection blocks the whole run.
        req = requests.get(url, params=parm, timeout=timeout)
        req.raise_for_status()
        payload = req.json()
        if 'regeocode' not in payload:
            raise RuntimeError(
                "AMap regeo request failed for {}: {}".format(parm['location'], payload)
            )
        address = payload['regeocode']['addressComponent']
        province = address['province']
        # An empty city falls back to the province.
        city = address['city'] or province
        citys.append(city)
        provinces.append(province)
    cp_data['city'] = citys
    cp_data['province'] = provinces
    return cp_data


def Graph_Map(data):
    """Render a China map of per-province counts to ``雄越---强震次数地图.html``."""
    counts = data.size()
    # NOTE(review): keys are passed to the map unchanged; if the installed
    # "china" map expects short names, map them through province_to_short.
    (Map()
     .add("强震次数", list(counts.items()), "china", is_map_symbol_show=True, is_roam=True)
     .set_global_opts(
         title_opts=optsss.TitleOpts(title="2010-2022年各省份M>=4.5地震次数(熊越)"),
         visualmap_opts=optsss.VisualMapOpts(max_=int(counts.max()), is_piecewise=True))
     .render("雄越---强震次数地图.html"))


if __name__ == '__main__':
    # Read the cached data that already carries province / city columns.
    data = GetAfterDatas()
    # Normalise unresolved province / city values.
    data_after = ChangeCityInfo(data)
    # Keep M>=4.5 events and group them by province.
    query_data = QueryDatas(data_after)
    # Visualisation: word-cloud HTML, then map HTML.
    WCloud(query_data)
    Graph_Map(query_data)
本文为原创文章,版权归Aet所有,欢迎分享本文,转载请保留出处!
你可能也喜欢
- ♥ Python编程从入门到实践 一04/19
- ♥ breakpad记述:Windows下静态库的编译使用03/15
- ♥ 51CTO:C++语言高级课程三08/15
- ♥ Macos自动更新相关06/01
- ♥ Shell 语法记述 第二篇09/05
- ♥ C++17_第三篇06/29