json模块
字典格式 -> {"name":"张三"}
Json -> '{"name":"张三"}'
外面包裹引号
在进行数据格式转化时一定会用到dump | dumps | load | loads中的任意一个或多个
下面介绍一下他们的含义
dump | dumps | load | loads
- dump 和 dumps 都实现了序列化(Python对象转化成json字符串)
- load 和 loads 都实现了反序列化(json字符串转化成Python对象)
变量从内存中变成可存储或传输的过程称之为序列化
序列化是将对象状态转化为可保存或可传输格式的过程。
变量内容从序列化的对象重新读到内存里称之为反序列化
反序列化是将流转换为对象的过程。
- 序列化和反序列化的特性
loads: 是将string转换为dict
load: 是从文件中读取json格式字符串并转化为dict
dumps: 是将dict转换为string
dump: 是将dict类型转换为json格式字符串,存入文件
# -*- encoding: utf-8 -*-
""" @File : ch3Json.py @Contact : ag@team-ag.club @License : (C)Copyright 2019-2020, CodingPark @Modify Time @Author @Version @Desciption ------------ ------- -------- ----------- 2020-07-16 09:46 AG 1.0 None """
import json
# Two JSON documents held as Python strings: one JSON object, one JSON array.
json_str = '{"name":"张三"}'
json_str1 = '["name","李四"]'

# Deserialize: json.loads turns JSON text into the matching Python value
# (a JSON object becomes a dict, a JSON array becomes a list).
res, res1 = json.loads(json_str), json.loads(json_str1)
for parsed in (res, res1):
    print(type(parsed))

# Serialize: json.dumps turns a Python dict into JSON text (a str).
dic = dict(name="王五")
res2 = json.dumps(dic)
print(type(res2))
数据爬取
建议 : Chrome浏览器安装JSONView插件
# -*- encoding: utf-8 -*-
""" @File : spy.py @Contact : ag@team-ag.club @License : (C)Copyright 2019-2020, CodingPark @Modify Time @Author @Version @Desciption ------------ ------- -------- ----------- 2020-07-16 10:03 AG 1.0 None """
import requests
import json
# Endpoint queried for the China COVID-19 statistics.
covid_china_url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
# Send a desktop-browser user agent with the request.
header = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}

# Fetch the data. The timeout stops the script from hanging indefinitely on an
# unresponsive server, and raise_for_status() surfaces HTTP errors directly
# instead of failing later while decoding an error page.
reply = requests.get(url=covid_china_url, headers=header, timeout=10)
reply.raise_for_status()
response = reply.json()

# The 'data' field of the reply is itself JSON text, so it is decoded a second
# time to obtain a Python object.
data = json.loads(response['data'])

# Re-serialize in a readable form: indented, with non-ASCII characters kept
# as-is rather than escaped.
redata = json.dumps(data, ensure_ascii=False, indent=2)

# Save the result. The encoding is given explicitly because the text contains
# raw non-ASCII characters and is read back elsewhere as UTF-8; relying on the
# platform default encoding could fail or produce an unreadable file.
# The with-statement flushes and closes the file, so no manual flush()/close()
# is needed.
with open('covid_china', 'w', encoding='utf-8') as f:
    f.write(redata)
# === a nicely formatted 'covid_china' file has now been generated ===
结果展示
json数据转存至excel
# -*- encoding: utf-8 -*-
""" @File : json2excel.py @Contact : ag@team-ag.club @License : (C)Copyright 2019-2020, CodingPark @Modify Time @Author @Version @Desciption ------------ ------- -------- ----------- 2020-07-16 11:18 AG 1.0 json转存到excel """
import json
import pandas as pd
from openpyxl import load_workbook
# Read the saved file and parse its JSON text into a Python dict in one step.
with open('covid_china', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Peel the structure one layer at a time.
# The first entry of 'areaTree' is taken as the China-wide node.
chinaAreadataDict = data['areaTree'][0]
# Its 'children' are the individual provinces.
provinceList = chinaAreadataDict['children']

# Flatten province -> city into one record per city.
china_citylist = []
for province_node in provinceList:
    province_name = province_node['name']
    # Each province's 'children' are its cities.
    for city_node in province_node['children']:
        china_citylist.append({
            'province': province_name,
            'city': city_node['name'],
            'today': city_node['today'],
            'total': city_node['total'],
        })
# Raise pandas' console display limits (up to 500 rows / 500 columns and a
# 5000-character width) so the printed table is not truncated.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 5000)

# One row per city; the 'today' and 'total' columns hold nested dicts.
chinaTotalData = pd.DataFrame(china_citylist)

# Replace each nested-dict column with the single 'confirm' value taken from
# it. The order (total first, then today) fixes the final column order:
# province, city, total_confirm, today_confirm.
chinaTotalData['total_confirm'] = [entry['confirm'] for entry in chinaTotalData['total']]
chinaTotalData.drop(['total'], axis=1, inplace=True)

chinaTotalData['today_confirm'] = [entry['confirm'] for entry in chinaTotalData['today']]
chinaTotalData.drop(['today'], axis=1, inplace=True)
print(chinaTotalData)

# Save to Excel. The writer is used as a context manager so the workbook is
# written and closed exactly once; ExcelWriter.save() is not used because it
# was removed in pandas 2.0.
# To add to an existing workbook instead, pass mode='a' to ExcelWriter.
# NOTE(review): the openpyxl engine writes the xlsx format even though the
# file is named '.xls'; the name is kept because the plotting script reads
# this exact path.
with pd.ExcelWriter('Covid_China.xls', engine='openpyxl') as writer:
    chinaTotalData.to_excel(writer, index=False)
结果展示
pyecharts引例
# -*- encoding: utf-8 -*-
""" @File : toView.py @Contact : ag@team-ag.club @License : (C)Copyright 2019-2020, CodingPark @Modify Time @Author @Version @Desciption ------------ ------- -------- ----------- 2020-07-17 14:25 AG 1.0 None """
from pyecharts.charts import Bar
# Category labels for the x axis and the matching values for the one series.
categories = ['衬衫', '羊毛衫', '高跟鞋', '毛衣', '风衣', '西服', '外套', '背包']
values = [52, 63, 31, 54, 6, 3, 22, 15]

bar = Bar()
bar.add_xaxis(categories)
bar.add_yaxis("Balenciaga", values)

# render() writes a local HTML file. Without an argument it creates
# render.html in the current directory; a path may be passed instead,
# e.g. bar.render("mycharts.html").
bar.render('Balenciaga.html')
结果展示
pyecharts绘制国内疫情图
# -*- encoding: utf-8 -*-
""" @File : COVID19.py @Contact : ag@team-ag.club @License : (C)Copyright 2019-2020, CodingPark @Modify Time @Author @Version @Desciption ------------ ------- -------- ----------- 2020-07-17 15:22 AG 1.0 None """
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Map
# Load the per-city table produced by the export script.
df = pd.read_excel('Covid_China.xls')

# 1. Total confirmed cases per province, taken from the Excel data.
# Only the numeric column is aggregated; summing the whole frame would also
# "sum" the text 'city' column, which is wasted work and not meaningful.
data = df[['province', 'total_confirm']].groupby(by='province', as_index=False).sum()

# (province, total_confirm) pairs, the shape passed to Map.add(data_pair=...).
data_list = list(zip(data['province'].tolist(), data['total_confirm'].tolist()))
def map_china():
    """Build the China map chart of confirmed cases per province.

    Reads the module-level ``data_list`` of (province, confirmed) pairs and
    returns the configured ``Map`` chart; the caller is responsible for
    rendering it.
    """
    # Colour bands for the piecewise visual map, light to dark as the case
    # count grows. The legend-text key must be spelled "label"; the previous
    # "lavel" was not a recognised key, so these custom labels were never used.
    pieces = [
        {"max": 9, "min": 0, "label": "0-9", "color": "#FFE4E1"},
        {"max": 99, "min": 10, "label": "10-99", "color": "#FF7F50"},
        {"max": 499, "min": 100, "label": "100-499", "color": "#F08080"},
        {"max": 999, "min": 500, "label": "500-999", "color": "#CD5C5C"},
        {"max": 9999, "min": 1000, "label": "1000-9999", "color": "#990000"},
        {"max": 99999, "min": 10000, "label": ">10000", "color": "#660000"},
    ]
    c = (
        Map()
        .add(series_name='确诊病例', data_pair=data_list, maptype="china")
        .set_global_opts(
            title_opts=opts.TitleOpts(title='疫情地图'),
            visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pieces=pieces),
        )
    )
    return c
# Build the chart, write it out as an HTML page, then report completion.
d_map = map_china()
output_page = 'COVID-19.html'
d_map.render(output_page)
print('\n=== 成功 ===')
结果展示
更多参考
Pyecharts参考 https://pyecharts.org/#/zh-cn/