我有一个 JSON 文件,每行是一条记录,内容类似下面这样:
{"authors":[{"ids":["4888852"],"name":"Andrea D. Foebel"},{"ids":["3325893"],"name":"John P. Hirdes"},{"ids":["5316482"],"name":"George A. Heckman"}]}
{"authors":[{"ids":["4836831"],"name":"A. S. Gavrish"},{"ids":["4061357"],"name":"E. N. Kilimnik"}]}
{"authors":[{"ids":["4888852"],"name":"A. S. Gavrish"},{"ids":["4061357"],"name":"E. N. Kilimnik"}]}
代码
import csv
import json
import pandas as pd
from itertools import islice
from collections import Counter
# Load the JSON Lines file: one record per line, each shaped like
# {"authors": [{"ids": [...], "name": "..."}, ...]}.
data = []
with open('papers-2017-10-30-sample.json', encoding='utf-8') as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline) instead of letting
        # json.loads fail on an empty string.
        if line.strip():
            data.append(json.loads(line))

# The ids are nested under record['authors'][i]['ids'] -- there is no
# top-level 'ids' key -- and each 'ids' value is a list, which is unhashable
# and cannot be counted directly. Flatten down to the individual id strings.
c = Counter(
    author_id
    for record in data
    for author in record['authors']
    for author_id in author['ids']
)
print(c)
我想统计相同 ids 值出现的次数,并按 name 分组。
有任何想法吗?请帮忙
import json
from collections import Counter
# Sample input: a JSON array of records, each holding an 'authors' list whose
# entries carry an 'ids' list and a 'name'.
jsonstr = "[{\"authors\":[{\"ids\":[\"4888852\"],\"name\":\"Andrea D. Foebel\"},{\"ids\":[\"3325893\"],\"name\":\"John P. Hirdes\"},{\"ids\":[\"5316482\"],\"name\":\"George A. Heckman\"}]} ,{\"authors\":[{\"ids\":[\"4836831\"],\"name\":\"A. S. Gavrish\"},{\"ids\":[\"4061357\"],\"name\":\"E. N. Kilimnik\"}]}, {\"authors\":[{\"ids\":[\"4888852\"],\"name\":\"A. S. Gavrish\"},{\"ids\":[\"4061357\"],\"name\":\"E. N. Kilimnik\"}]}]"
jobj = json.loads(jsonstr)

# Flat list of every author id, in the order encountered.
listid = []
print('jobj=', jobj)
for paper in jobj:
    for author in paper['authors']:
        # 'ids' is a list, so walk it rather than appending the list itself.
        # The loop variable is not called `id`, to avoid shadowing the builtin.
        for author_id in author['ids']:
            print('id=', author_id)
            listid.append(author_id)
print('listid=',listid)

# Count how many times each id occurs across all records.
c = Counter(listid)
print('keys=',c.keys())
print('values=',c.values())
输出是
jobj= [{'authors': [{'name': 'Andrea D. Foebel', 'ids': ['4888852']}, {'name': 'John P. Hirdes', 'ids': ['3325893']}, {'name': 'George A. Heckman', 'ids': ['5316482']}]}, {'authors': [{'name': 'A. S. Gavrish', 'ids': ['4836831']}, {'name': 'E. N. Kilimnik', 'ids': ['4061357']}]}, {'authors': [{'name': 'A. S. Gavrish', 'ids': ['4888852']}, {'name': 'E. N. Kilimnik', 'ids': ['4061357']}]}]
id= 4888852
id= 3325893
id= 5316482
id= 4836831
id= 4061357
id= 4888852
id= 4061357
listid= ['4888852', '3325893', '5316482', '4836831', '4061357', '4888852', '4061357']
keys= dict_keys(['4888852', '4836831', '5316482', '3325893', '4061357'])
values= dict_values([2, 1, 1, 1, 2])
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句