Abracadabra

Get POIs using the AMap (高德地图, Gaode Map) APIs

伪代码如下:

1
2
3
4
5
从Excel文件中读出数据
对于每一个house:
    提取出其location字段(经纬度)
    将location字段作为输入参数传给map api
    将返回值进行适当筛选, 最后存入原数据集中

代码实现如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import xlrd
import pickle
import requests
def ReadHousesInfoFromExcel(
        file_name='houses_nadrop.xls', sheet_name='小区信息'):
    """Read the detail information of every house from an Excel file.

    The parsed rows are cached in ``houses.pkl`` in the current working
    directory. When that cache file already exists it is loaded and
    returned as-is, and ``file_name`` / ``sheet_name`` are not consulted.

    Arguments:
        file_name: the name of the excel-type file.
        sheet_name: the name of the sheet of the excel file.

    Returns:
        houses: A list with one dict per data row. Keys are taken from
            HOUSES_DETAIL_TAB by column position and every value is
            converted to ``str``.

    Raises:
        IndexError: if the sheet has more columns than HOUSES_DETAIL_TAB
            has labels.
    """
    HOUSES_FILE_NAME = 'houses.pkl'
    HOUSES_DETAIL_TAB = ['name', 'address', 'property_category', 'area',
                         'avg_price', 'location', 'property_costs',
                         'volume_rate', 'green_rate']

    if os.path.isfile(HOUSES_FILE_NAME):
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe while houses.pkl is a cache written by this script itself.
        with open(HOUSES_FILE_NAME, 'rb') as f:
            return pickle.load(f)

    workBook = xlrd.open_workbook(file_name)
    bookSheet = workBook.sheet_by_name(sheet_name)

    houses = []
    # Start at row 1: row 0 holds the column headers.
    for row in range(1, bookSheet.nrows):
        house = {}
        for col in range(bookSheet.ncols):
            # Everything is stored as str so later consumers (e.g. the
            # 'location' request parameter) get a uniform type.
            house[HOUSES_DETAIL_TAB[col]] = str(
                bookSheet.cell(row, col).value)
        houses.append(house)

    # Cache the parsed rows so the workbook is only parsed once.
    with open(HOUSES_FILE_NAME, 'wb') as f:
        pickle.dump(houses, f)
    return houses
def Geocode(location, poi_type, timeout=10):
    """Query the AMap "place around" web API for POIs near a location.

    Arguments:
        location: The location of house. It is converted to ``str`` and
            stripped of surrounding whitespace before being sent.
        poi_type: The poi type, sent as the ``types`` request parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        answer: The decoded JSON response, or the string ``'null'`` when
            the request or the JSON decoding failed for any reason.
    """
    base = 'http://restapi.amap.com/v3/place/around'
    # NOTE(review): the API key is hard-coded and sent over plain HTTP;
    # consider loading it from configuration instead.
    parameters = {'location': str(location).strip(),
                  'key': 'e798a5bfb344a09977b79552ae415974',
                  'types': poi_type,
                  'offset': 10,
                  'page': 1,
                  'extensions': 'base'}
    try:
        # Without a timeout a single stalled connection would block the
        # whole crawl indefinitely.
        response = requests.get(base, params=parameters, timeout=timeout)
        return response.json()
    except Exception as e:
        # Deliberate best-effort boundary: callers test for the 'null'
        # sentinel instead of handling exceptions.
        print('error!', e)
        return 'null'
def GetPOI(houses):
    """Get the pois information of the houses according to the location.

    For every house, Geocode is called once per POI type with the house's
    'location' value, and only the 'name', 'location' and 'distance'
    fields of each returned POI are kept.

    Arguments:
        houses: The house detail information, a list of dicts that each
            have a 'location' key.

    Returns:
        houses_with_pois: A new list of copies of the house dicts. Each
            copy has an extra 'pois' dict mapping a POI type label to a
            list of trimmed POI dicts, or to '-' when no information
            could be obtained for that type.
    """
    POI_TYPE_LAB = ['subway_station', 'bus_station', 'parking_lot',
                    'primary_school', 'secondary_school', 'university',
                    'mall', 'park']
    POI_TYPE_CODE = ['150500', '150700', '150904', '141203', '141202',
                     '141201', '060100', '110101']
    KEEP_INFO_LAB = ['name', 'location', 'distance']
    NO_INFO_NOW = '-'
    SIZE = len(houses)

    # list.copy() is shallow, so the house dicts would still be shared
    # with the caller; copy each dict so the input is left untouched.
    houses_with_pois = [dict(house) for house in houses]

    for count, house in enumerate(houses_with_pois, start=1):
        # Progress report every 100 houses.
        if count % 100 == 0:
            print(count, '', SIZE)
        house['pois'] = {}
        for poi_label, poi_code in zip(POI_TYPE_LAB, POI_TYPE_CODE):
            poi_info_json = Geocode(house['location'], poi_code)
            # Geocode returns the string 'null' on failure, and a response
            # may carry no 'pois' entry at all; both count as "no info".
            if isinstance(poi_info_json, dict):
                pois = poi_info_json.get('pois')
            else:
                pois = None
            if pois is None:
                house['pois'][poi_label] = NO_INFO_NOW
                continue
            house['pois'][poi_label] = [
                {key: poi[key] for key in poi if key in KEEP_INFO_LAB}
                for poi in pois]
    return houses_with_pois
if __name__ == '__main__':
    # Load the house records, then query the nearby POIs for each of them.
    houses = ReadHousesInfoFromExcel()
    houses_with_pois = GetPOI(houses)

总结一下有几个注意点:

  1. 传给parameters的location参数的格式一定要规范,前后都不能有空格
  2. 用for循环遍历字典时,不能在循环体内增删该字典的键,否则会抛出 RuntimeError: dictionary changed size during iteration;只修改已有键对应的值则不受影响
  3. 注意pickle的用法:读写时文件都要以二进制模式打开('rb' / 'wb')
  4. 从Excel中读出的内容要转成str格式

整个过程十分清晰明了,值得注意的是细节问题