Skip to content

Commit 57e86c3

Browse files
author
Salman Haq
committed Apr 25, 2012
Added ckanclient.loaders.util module
1 parent aceb047 commit 57e86c3

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed
 

‎ckanclient/loaders/util.py

+147
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#!/usr/bin/python
2+
''' High-level utility functions.
3+
'''
4+
5+
import ckanclient, os, httplib, mimetypes, urlparse, hashlib
6+
from datetime import datetime
7+
8+
def post_multipart(host, selector, fields, files):
    """
    Post fields and files to an http host as multipart/form-data.

    Arguments:
    host: host (optionally "host:port") to connect to over plain HTTP.
    selector: request path the form is POSTed to.
    fields: a sequence of (name, value) elements for regular form fields.
    files: a sequence of (name, filename, value) elements for data to be
    uploaded as files.

    Return:
    (errcode, errmsg, headers, body): the values reported by
    httplib.HTTP.getreply() followed by the server's response page.

    Taken from http://code.activestate.com/recipes/146306-http-client-to-post-using-multipartform-data/
    """
    content_type, body = encode_multipart_formdata(fields, files)

    h = httplib.HTTP(host)
    try:
        h.putrequest('POST', selector)
        h.putheader('content-type', content_type)
        h.putheader('content-length', str(len(body)))
        h.endheaders()
        h.send(body)
        errcode, errmsg, headers = h.getreply()
        # The response must be read before the connection is closed.
        # NOTE(review): h.file appears to be cleared when the server sends
        # no valid status line; guard so that case yields an empty page
        # instead of an AttributeError.
        response_file = h.file
        if response_file is not None:
            page = response_file.read()
        else:
            page = ''
    finally:
        # Always release the socket, even when sending or reading fails.
        h.close()
    return errcode, errmsg, headers, page
27+
28+
def _escape_disposition_param(value):
    """Make value safe to embed in a quoted Content-Disposition parameter."""
    # A raw double quote would terminate the quoted parameter early and a raw
    # CR/LF would start a new header line, so percent-escape all three.
    text = '%s' % (value,)
    return text.replace('"', '%22').replace('\r', '%0D').replace('\n', '%0A')


def encode_multipart_formdata(fields, files):
    """
    Encode form fields and files as a multipart/form-data request body.

    Arguments:
    fields: a sequence of (name, value) elements for regular form fields.
    files: a sequence of (name, filename, value) elements for data to be
    uploaded as files.

    Field names and filenames are escaped so that double quotes and line
    breaks cannot corrupt the Content-Disposition header. Values are
    emitted verbatim.

    Return (content_type, body) ready for httplib.HTTP instance

    Taken from http://code.activestate.com/recipes/146306-http-client-to-post-using-multipartform-data/
    """
    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
    CRLF = '\r\n'
    lines = []
    for (key, value) in fields:
        lines.append('--' + BOUNDARY)
        lines.append('Content-Disposition: form-data; name="%s"'
                     % _escape_disposition_param(key))
        lines.append('')
        lines.append(value)
    for (key, filename, value) in files:
        lines.append('--' + BOUNDARY)
        lines.append('Content-Disposition: form-data; name="%s"; filename="%s"'
                     % (_escape_disposition_param(key),
                        _escape_disposition_param(filename)))
        lines.append('Content-Type: %s' % get_content_type(filename))
        lines.append('')
        lines.append(value)
    # Closing delimiter, followed by an empty element so the body ends in CRLF.
    lines.append('--' + BOUNDARY + '--')
    lines.append('')
    body = CRLF.join(lines)
    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
    return content_type, body
55+
56+
def get_content_type(filename):
    """Guess the MIME type of filename from its extension.

    Falls back to 'application/octet-stream' when no type can be guessed.
    """
    guessed, _encoding = mimetypes.guess_type(filename)
    if guessed:
        return guessed
    return 'application/octet-stream'
58+
59+
def upload_file(client, file_path):
    """ Upload a file via the filestore api to a CKAN instance.

    The storage key is the file's base name (spaces replaced by dashes)
    prefixed with a timestamp, mirroring the key built by the graphical
    interface. On success, the url of the file is returned along with an
    empty error message. On failure, the url is an empty string.

    Arguments:
    client: a ckan client instance.
    file_path: location of the file on the local filesystem.

    Return:
    url: url of the file on the ckan server.
    errmsg: error message from the server.
    """
    # see ckan/public/application.js:makeUploadKey for why the file_key
    # is derived this way.
    ts = datetime.now().isoformat().replace(':', '').split('.')[0]
    norm_name = os.path.basename(file_path).replace(' ', '-')
    # The key is a URL path, not a local path: always join with '/' so the
    # result does not depend on the platform's path separator.
    file_key = '%s/%s' % (ts, norm_name)

    auth_dict = client.storage_auth_get('/form/' + file_key, {})

    u = urlparse.urlparse(auth_dict['action'])
    # Keep an explicit port when connecting; u.hostname alone drops it and
    # the upload would go to the default port.
    host = u.hostname
    if u.port:
        host = '%s:%d' % (host, u.port)

    # Read in binary mode so the content is sent byte-for-byte, and close
    # the file promptly.
    with open(file_path, 'rb') as f:
        payload = f.read()

    fields = [('key', file_key)]
    files = [('file', norm_name, payload)]
    errcode, errmsg, headers, body = post_multipart(host, u.path, fields, files)

    if errcode == 200:
        return 'http://%s/storage/f/%s' % (u.netloc, file_key), ''
    return '', errmsg
93+
94+
def add_package_resource(client, package_name, file_path_or_url, **kwargs):
    """ Add file or url as a resource to a package.

    If the resource is a local file, it will be uploaded to the ckan server first.
    A dictionary representing the resource is constructed.
    The package entity is fetched from the server and the dictionary
    is appended to the list of resources. The modified package entity is put
    back on the server.

    Arguments:
    client: a ckan client instance
    package_name: name of the package/dataset
    file_path_or_url: path of a local file or a http url.
    kwargs: optional keyword arguments are added to the resource dictionary verbatim.

    Return:
    package_entity: the package entity dictionary as returned by the server.

    Raise:
    IOError: if a local file was given but uploading it failed; the package
    is left untouched in that case.

    examples:

    >>> add_package_resource(client, 'mypkg', '/path/to/local/file', resource_type='data', description='...')
    >>> add_package_resource(client, 'mypkg', 'http://example.org/foo.txt', name='Foo', resource_type='metadata', format='csv')

    """
    # Anything that cannot be stat'ed locally is treated as a url.
    try:
        st = os.stat(file_path_or_url)
    except OSError:
        st = None

    if st is not None:
        file_path = file_path_or_url
        # Hash the raw bytes; text mode could alter them on some platforms.
        with open(file_path, 'rb') as f:
            digest = hashlib.md5(f.read()).hexdigest()

        url, msg = upload_file(client, file_path)
        if not url:
            # upload_file signals failure with an empty url; do not register
            # a resource that points nowhere.
            raise IOError('upload of %r failed: %s' % (file_path, msg))

        # Name the resource after the last two path segments of the stored
        # file (timestamp directory and file name) when the path is deep
        # enough, otherwise after the whole path.
        server_path = urlparse.urlparse(url).path
        if server_path.count('/') > 2:
            norm_name = '/'.join(server_path.split('/')[-2:])
        else:
            norm_name = server_path.strip('/')

        r = dict(name=norm_name, mimetype=get_content_type(file_path),
                 hash=digest, size=st.st_size, url=url)
    else:
        url = file_path_or_url
        r = dict(url=url)

    # Caller-supplied values win over the derived ones.
    r.update(kwargs)
    if 'name' not in r:
        r['name'] = url

    p = client.package_entity_get(package_name)
    p['resources'].append(r)
    return client.package_entity_put(p)

0 commit comments

Comments
 (0)