|
| 1 | +#!/usr/bin/python |
| 2 | +''' High-level utility functions. |
| 3 | +''' |
| 4 | + |
| 5 | +import ckanclient, os, httplib, mimetypes, urlparse, hashlib |
| 6 | +from datetime import datetime |
| 7 | + |
def post_multipart(host, selector, fields, files):
    """
    Post fields and files to an http host as multipart/form-data.

    Arguments:
        host: host handed to httplib.HTTP.
        selector: request path sent on the POST request line.
        fields: sequence of (name, value) elements for regular form fields.
        files: sequence of (name, filename, value) elements for data to be
            uploaded as files.

    Return:
        (errcode, errmsg, headers, body): the three values returned by
        getreply() followed by the server's response page.

    Taken from http://code.activestate.com/recipes/146306-http-client-to-post-using-multipartform-data/
    """
    content_type, body = encode_multipart_formdata(fields, files)

    h = httplib.HTTP(host)
    try:
        h.putrequest('POST', selector)
        h.putheader('content-type', content_type)
        h.putheader('content-length', str(len(body)))
        h.endheaders()
        h.send(body)
        errcode, errmsg, headers = h.getreply()
        # The response page is read in full here, before the finally clause
        # closes the connection.
        return errcode, errmsg, headers, h.file.read()
    finally:
        # The original recipe never closed the connection, which leaked the
        # socket on every call (including when an exception was raised).
        h.close()
| 27 | + |
def encode_multipart_formdata(fields, files):
    """
    Build the body and content type of a multipart/form-data request.

    Arguments:
        fields: sequence of (name, value) elements for regular form fields.
        files: sequence of (name, filename, value) elements for data to be
            uploaded as files.

    Return:
        (content_type, body) ready for httplib.HTTP instance

    Taken from http://code.activestate.com/recipes/146306-http-client-to-post-using-multipartform-data/
    """
    boundary = '----------ThIs_Is_tHe_bouNdaRY_$'
    delimiter = '--' + boundary
    parts = []

    # Regular fields: part header, empty separator line, then the value.
    for name, value in fields:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"' % name,
            '',
            value,
        ])

    # File parts additionally carry a filename and a guessed Content-Type.
    for name, filename, value in files:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (name, filename),
            'Content-Type: %s' % get_content_type(filename),
            '',
            value,
        ])

    # Closing delimiter; the trailing empty item makes the body end in CRLF.
    parts.extend([delimiter + '--', ''])

    body = '\r\n'.join(parts)
    content_type = 'multipart/form-data; boundary=%s' % boundary
    return content_type, body
| 55 | + |
def get_content_type(filename):
    """ Guess the MIME type of a file from its name.

    Arguments:
        filename: name (or path) of the file; only passed to mimetypes.

    Return:
        the type guessed by mimetypes.guess_type, or
        'application/octet-stream' when no type could be guessed.
    """
    guessed_type, _encoding = mimetypes.guess_type(filename)
    if guessed_type:
        return guessed_type
    return 'application/octet-stream'
| 58 | + |
def upload_file(client, file_path):
    """ Upload a file via the filestore api to a CKAN instance.

    A timestamped directory is created on the server to store the file as
    if it had been uploaded via the graphical interface. On success, the
    url of the file is returned along with an empty error message. On failure,
    the url is an empty string.

    Arguments:
        client: a ckan client instance; its storage_auth_get method is used
            to obtain the upload form, whose 'action' entry is the url the
            file is posted to.
        file_path: location of the file on the local filesystem.

    Return:
        url: url of the file on the ckan server ('' on failure).
        errmsg: error message from the server ('' on success).
    """
    # Read the file first, in binary mode so the bytes are sent unmodified,
    # and close the handle deterministically.
    with open(file_path, 'rb') as f:
        content = f.read()

    # see ckan/public/application.js:makeUploadKey for why the file_key
    # is derived this way.
    ts = datetime.now().isoformat().replace(':', '').split('.')[0]
    norm_name = os.path.basename(file_path).replace(' ', '-')
    # The key ends up in urls, so it is always joined with '/' rather than
    # the local path separator that os.path.join would use.
    file_key = ts + '/' + norm_name

    auth_dict = client.storage_auth_get('/form/' + file_key, {})
    u = urlparse.urlparse(auth_dict['action'])

    fields = [('key', file_key)]
    files = [('file', norm_name, content)]
    # Post to netloc (not hostname) so a non-default port is kept; this is
    # also the authority used for the url returned below.
    errcode, errmsg, headers, body = post_multipart(u.netloc, u.path, fields, files)

    if errcode == 200:
        return 'http://%s/storage/f/%s' % (u.netloc, file_key), ''
    return '', errmsg
| 93 | + |
def add_package_resource(client, package_name, file_path_or_url, **kwargs):
    """ Add file or url as a resource to a package.

    If the resource is a local file, it will be uploaded to the ckan server first.
    A dictionary representing the resource is constructed.
    The package entity is fetched from the server and the dictionary
    is appended to the list of resources. The modified package entity is put
    back on the server.

    Arguments:
        client: a ckan client instance
        package_name: name of the package/dataset
        file_path_or_url: path of a local file or a http url. Anything that
            os.stat cannot find is treated as a url.
        kwargs: optional keyword arguments are added to the resource dictionary verbatim.

    Return:
        package_entity: the package entity dictionary as returned by the server.

    Raise:
        IOError: the local file could not be uploaded; the package is left
            untouched in that case.

    examples:

    >>> add_package_resource(client, 'mypkg', '/path/to/local/file', resource_type='data', description='...')
    >>> add_package_resource(client, 'mypkg', 'http://example.org/foo.txt', name='Foo', resource_type='metadata', format='csv')

    """
    try:
        st = os.stat(file_path_or_url)
    except OSError:
        # Not a path on the local filesystem: register it as a plain url.
        url = file_path_or_url
        r = dict(url=url)
    else:
        file_path = file_path_or_url
        # Hash the raw bytes; the handle is closed as soon as they are read.
        with open(file_path, 'rb') as f:
            digest = hashlib.md5(f.read()).hexdigest()

        url, msg = upload_file(client, file_path)
        if not url:
            # upload_file signals failure with an empty url. Registering a
            # resource without a url would leave a broken entry on the server.
            raise IOError('upload of %s failed: %s' % (file_path, msg))

        # Name the resource after the last two path segments of the url
        # (timestamp directory and file name) when the path is deep enough.
        server_path = urlparse.urlparse(url).path
        if server_path.count('/') > 2:
            norm_name = '/'.join(server_path.split('/')[-2:])
        else:
            norm_name = server_path.strip('/')

        r = dict(name=norm_name, mimetype=get_content_type(file_path),
                 hash=digest, size=st.st_size, url=url)

    # Caller-supplied values override the derived ones.
    r.update(kwargs)
    if 'name' not in r:
        r['name'] = url

    p = client.package_entity_get(package_name)
    p['resources'].append(r)
    return client.package_entity_put(p)
0 commit comments