Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 96060b3

Browse files
committed Jul 13, 2015
Adding more attachment metadata.
For this, also merged download_additional_image_sizes into import_attachment.
1 parent 99f990d commit 96060b3

File tree

1 file changed

+119
-50
lines changed

1 file changed

+119
-50
lines changed
 

‎nikola/plugins/command/import_wordpress.py

+119-50
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ def download_url_content_to_file(self, url, dst_path):
439439
LOGGER.warn("Downloading {0} to {1} failed: {2}".format(url, dst_path, err))
440440

441441
def import_attachment(self, item, wordpress_namespace):
442+
# Download main image
442443
url = get_text_tag(
443444
item, '{{{0}}}attachment_url'.format(wordpress_namespace), 'foo')
444445
link = get_text_tag(item, '{{{0}}}link'.format(wordpress_namespace),
@@ -453,62 +454,130 @@ def import_attachment(self, item, wordpress_namespace):
453454
links[link] = '/' + dst_url
454455
links[url] = '/' + dst_url
455456

456-
result = {}
457-
result['files'] = [path] + self.download_additional_image_sizes(
458-
item,
459-
wordpress_namespace,
460-
os.path.dirname(url)
461-
)
462-
return result
463-
464-
def download_additional_image_sizes(self, item, wordpress_namespace, source_path):
465-
if phpserialize is None:
466-
return []
457+
files = [path]
458+
files_meta = [{}]
467459

468460
additional_metadata = item.findall('{{{0}}}postmeta'.format(wordpress_namespace))
469-
if additional_metadata is None:
470-
return []
471-
472-
result = []
473-
for element in additional_metadata:
474-
meta_key = element.find('{{{0}}}meta_key'.format(wordpress_namespace))
475-
if meta_key is not None and meta_key.text == '_wp_attachment_metadata':
476-
meta_value = element.find('{{{0}}}meta_value'.format(wordpress_namespace))
461+
if phpserialize and additional_metadata:
462+
for element in additional_metadata:
463+
meta_key = element.find('{{{0}}}meta_key'.format(wordpress_namespace))
464+
if meta_key is not None and meta_key.text == '_wp_attachment_metadata':
465+
meta_value = element.find('{{{0}}}meta_value'.format(wordpress_namespace))
466+
467+
if meta_value is None:
468+
continue
469+
470+
# Someone from Wordpress thought it was a good idea
471+
# to serialize PHP objects into that metadata field. Given
472+
# that the export should give you the power to insert
473+
# your blogging into another site or system, it's not.
474+
# Why don't they just use JSON?
475+
if sys.version_info[0] == 2:
476+
try:
477+
metadata = phpserialize.loads(utils.sys_encode(meta_value.text))
478+
except ValueError:
479+
# local encoding might be wrong sometimes
480+
metadata = phpserialize.loads(meta_value.text.encode('utf-8'))
481+
else:
482+
metadata = phpserialize.loads(meta_value.text.encode('utf-8'))
477483

478-
if meta_value is None:
479-
continue
484+
meta_key = b'image_meta'
485+
size_key = b'sizes'
486+
file_key = b'file'
487+
width_key = b'width'
488+
height_key = b'height'
489+
490+
# Extract metadata
491+
if width_key in metadata and height_key in metadata:
492+
files_meta[0]['width'] = int(metadata[width_key])
493+
files_meta[0]['height'] = int(metadata[height_key])
494+
495+
if meta_key in metadata:
496+
image_meta = metadata[meta_key]
497+
dst_meta = {}
498+
499+
def add(our_key, wp_key, is_int=False, ignore_zero=False):
500+
if wp_key in image_meta:
501+
value = image_meta[wp_key]
502+
if is_int:
503+
value = int(value)
504+
if ignore_zero and value == 0:
505+
return
506+
else:
507+
value = value.decode('utf-8') # assume UTF-8
508+
if value == '': # skip empty values
509+
return
510+
dst_meta[our_key] = value
511+
512+
add('aperture', b'aperture', is_int=True, ignore_zero=True)
513+
add('credit', b'credit')
514+
add('camera', b'camera')
515+
add('caption', b'caption')
516+
add('created_timestamp', b'created_timestamp', is_int=True, ignore_zero=True)
517+
add('copyright', b'copyright')
518+
add('focal_length', b'focal_length', is_int=True, ignore_zero=True)
519+
add('iso', b'iso', is_int=True, ignore_zero=True)
520+
add('shutter_speed', b'shutter_speed', is_int=True, ignore_zero=True)
521+
add('title', b'title')
522+
523+
if len(dst_meta) > 0:
524+
files_meta[0]['meta'] = dst_meta
525+
526+
# Find other sizes of image
527+
if size_key not in metadata:
528+
continue
529+
530+
for size in metadata[size_key]:
531+
filename = metadata[size_key][size][file_key]
532+
url = '/'.join([source_path, filename.decode('utf-8')])
533+
534+
# Construct metadata
535+
meta = {}
536+
meta['size'] = size.decode('utf-8')
537+
if width_key in metadata[size_key][size] and height_key in metadata[size_key][size]:
538+
meta['width'] = metadata[size_key][size][width_key]
539+
meta['height'] = metadata[size_key][size][height_key]
540+
541+
path = urlparse(url).path
542+
dst_path = os.path.join(*([self.output_folder, 'files'] + list(path.split('/'))))
543+
dst_dir = os.path.dirname(dst_path)
544+
utils.makedirs(dst_dir)
545+
LOGGER.info("Downloading {0} => {1}".format(url, dst_path))
546+
self.download_url_content_to_file(url, dst_path)
547+
dst_url = '/'.join(dst_path.split(os.sep)[2:])
548+
links[url] = '/' + dst_url
549+
550+
files.append(path)
551+
files_meta.append(meta)
552+
553+
# Prepare result
554+
result = {}
555+
result['files'] = files
556+
result['files_meta'] = files_meta
480557

481-
# Someone from Wordpress thought it was a good idea
482-
# serialize PHP objects into that metadata field. Given
483-
# that the export should give you the power to insert
484-
# your blogging into another site or system its not.
485-
# Why don't they just use JSON?
486-
if sys.version_info[0] == 2:
487-
try:
488-
metadata = phpserialize.loads(utils.sys_encode(meta_value.text))
489-
except ValueError:
490-
# local encoding might be wrong sometimes
491-
metadata = phpserialize.loads(meta_value.text.encode('utf-8'))
492-
else:
493-
metadata = phpserialize.loads(meta_value.text.encode('utf-8'))
494-
size_key = b'sizes'
495-
file_key = b'file'
558+
# Prepare extraction of more information
559+
dc_namespace = item.nsmap['dc']
560+
content_namespace = item.nsmap['content']
561+
excerpt_namespace = item.nsmap['excerpt']
496562

497-
if size_key not in metadata:
498-
continue
563+
def add(result_key, key, namespace=None, filter=None, store_empty=False):
564+
if namespace is not None:
565+
value = get_text_tag(item, '{{{0}}}{1}'.format(namespace, key), None)
566+
else:
567+
value = get_text_tag(item, key, None)
568+
if value is not None:
569+
if filter:
570+
value = filter(value)
571+
if value or store_empty:
572+
result[result_key] = value
573+
574+
add('title', 'title')
575+
add('date_utc', 'post_date_gmt', namespace=wordpress_namespace)
576+
add('wordpress_user_name', 'creator', namespace=dc_namespace)
577+
add('content', 'encoded', namespace=content_namespace)
578+
add('excerpt', 'encoded', namespace=excerpt_namespace)
579+
add('description', 'description')
499580

500-
for filename in [metadata[size_key][size][file_key] for size in metadata[size_key]]:
501-
url = '/'.join([source_path, filename.decode('utf-8')])
502-
503-
path = urlparse(url).path
504-
dst_path = os.path.join(*([self.output_folder, 'files'] + list(path.split('/'))))
505-
dst_dir = os.path.dirname(dst_path)
506-
utils.makedirs(dst_dir)
507-
LOGGER.info("Downloading {0} => {1}".format(url, dst_path))
508-
self.download_url_content_to_file(url, dst_path)
509-
dst_url = '/'.join(dst_path.split(os.sep)[2:])
510-
links[url] = '/' + dst_url
511-
result.append(path)
512581
return result
513582

514583
code_re1 = re.compile(r'\[code.* lang.*?="(.*?)?".*\](.*?)\[/code\]', re.DOTALL | re.MULTILINE)

0 commit comments

Comments
 (0)
Please sign in to comment.