Skip to content

Commit

Permalink
Merge pull request #336 from gittip/simplify_encoding_parsing
Browse files Browse the repository at this point in the history
Simplify the encoding parsing routine
  • Loading branch information
pjz committed May 1, 2014
2 parents 17254ce + 22c7ff3 commit 2c08c44
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 16 deletions.
26 changes: 10 additions & 16 deletions aspen/resources/__init__.py
Expand Up @@ -79,31 +79,25 @@ def get_declaration(line):
return match.group(1)
return None

encoding = b'ascii'
encoding = None
fulltext = b''
sio = StringIO(raw)
first = sio.readline()
second = sio.readline()
for i, line in enumerate([first, second]):
for line in (sio.readline(), sio.readline()):
potential = get_declaration(line)
if potential is not None:
if encoding is not None:
# If we found a match in the first line, skip the second. This
# matches Python's observed behavior.
pass
elif potential is not None:
encoding = potential

# Munge the encoding line. We want to preserve the line numbering,
# but when we exec the code later on, Python will complain if there is
# a coding: line in a unicode string.
fulltext += line.split(b'#')[0] + b'# encoding set to {0}\n'.format(encoding)

# If we find a match in the first line, we want to skip the second.
# This matches Python's observed behavior.
if i == 0:
fulltext += second
break
else:
fulltext += line
line = line.split(b'#')[0] + b'# encoding set to {0}\n'.format(encoding)
fulltext += line
fulltext += sio.read()
sio.close()
return fulltext.decode(encoding)
return fulltext.decode(encoding or b'ascii')


# Core loaders
Expand Down
1 change: 1 addition & 0 deletions tests/test_unicode.py
Expand Up @@ -122,6 +122,7 @@ def test_decode_raw_cant_take_encoding_from_bad_line_formats():
, b' coding : utf8'
, b'encoding : utf8'
, b' flubcoding =utf8'
, b'coding: '
]
for fmt in formats:
def test():
Expand Down

0 comments on commit 2c08c44

Please sign in to comment.