-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add IDNA support and integrate with DNS lookup (#2543)
* Add punycode support and integrate with DNS lookup
- 1.15.1
- 1.15.0
- 1.14.1
- 1.14.0
- 1.13.3
- 1.13.2
- 1.13.1
- 1.13.0
- 1.12.2
- 1.12.1
- 1.12.0
- 1.11.2
- 1.11.1
- 1.11.0
- 1.10.1
- 1.10.0
- 1.9.2
- 1.9.1
- 1.9.0
- 1.8.2
- 1.8.1
- 1.8.0
- 1.7.3
- 1.7.2
- 1.7.1
- 1.7.0
- 1.6.2
- 1.6.1
- 1.6.0
- 1.5.1
- 1.5.0
- 1.4.1
- 1.4.0
- 1.3.2
- 1.3.1
- 1.3.0
- 1.2.2
- 1.2.1
- 1.2.0
- 1.1.1
- 1.1.0
- 1.0.0
- 0.36.1
- 0.36.0
- 0.35.1
- 0.35.0
- 0.34.0
- 0.33.0
- 0.32.1
- 0.32.0
- 0.31.1
- 0.31.0
- 0.30.1
- 0.30.0
- 0.29.0
- 0.28.0
- 0.27.2
- 0.27.1
- 0.27.0
- 0.26.1
- 0.26.0
- 0.25.1
- 0.25.0
1 parent
1ce052c
commit e4f637c
Showing
3 changed files
with
210 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
require "spec" | ||
require "uri/punycode" | ||
|
||
# Round-trip checks for URI::Punycode using {decoded, encoded} sample pairs,
# plus one host-name conversion check for `to_ascii`.
describe URI::Punycode do
  # Each tuple is {unicode form, expected Punycode form (without "xn--")}.
  samples = [
    {"3年B組金八先生", "3B-ww4c5e180e575a65lsy2b"},
    {"安室奈美恵-with-SUPER-MONKEYS", "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"},
    {"Hello-Another-Way-それぞれの場所", "Hello-Another-Way--fc4qua05auwb3674vfr0b"},
    {"ひとつ屋根の下2", "2-u9tlzr9756bt3uc0v"},
    {"MajiでKoiする5秒前", "MajiKoi5-783gue6qz075azm5e"},
    {"パフィーdeルンバ", "de-jg4avhby1noc0d"},
    {"そのスピードで", "d9juau41awczczp"},
    {"Hello-Another-Way-それぞれ", "Hello-Another-Way--fc4qua97gba"},
  ]

  samples.each do |pair|
    plain = pair[0]
    puny = pair[1]

    it "encodes #{plain} to #{puny}" do
      URI::Punycode.encode(plain).should eq puny
    end

    it "decodes #{puny} to #{plain}" do
      URI::Punycode.decode(puny).should eq plain
    end
  end

  it "translate to ascii only host name" do
    ascii_host = URI::Punycode.to_ascii("test.テスト.テスト")
    ascii_host.should eq "test.xn--zckzah.xn--zckzah"
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
# `Punycode` provides an interface for IDNA encoding (RFC 5890),
# using the Punycode algorithm defined in RFC 3492.
#
# Implementation based on Mathias Bynens' `punycode.js` project
# https://github.com/bestiejs/punycode.js/
#
# RFC 3492:
# Method to use non-ascii characters as host name of URI
# https://www.ietf.org/rfc/rfc3492.txt
#
# RFC 5890:
# Internationalized Domain Names for Applications (IDNA)
# https://www.ietf.org/rfc/rfc5890.txt
class URI
  class Punycode
    # Bootstring parameter values used by Punycode (RFC 3492, section 5).
    private BASE         =  36
    private TMIN         =   1
    private TMAX         =  26
    private SKEW         =  38
    private DAMP         = 700
    private INITIAL_BIAS =  72
    private INITIAL_N    = 128

    # Separates the literal ASCII characters from the encoded suffix.
    private DELIMITER = '-'

    # Digit alphabet: index 0..25 map to 'a'..'z', 26..35 map to '0'..'9'.
    private BASE36 = "abcdefghijklmnopqrstuvwxyz0123456789"

    # Computes the next bias from *delta*.
    #
    # *numpoints* is the number of code points handled so far (including the
    # current one); *firsttime* selects the stronger `DAMP` divisor used for
    # the very first adaptation.
    private def self.adapt(delta, numpoints, firsttime)
      delta /= firsttime ? DAMP : 2
      delta += delta / numpoints
      k = 0
      while delta > ((BASE - TMIN) * TMAX) / 2
        delta /= BASE - TMIN
        k += BASE
      end
      k + (((BASE - TMIN + 1) * delta) / (delta + SKEW))
    end

    # Returns the digit threshold for position *k* under the current *bias*,
    # i.e. `k - bias` clamped to the range `TMIN..TMAX`.
    private def self.threshold(k, bias)
      if k <= bias
        TMIN
      elsif k >= bias + TMAX
        TMAX
      else
        k - bias
      end
    end

    # Returns the Punycode form of *string* as a new `String`.
    #
    # The `xn--` prefix is not added here; see `to_ascii`.
    def self.encode(string)
      String.build { |io| encode string, io }
    end

    # Writes the Punycode form of *string* to *io*.
    #
    # ASCII characters are copied through first; if there are no non-ASCII
    # characters nothing else is written (no delimiter is appended).
    #
    # Raises `ArgumentError` if the intermediate delta would exceed
    # `Int32::MAX`.
    def self.encode(string, io)
      others = [] of Char

      # Pass 1: emit the basic (ASCII) code points, collect the rest.
      string.each_char do |c|
        if c < '\u0080'
          io << c
        else
          others.push c
        end
      end

      return if others.empty?
      # Non-ASCII code points are processed in increasing order.
      others.sort!

      # h is "handled code points + 1"; it starts at the ASCII count + 1.
      h = string.size - others.size + 1
      delta = 0_u32
      n = INITIAL_N
      bias = INITIAL_BIAS
      firsttime = true
      prev = nil

      # The delimiter is only written when at least one ASCII char was emitted.
      io << DELIMITER if h > 1

      others.each do |m|
        # `others` is sorted, so duplicates are adjacent; handle each value once.
        next if m == prev
        prev = m

        raise ArgumentError.new("Overflow: input needs wider integers to process") if m.ord - n > (Int32::MAX - delta) / h
        delta += (m.ord - n) * h
        n = m.ord + 1

        string.each_char do |c|
          if c < m
            raise ArgumentError.new("Overflow: input needs wider integers to process") if delta > Int32::MAX - 1
            delta += 1
          elsif c == m
            # Emit delta as a generalized variable-length integer.
            q = delta
            k = BASE
            loop do
              t = threshold k, bias
              break if q < t
              io << BASE36[t + ((q - t) % (BASE - t))]
              q = (q - t) / (BASE - t)
              k += BASE
            end
            io << BASE36[q]

            bias = adapt delta, h, firsttime
            delta = 0
            h += 1
            firsttime = false
          end
        end
        delta += 1
      end
    end

    # Decodes the Punycode *string* (without `xn--` prefix) and returns the
    # resulting `String`.
    #
    # Everything before the last `-` is taken literally; the remainder is
    # decoded digit by digit, where letters (either case) are 0..25 and
    # decimal digits are 26..35.
    #
    # Raises `ArgumentError` if a non-alphanumeric digit is found or if the
    # input ends in the middle of an encoded integer.
    def self.decode(string)
      output, _, rest = string.rpartition(DELIMITER)
      output = output.chars

      n = INITIAL_N
      bias = INITIAL_BIAS
      i = 0
      # `init` is true whenever the next digit starts a new encoded integer.
      init = true
      w = oldi = k = 0

      rest.each_char do |c|
        if init
          w = 1
          oldi = i
          k = BASE
          init = false
        end

        digit = case c
                when .ascii_lowercase?
                  c.ord - 0x61
                when .ascii_uppercase?
                  c.ord - 0x41
                when .ascii_number?
                  c.ord - 0x30 + 26
                else
                  raise ArgumentError.new("Invalid input")
                end

        i += digit * w
        t = threshold k, bias

        if digit < t
          # Last digit of this integer: insert the decoded character.
          outsize = output.size + 1
          bias = adapt i - oldi, outsize, oldi == 0
          n += i / outsize
          i %= outsize
          output.insert i, n.chr
          i += 1
          init = true
        else
          # More digits follow for the current integer.
          w *= BASE - t
          k += BASE
        end
      end

      # A dangling, unfinished integer means the input was truncated.
      raise ArgumentError.new("Invalid input") unless init

      output.join
    end

    # Converts the host name *string* to its ASCII-only form.
    #
    # The name is split on `.`; labels that are already ASCII are kept as
    # they are, all others are replaced by `xn--` followed by their Punycode
    # encoding. An ASCII-only *string* is returned unchanged.
    def self.to_ascii(string)
      return string if string.ascii_only?

      String.build do |io|
        first = true
        string.split('.') do |part|
          io << "." unless first

          if part.ascii_only?
            io << part
          else
            io << "xn--"
            encode part, io
          end

          first = false
        end
      end
    end
  end
end