Skip to content

Commit

Permalink
URL Expander bugfix: Add an endless redirect loop prevention counter
Browse files Browse the repository at this point in the history
  • Loading branch information
ginatrapani committed Jun 18, 2012
1 parent cf90d40 commit 1a64cee
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 2 deletions.
11 changes: 9 additions & 2 deletions webapp/plugins/expandurls/model/class.ExpandURLsPlugin.php
Expand Up @@ -34,7 +34,6 @@ class ExpandURLsPlugin extends Plugin implements CrawlerPlugin {
* @var int
*/
var $link_limit = 0;

/**
* @var Logger
*/
Expand All @@ -48,6 +47,11 @@ class ExpandURLsPlugin extends Plugin implements CrawlerPlugin {
* @var ShortLinkDAO
*/
var $short_link_dao;
/**
* Maximum number of times to expand a given URL. This cap prevents endless expansion loops.
* @var int
*/
const EXPANSION_CAP = 8;
public function __construct($vals=null) {
parent::__construct($vals);
$this->folder_name = 'expandurls';
Expand Down Expand Up @@ -122,6 +126,7 @@ public function expandOriginalURLs($flickr_api_key=null) {
$has_expanded_flickr_link = false;
foreach ($links_to_expand as $index=>$link) {
if (Utils::validateURL($link->url)) {
$endless_loop_prevention_counter = 0;
$this->logger->logInfo("Expanding ".($total_expanded+1). " of ".count($links_to_expand)." (".
$link->url.")", __METHOD__.','.__LINE__);

Expand All @@ -139,12 +144,14 @@ public function expandOriginalURLs($flickr_api_key=null) {
//end Flickr thumbnail processing
$expanded_url = URLExpander::expandURL($short_link,$link->url, $index, count($links_to_expand),
$this->link_dao, $this->logger);
if ($expanded_url == $short_link || $expanded_url == '') {
if ($expanded_url == $short_link || $expanded_url == ''
|| $endless_loop_prevention_counter > self::EXPANSION_CAP) {
$fully_expanded = true;
} else {
$this->short_link_dao->insert($link->id, $short_link);
}
$short_link = $expanded_url;
$endless_loop_prevention_counter++;
}
if (!$has_expanded_flickr_link) {
if ($expanded_url != '' ) {
Expand Down
1 change: 1 addition & 0 deletions webapp/plugins/expandurls/model/class.URLExpander.php
Expand Up @@ -63,6 +63,7 @@ public static function expandURL($tinyurl, $original_link, $current_number, $tot
$scheme = isset($url['scheme'])?$url['scheme']:'http';

$reconstructed_url = $scheme."://$host$port".$path.$query.$fragment;
$logger->logInfo("Making cURL request for ".$reconstructed_url, __METHOD__.','.__LINE__);
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $reconstructed_url);
Expand Down
38 changes: 38 additions & 0 deletions webapp/plugins/expandurls/tests/TestOfExpandURLsPlugin.php
Expand Up @@ -33,6 +33,7 @@
require_once THINKUP_WEBAPP_PATH.'plugins/expandurls/tests/classes/mock.FlickrAPIAccessor.php';
require_once THINKUP_WEBAPP_PATH.'plugins/expandurls/tests/classes/mock.BitlyAPIAccessor.php';
require_once THINKUP_WEBAPP_PATH.'plugins/expandurls/tests/classes/mock.URLExpander.php';
//require_once THINKUP_WEBAPP_PATH.'plugins/expandurls/model/class.URLExpander.php';
require_once THINKUP_WEBAPP_PATH.'plugins/expandurls/model/class.ExpandURLsPlugin.php';
//require_once THINKUP_WEBAPP_PATH.'plugins/expandurls/model/class.BitlyAPIAccessor.php';

Expand Down Expand Up @@ -640,4 +641,41 @@ private function buildBitlyData() {
}
return $builders;
}

//To test this with live URLs (which were endless redirect loops as of 6/17/2012), comment out the mock
//URLExpander require_once above, uncomment the live URLExpander require_once, and uncomment this test.
// public function testURLExpansionWithEndlessLoop() {
// $builders[] = FixtureBuilder::build('owners', array(
// 'id' => 1,
// 'email' => 'admin@example.com',
// 'pwd' => 'XXX',
// 'is_activated' => 1,
// 'is_admin' => 1
// ));
//
// $builders[] = FixtureBuilder::build('links', array(
// 'id' => 250,
// 'url' => 'http://t.co/If5llJOb',
// 'expanded_url' => null,
// 'title' => '',
// 'clicks' => 0,
// 'post_id' => 1,
// 'image_src' => '',
// 'error' => null
// ));
//
// $builders[] = FixtureBuilder::build('links', array(
// 'id' => 251,
// 'url' => 'http://t.co/V7NDaubm',
// 'expanded_url' => null,
// 'title' => '',
// 'clicks' => 0,
// 'post_id' => 1,
// 'image_src' => '',
// 'error' => null
// ));
// $this->simulateLogin('admin@example.com', true);
// $crawler = Crawler::getInstance();
// $crawler->crawl();
// }
}
18 changes: 18 additions & 0 deletions webapp/plugins/expandurls/tests/classes/mock.URLExpander.php
Expand Up @@ -116,6 +116,24 @@ public static function expandURL($tinyurl, $original_link, $current_number, $tot
case "http://www.flickr.com/photos/swirlee/5173198094/":
$exp_url = "http://www.flickr.com/photos/swirlee/5173198094/";
break;
case "http://t.co/If5llJOb":
$exp_url = "http://girlgeekdinners.origo.no/-/bulletin/show/573985_kvinnelige-it-foredragsholdere";
break;
case "http://girlgeekdinners.origo.no/-/bulletin/show/573985_kvinnelige-it-foredragsholdere":
$exp_url = "http://apressen.o5.no/api/checkpoint/v1/transfer?target=http%3A%2F%2Fgirlgeekdinners.origo.no%2F-%2Fbulletin%2Fshow%2F573985_kvinnelige-it-foredragsholdere";
break;
case "http://apressen.o5.no/api/checkpoint/v1/transfer?target=http%3A%2F%2Fgirlgeekdinners.origo.no%2F-%2Fbulletin%2Fshow%2F573985_kvinnelige-it-foredragsholdere":
$exp_url = "http://girlgeekdinners.origo.no/api/checkpoint/v1/transfer?target=http%3A%2F%2Fgirlgeekdinners.origo.no%2F-%2Fbulletin%2Fshow%2F573985_kvinnelige-it-foredragsholdere&session=7j2piqwbbn34tso94cdusl7kxazc03ki9r2x0yzsehh4zs7y64u4uhib5mdykvy6b5xe3kscqqm92gk3p1x17jyhv7fp03uukkw";
break;
case "http://girlgeekdinners.origo.no/api/checkpoint/v1/transfer?target=http%3A%2F%2Fgirlgeekdinners.origo.no%2F-%2Fbulletin%2Fshow%2F573985_kvinnelige-it-foredragsholdere&session=7j2piqwbbn34tso94cdusl7kxazc03ki9r2x0yzsehh4zs7y64u4uhib5mdykvy6b5xe3kscqqm92gk3p1x17jyhv7fp03uukkw":
$exp_url = "http://girlgeekdinners.origo.no/-/bulletin/show/573985_kvinnelige-it-foredragsholdere";
break;
case "http://t.co/V7NDaubm":
$exp_url = "http://t.co/V7NDaubm/endlessredirectloop";
break;
case "http://t.co/V7NDaubm/endlessredirectloop":
$exp_url = "http://t.co/V7NDaubm";
break;
default:
$exp_url = '';
}
Expand Down

0 comments on commit 1a64cee

Please sign in to comment.