* @copyright Copyright © 2006 Peter Adams * @license http://www.gnu.org/copyleft/gpl.html GPL v2.0 * @category owa * @package owa * @version $Revision$ * @since owa 1.0.0 */ class owa_news extends lastRSS { /** * Configuration * * @var array */ var $config; /** * Error handler * * @var object */ var $e; var $crawler; function owa_news() { $c = &owa_coreAPI::configSingleton(); $this->config = $c->fetch('base'); $this->e = &owa_coreAPI::errorSingleton(); $this->crawler = new owa_http; $this->crawler->read_timeout = 20; $this->cache_dir = ''; $this->date_format = "F j, Y"; $this->CDATA = 'content'; $this->items_limit = 3; return; } /** * This is a redefined Parse function that uses Snoopy to fetch * the file instead of fopen. * * @param unknown_type $rss_url * @return unknown */ function Parse ($rss_url) { // Open and load RSS file $this->crawler->getRequest($rss_url); $rss_content = $this->crawler->response; if (!empty($rss_content)) { // Parse document encoding $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content); // if document codepage is specified, use it if ($result['encoding'] != '') { $this->rsscp = $result['encoding']; } // This is used in my_preg_match() // otherwise use the default codepage else { $this->rsscp = $this->default_cp; } // This is used in my_preg_match() // Parse CHANNEL info preg_match("'(.*?)'si", $rss_content, $out_channel); foreach($this->channeltags as $channeltag) { $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)'si", $out_channel[1]); if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty } // If date_format is specified and lastBuildDate is valid if ($this->date_format != '' && ($timestamp = strtotime($result['lastBuildDate'])) !==-1) { // convert lastBuildDate to specified date format $result['lastBuildDate'] = date($this->date_format, $timestamp); } // Parse TEXTINPUT info preg_match("']*[^/])>(.*?)'si", $rss_content, $out_textinfo); // This a little strange regexp means: // Look for tag with or without any attributes, but skip truncated version (it's not beggining tag) if (isset($out_textinfo[2])) { foreach($this->textinputtags as $textinputtag) { $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)'si", $out_textinfo[2]); if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty } } // Parse IMAGE info preg_match("'(.*?)'si", $rss_content, $out_imageinfo); if (isset($out_imageinfo[1])) { foreach($this->imagetags as $imagetag) { $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)'si", $out_imageinfo[1]); if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty } } // Parse ITEMS preg_match_all("'(.*?)'si", $rss_content, $items); $rss_items = $items[2]; $i = 0; $result['items'] = array(); // create array even if there are no items foreach($rss_items as $rss_item) { // If number of items is lower then limit: Parse one item if ($i < $this->items_limit || $this->items_limit == 0) { foreach($this->itemtags as $itemtag) { $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)'si", $rss_item); if ($temp != '') $result['items'][$i][$itemtag] = $temp; // Set only if not empty } // Strip HTML tags and other bullshit from DESCRIPTION if ($this->stripHTML && $result['items'][$i]['description']) $result['items'][$i]['description'] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i]['description']))); // Strip HTML tags and other bullshit from TITLE if ($this->stripHTML && $result['items'][$i]['title']) $result['items'][$i]['title'] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i]['title']))); // If date_format is specified and pubDate is valid if ($this->date_format != '' && ($timestamp = strtotime($result['items'][$i]['pubDate'])) !==-1) { // convert pubDate to specified date format $result['items'][$i]['pubDate'] = date($this->date_format, $timestamp); } // Item counter $i++; } } $result['items_count'] = $i; return $result; } else // Error in opening return False { $this->e->notice('no rss content found at: '.$rss_url); return False; } } } ?>