press-garden/feed.php

274 lines
6.1 KiB
PHP

<?php
/**
* RSS for PHP - small and easy-to-use library for consuming an RSS Feed
*
* @copyright Copyright (c) 2008 David Grudl
* @license New BSD License
* @version 1.5
*/
class Feed
{
    /**
     * Cache lifetime: either a number of seconds or a relative time string
     * understood by strtotime() (e.g. '1 day').
     * @var int|string
     */
    public static $cacheExpire = '1 day';

    /**
     * Directory where downloaded feeds are cached; caching is skipped when empty.
     * @var string
     */
    public static $cacheDir;

    /**
     * User-Agent string sent with HTTP requests.
     * @var string
     */
    public static $userAgent = 'FeedFetcher-Google';

    /**
     * Root element exposed through __get(): the <channel> element for RSS,
     * the document root for Atom.
     * @var SimpleXMLElement
     */
    protected $xml;

    /**
     * Loads RSS or Atom feed. The document is treated as RSS when it has
     * a <channel> element and as Atom otherwise.
     * @param string $url feed URL
     * @param string $user optional user name
     * @param string $pass optional password
     * @return Feed
     * @throws FeedException
     */
    public static function load($url, $user = null, $pass = null)
    {
        $xml = self::loadXml($url, $user, $pass);
        if ($xml->channel) {
            return self::fromRss($xml);
        }
        return self::fromAtom($xml);
    }

    /**
     * Loads RSS feed.
     * @param string $url RSS feed URL
     * @param string $user optional user name
     * @param string $pass optional password
     * @return Feed
     * @throws FeedException
     */
    public static function loadRss($url, $user = null, $pass = null)
    {
        return self::fromRss(self::loadXml($url, $user, $pass));
    }

    /**
     * Loads Atom feed.
     * @param string $url Atom feed URL
     * @param string $user optional user name
     * @param string $pass optional password
     * @return Feed
     * @throws FeedException
     */
    public static function loadAtom($url, $user = null, $pass = null)
    {
        return self::fromAtom(self::loadXml($url, $user, $pass));
    }

    /**
     * Builds a Feed from a parsed RSS document.
     * Adds 'url' and (when a date is present) 'timestamp' child elements to every item.
     * @param SimpleXMLElement $xml parsed document
     * @return Feed
     * @throws FeedException when the document has no <channel> element
     */
    private static function fromRss(SimpleXMLElement $xml)
    {
        if (!$xml->channel) {
            throw new FeedException('Invalid feed.');
        }

        self::adjustNamespaces($xml);

        foreach ($xml->channel->item as $item) {
            // expose namespaced children as 'prefix:tag' elements
            self::adjustNamespaces($item);

            // generate 'url' & 'timestamp' tags
            $item->url = (string) $item->link;
            if (isset($item->{'dc:date'})) {
                $item->timestamp = strtotime((string) $item->{'dc:date'});
            } elseif (isset($item->pubDate)) {
                $item->timestamp = strtotime((string) $item->pubDate);
            }
        }

        $feed = new self;
        $feed->xml = $xml->channel;
        return $feed;
    }

    /**
     * Builds a Feed from a parsed Atom document.
     * Adds 'url' and 'timestamp' child elements to every entry.
     * @param SimpleXMLElement $xml parsed document
     * @return Feed
     * @throws FeedException when the document declares neither supported Atom namespace
     */
    private static function fromAtom(SimpleXMLElement $xml)
    {
        $namespaces = $xml->getDocNamespaces();
        if (!in_array('http://www.w3.org/2005/Atom', $namespaces, true)
            && !in_array('http://purl.org/atom/ns#', $namespaces, true)
        ) {
            throw new FeedException('Invalid feed.');
        }

        // generate 'url' & 'timestamp' tags
        foreach ($xml->entry as $entry) {
            $entry->url = (string) $entry->link['href'];
            $entry->timestamp = strtotime((string) $entry->updated);
        }

        $feed = new self;
        $feed->xml = $xml;
        return $feed;
    }

    /**
     * Returns property value. Do not call directly.
     * @param string $name tag name
     * @return SimpleXMLElement
     */
    public function __get($name)
    {
        return $this->xml->{$name};
    }

    /**
     * Sets value of a property. Do not call directly.
     * @param string $name property name
     * @param mixed $value property value
     * @return void
     * @throws Exception always; feed properties are read-only
     */
    public function __set($name, $value)
    {
        throw new Exception("Cannot assign to a read-only property '$name'.");
    }

    /**
     * Converts a SimpleXMLElement into an array.
     * An element without children is returned as its string content. Tags that
     * occur once map to a single value, repeated tags map to a list of values.
     * @param SimpleXMLElement $xml element to convert; defaults to the feed root
     * @return array|string
     */
    public function toArray(SimpleXMLElement $xml = null)
    {
        if ($xml === null) {
            $xml = $this->xml;
        }

        if (!$xml->children()) {
            return (string) $xml;
        }

        $arr = [];
        foreach ($xml->children() as $tag => $child) {
            if (count($xml->$tag) === 1) {
                $arr[$tag] = $this->toArray($child);
            } else {
                $arr[$tag][] = $this->toArray($child);
            }
        }
        return $arr;
    }

    /**
     * Load XML from cache or HTTP.
     * Order of preference: a cache file younger than $cacheExpire, a fresh
     * HTTP download (stored in the cache when $cacheDir is set), and finally
     * a cache file of any age.
     * @param string $url feed URL
     * @param string $user optional user name
     * @param string $pass optional password
     * @return SimpleXMLElement
     * @throws FeedException when no data could be obtained
     */
    private static function loadXml($url, $user, $pass)
    {
        $e = self::$cacheExpire;
        // the cache key covers URL and credentials
        $cacheFile = self::$cacheDir . '/feed.' . md5(serialize(func_get_args())) . '.xml';

        if (self::$cacheDir
            && (time() - @filemtime($cacheFile) <= (is_string($e) ? strtotime($e) - time() : $e))
            && $data = @file_get_contents($cacheFile)
        ) {
            // ok: cache file is still within its lifetime
        } elseif ($data = trim((string) self::httpRequest($url, $user, $pass))) {
            if (self::$cacheDir) {
                file_put_contents($cacheFile, $data);
            }
        } elseif (self::$cacheDir && $data = @file_get_contents($cacheFile)) {
            // ok: download failed, fall back to the expired cache file
        } else {
            throw new FeedException('Cannot load feed.');
        }

        return new SimpleXMLElement($data, LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOCDATA);
    }

    /**
     * Process HTTP request.
     * Uses cURL when the extension is loaded, otherwise file_get_contents()
     * with an HTTP stream context. Both paths send self::$userAgent and, when
     * a user name or password is given, HTTP Basic credentials.
     * @param string $url request URL
     * @param string $user optional user name
     * @param string $pass optional password
     * @return string|false response body, or false on failure
     */
    private static function httpRequest($url, $user, $pass)
    {
        if (extension_loaded('curl')) {
            $curl = curl_init();
            curl_setopt($curl, CURLOPT_URL, $url);
            if ($user !== null || $pass !== null) {
                curl_setopt($curl, CURLOPT_USERPWD, "$user:$pass");
            }
            // some feeds require a user agent; it must not be overwritten afterwards
            curl_setopt($curl, CURLOPT_USERAGENT, self::$userAgent);
            curl_setopt($curl, CURLOPT_HEADER, false);
            curl_setopt($curl, CURLOPT_TIMEOUT, 20);
            curl_setopt($curl, CURLOPT_ENCODING, '');
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // no echo, just return result
            if (!ini_get('open_basedir')) {
                // redirects are followed only when open_basedir is not set
                curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
            }
            $result = curl_exec($curl);

            // only a clean transfer with HTTP status 200 counts as success
            return curl_errno($curl) === 0 && curl_getinfo($curl, CURLINFO_HTTP_CODE) === 200
                ? $result
                : false;
        }

        // fallback without cURL: mirror the user agent and credentials of the cURL branch
        $http = [
            'method' => 'GET',
            'user_agent' => self::$userAgent,
        ];
        if ($user !== null || $pass !== null) {
            $http['header'] = 'Authorization: Basic ' . base64_encode($user . ':' . $pass) . "\r\n";
        }

        return file_get_contents($url, false, stream_context_create(['http' => $http]));
    }

    /**
     * Generates better accessible namespaced tags.
     * Every child in a prefixed namespace is copied onto the element under
     * the name 'prefix:tag'; the default (unprefixed) namespace is skipped.
     * @param SimpleXMLElement $el element to adjust in place
     * @return void
     */
    private static function adjustNamespaces($el)
    {
        foreach ($el->getNamespaces(true) as $prefix => $ns) {
            if ($prefix === '') {
                continue;
            }
            $children = $el->children($ns);
            foreach ($children as $tag => $content) {
                $el->{$prefix . ':' . $tag} = $content;
            }
        }
    }
}
/**
 * Exception type thrown by Feed when a feed cannot be loaded or is invalid.
 */
class FeedException extends Exception {}