*/
// See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1)
// Doesn't follow typical include path conventions, but is more convenient for users
require_once('Apache/Solr/Document.php');
require_once('Apache/Solr/Response.php');
/**
* Starting point for the Solr API. Represents a Solr server resource and has
* methods for pinging, adding, deleting, committing, optimizing and searching.
*
* Example Usage:
*
* ...
* $solr = new Apache_Solr_Service(); //or explicitly new Apache_Solr_Service('localhost', 8180, '/solr')
*
* if ($solr->ping())
* {
* $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :)
*
* $document = new Apache_Solr_Document();
* $document->id = uniqid(); //or something else suitably unique
*
* $document->title = 'Some Title';
* $document->content = 'Some content for this wonderful document. Blah blah blah.';
*
* $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments
* //with an array of documents is faster
*
* $solr->commit(); //commit to see the deletes and the document
* $solr->optimize(); //merges multiple segments into one
*
* //and the one we all care about, search!
* //any other common or custom parameters to the request handler can go in the
* //optional 4th array argument.
* $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc'));
* }
* ...
*
*
* @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance
* improvements when dealing with multiple requests by using HTTP's keep alive functionality
*/
class Apache_Solr_Service
{
/**
* SVN Revision meta data for this class
*/
const SVN_REVISION = '$Revision: 22 $';
/**
* SVN ID meta data for this class
*/
const SVN_ID = '$Id$';
/**
* Response version we support
*/
const SOLR_VERSION = '1.2';
/**
* Response writer we'll request - JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
*/
const SOLR_WRITER = 'json';
/**
* NamedList Treatment constants
*/
const NAMED_LIST_FLAT = 'flat';
const NAMED_LIST_MAP = 'map';
/**
* Search HTTP Methods
*/
const METHOD_GET = 'GET';
const METHOD_POST = 'POST';
/**
* Servlet mappings
*/
const PING_SERVLET = 'admin/ping';
const UPDATE_SERVLET = 'update';
const SEARCH_SERVLET = 'select';
const THREADS_SERVLET = 'admin/threads';
/**
* Server identification strings
*
* @var string
*/
protected $_host, $_port, $_path;
/**
* Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in
* the returned parsed data
*
* @var boolean
*/
protected $_createDocuments = true;
/**
* Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value
* collapsed to appear as a single value would.
*
* @var boolean
*/
protected $_collapseSingleValueArrays = true;
/**
* How NamedLists should be formatted in the output. This specifically effects facet counts. Valid values
* are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}.
*
* @var string
*/
protected $_namedListTreatment = self::NAMED_LIST_MAP;
/**
* Query delimiters. Someone might want to be able to change
* these (to use & instead of & for example), so I've provided them.
*
* @var string
*/
protected $_queryDelimiter = '?', $_queryStringDelimiter = '&';
/**
* Constructed servlet full path URLs
*
* @var string
*/
protected $_pingUrl, $_updateUrl, $_searchUrl, $_threadsUrl;
/**
* Keep track of whether our URLs have been constructed
*
* @var boolean
*/
protected $_urlsInited = false;
/**
* Reusable stream context resources for GET and POST operations
*
* @var resource
*/
protected $_getContext, $_postContext;
/**
* Default HTTP timeout when one is not specified (initialized to default_socket_timeout ini setting)
*
* var float
*/
protected $_defaultTimeout;
/**
* Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
*
* NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead
*
* @param string $value
* @return string
*/
static public function escape($value)
{
//list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
$pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/';
$replace = '\\\$1';
return preg_replace($pattern, $replace, $value);
}
/**
* Escape a value meant to be contained in a phrase for special query characters
*
* @param string $value
* @return string
*/
static public function escapePhrase($value)
{
$pattern = '/("|\\\)/';
$replace = '\\\$1';
return preg_replace($pattern, $replace, $value);
}
/**
* Convenience function for creating phrase syntax from a value
*
* @param string $value
* @return string
*/
static public function phrase($value)
{
return '"' . self::escapePhrase($value) . '"';
}
/**
* Constructor. All parameters are optional and will take on default values
* if not specified.
*
* @param string $host
* @param string $port
* @param string $path
*/
public function __construct($host = 'localhost', $port = 8180, $path = '/solr/')
{
$this->setHost($host);
$this->setPort($port);
$this->setPath($path);
$this->_initUrls();
// create our shared get and post stream contexts
$this->_getContext = stream_context_create();
$this->_postContext = stream_context_create();
// determine our default http timeout from ini settings
$this->_defaultTimeout = (int) ini_get('default_socket_timeout');
// double check we didn't get 0 for a timeout
if ($this->_defaultTimeout <= 0)
{
$this->_defaultTimeout = 60;
}
}
/**
* Return a valid http URL given this server's host, port and path and a provided servlet name
*
* @param string $servlet
* @return string
*/
protected function _constructUrl($servlet, $params = array())
{
if (count($params))
{
//escape all parameters appropriately for inclusion in the query string
$escapedParams = array();
foreach ($params as $key => $value)
{
$escapedParams[] = urlencode($key) . '=' . urlencode($value);
}
$queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams);
}
else
{
$queryString = '';
}
return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . $queryString;
}
/**
* Construct the Full URLs for the three servlets we reference
*/
protected function _initUrls()
{
//Initialize our full servlet URLs now that we have server information
$this->_pingUrl = $this->_constructUrl(self::PING_SERVLET);
$this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER ));
$this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
$this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER ));
$this->_urlsInited = true;
}
/**
* Central method for making a get operation against this Solr Server
*
* @param string $url
* @param float $timeout Read timeout in seconds
* @return Apache_Solr_Response
*
* @throws Exception If a non 200 response status is returned
*/
protected function _sendRawGet($url, $timeout = FALSE)
{
// set the timeout if specified
if ($timeout !== FALSE && $timeout > 0.0)
{
// timeouts with file_get_contents seem to need
// to be halved to work as expected
$timeout = (float) $timeout / 2;
stream_context_set_option($this->_getContext, 'http', 'timeout', $timeout);
}
else
{
// use the default timeout pulled from default_socket_timeout otherwise
stream_context_set_option($this->_getContext, 'http', 'timeout', $this->_defaultTimeout);
}
//$http_response_header is set by file_get_contents
$response = new Apache_Solr_Response(@file_get_contents($url, false, $this->_getContext), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays);
if ($response->getHttpStatus() != 200)
{
throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus());
}
return $response;
}
/**
* Central method for making a post operation against this Solr Server
*
* @param string $url
* @param string $rawPost
* @param float $timeout Read timeout in seconds
* @param string $contentType
* @return Apache_Solr_Response
*
* @throws Exception If a non 200 response status is returned
*/
protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8')
{
stream_context_set_option($this->_postContext, array(
'http' => array(
// set HTTP method
'method' => 'POST',
// Add our posted content type
'header' => "Content-Type: $contentType",
// the posted content
'content' => $rawPost,
// default timeout
'timeout' => $this->_defaultTimeout
)
)
);
// set the timeout if specified
if ($timeout !== FALSE && $timeout > 0.0)
{
// timeouts with file_get_contents seem to need
// to be halved to work as expected
$timeout = (float) $timeout / 2;
stream_context_set_option($this->_postContext, 'http', 'timeout', $timeout);
}
//$http_response_header is set by file_get_contents
$response = new Apache_Solr_Response(@file_get_contents($url, false, $this->_postContext), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays);
if ($response->getHttpStatus() != 200)
{
throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus());
}
return $response;
}
/**
* Returns the set host
*
* @return string
*/
public function getHost()
{
return $this->_host;
}
/**
* Set the host used. If empty will fallback to constants
*
* @param string $host
*/
public function setHost($host)
{
//Use the provided host or use the default
if (empty($host))
{
throw new Exception('Host parameter is empty');
}
else
{
$this->_host = $host;
}
if ($this->_urlsInited)
{
$this->_initUrls();
}
}
/**
* Get the set port
*
* @return integer
*/
public function getPort()
{
return $this->_port;
}
/**
* Set the port used. If empty will fallback to constants
*
* @param integer $port
*/
public function setPort($port)
{
//Use the provided port or use the default
$port = (int) $port;
if ($port <= 0)
{
throw new Exception('Port is not a valid port number');
}
else
{
$this->_port = $port;
}
if ($this->_urlsInited)
{
$this->_initUrls();
}
}
/**
* Get the set path.
*
* @return string
*/
public function getPath()
{
return $this->_path;
}
/**
* Set the path used. If empty will fallback to constants
*
* @param string $path
*/
public function setPath($path)
{
$path = trim($path, '/');
$this->_path = '/' . $path . '/';
if ($this->_urlsInited)
{
$this->_initUrls();
}
}
/**
* Set the create documents flag. This determines whether {@link Apache_Solr_Response} objects will
* parse the response and create {@link Apache_Solr_Document} instances in place.
*
* @param unknown_type $createDocuments
*/
public function setCreateDocuments($createDocuments)
{
$this->_createDocuments = (bool) $createDocuments;
}
/**
* Get the current state of teh create documents flag.
*
* @return boolean
*/
public function getCreateDocuments()
{
return $this->_createDocuments;
}
/**
* Set the collapse single value arrays flag.
*
* @param boolean $collapseSingleValueArrays
*/
public function setCollapseSingleValueArrays($collapseSingleValueArrays)
{
$this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays;
}
/**
* Get the current state of the collapse single value arrays flag.
*
* @return boolean
*/
public function getCollapseSingleValueArrays()
{
return $this->_collapseSingleValueArrays;
}
/**
* Set how NamedLists should be formatted in the response data. This mainly effects
* the facet counts format.
*
* @param string $namedListTreatment
* @throws Exception If invalid option is set
*/
public function setNamedListTreatmet($namedListTreatment)
{
switch ((string) $namedListTreatment)
{
case Apache_Solr_Service::NAMED_LIST_FLAT:
$this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_FLAT;
break;
case Apache_Solr_Service::NAMED_LIST_MAP:
$this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_MAP;
break;
default:
throw new Exception('Not a valid named list treatement option');
}
}
/**
* Get the current setting for named list treatment.
*
* @return string
*/
public function getNamedListTreatment()
{
return $this->_namedListTreatment;
}
/**
* Set the string used to separate the path form the query string.
* Defaulted to '?'
*
* @param string $queryDelimiter
*/
public function setQueryDelimiter($queryDelimiter)
{
$this->_queryDelimiter = $queryDelimiter;
}
/**
* Set the string used to separate the parameters in thequery string
* Defaulted to '&'
*
* @param string $queryStringDelimiter
*/
public function setQueryStringDelimiter($queryStringDelimiter)
{
$this->_queryStringDelimiter = $queryStringDelimiter;
}
/**
* Call the /admin/ping servlet, can be used to quickly tell if a connection to the
* server is able to be made.
*
* @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2)
* @return float Actual time taken to ping the server, FALSE if timeout or HTTP error status occurs
*/
public function ping($timeout = 2)
{
$start = microtime(true);
// when using timeout in context and file_get_contents
// it seems to take twice the timout value
$timeout = (float) $timeout / 2;
if ($timeout <= 0.0)
{
$timeout = -1;
}
$context = stream_context_create(
array(
'http' => array(
'method' => 'HEAD',
'timeout' => $timeout
)
)
);
// attempt a HEAD request to the solr ping page
$ping = @file_get_contents($this->_pingUrl, false, $context);
// result is false if there was a timeout
// or if the HTTP status was not 200
if ($ping !== false)
{
return microtime(true) - $start;
}
else
{
return false;
}
}
/**
* Call the /admin/threads servlet and retrieve information about all threads in the
* Solr servlet's thread group. Useful for diagnostics.
*
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function threads()
{
return $this->_sendRawGet($this->_threadsUrl);
}
/**
* Raw Add Method. Takes a raw post body and sends it to the update service. Post body
* should be a complete and well formed "add" xml document.
*
* @param string $rawPost
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function add($rawPost)
{
return $this->_sendRawPost($this->_updateUrl, $rawPost);
}
/**
* Add a Solr Document to the index
*
* @param Apache_Solr_Document $document
* @param boolean $allowDups
* @param boolean $overwritePending
* @param boolean $overwriteCommitted
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true)
{
$dupValue = $allowDups ? 'true' : 'false';
$pendingValue = $overwritePending ? 'true' : 'false';
$committedValue = $overwriteCommitted ? 'true' : 'false';
$rawPost = '';
$rawPost .= $this->_documentToXmlFragment($document);
$rawPost .= '';
return $this->add($rawPost);
}
/**
* Add an array of Solr Documents to the index all at once
*
* @param array $documents Should be an array of Apache_Solr_Document instances
* @param boolean $allowDups
* @param boolean $overwritePending
* @param boolean $overwriteCommitted
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true)
{
$dupValue = $allowDups ? 'true' : 'false';
$pendingValue = $overwritePending ? 'true' : 'false';
$committedValue = $overwriteCommitted ? 'true' : 'false';
$rawPost = '';
foreach ($documents as $document)
{
if ($document instanceof Apache_Solr_Document)
{
$rawPost .= $this->_documentToXmlFragment($document);
}
}
$rawPost .= '';
return $this->add($rawPost);
}
/**
* Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call
*
* @return string
*/
protected function _documentToXmlFragment(Apache_Solr_Document $document)
{
$xml = 'getBoost() !== false)
{
$xml .= ' boost="' . $document->getBoost() . '"';
}
$xml .= '>';
foreach ($document as $key => $value)
{
$key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');
$fieldBoost = $document->getFieldBoost($key);
if (is_array($value))
{
foreach ($value as $multivalue)
{
$xml .= '';
}
}
else
{
$xml .= '';
}
}
$xml .= '';
// replace any control characters to avoid Solr XML parser exception
return $this->_stripCtrlChars($xml);
}
/**
* Replace control (non-printable) characters from string that are invalid to Solr's XML parser with a space.
*
* @param string $string
* @return string
*/
protected function _stripCtrlChars($string)
{
// See: http://w3.org/International/questions/qa-forms-utf-8.html
// Printable utf-8 does not include any of these chars below x7F
return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);
}
/**
* Send a commit command. Will be synchronous unless both wait parameters are set to false.
*
* @param boolean $optimize Defaults to true
* @param boolean $waitFlush Defaults to true
* @param boolean $waitSearcher Defaults to true
* @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function commit($optimize = true, $waitFlush = true, $waitSearcher = true, $timeout = 3600)
{
$optimizeValue = $optimize ? 'true' : 'false';
$flushValue = $waitFlush ? 'true' : 'false';
$searcherValue = $waitSearcher ? 'true' : 'false';
$rawPost = '';
return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
}
/**
* Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be
* a complete and well formed "delete" xml document
*
* @param string $rawPost Expected to be utf-8 encoded xml document
* @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function delete($rawPost, $timeout = 3600)
{
return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
}
/**
* Create a delete document based on document ID
*
* @param string $id Expected to be utf-8 encoded
* @param boolean $fromPending
* @param boolean $fromCommitted
* @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600)
{
$pendingValue = $fromPending ? 'true' : 'false';
$committedValue = $fromCommitted ? 'true' : 'false';
//escape special xml characters
$id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');
$rawPost = '' . $id . '';
return $this->delete($rawPost, $timeout);
}
/**
* Create and post a delete document based on multiple document IDs.
*
* @param array $ids Expected to be utf-8 encoded strings
* @param boolean $fromPending
* @param boolean $fromCommitted
* @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600)
{
$pendingValue = $fromPending ? 'true' : 'false';
$committedValue = $fromCommitted ? 'true' : 'false';
$rawPost = '';
foreach ($ids as $id)
{
//escape special xml characters
$id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');
$rawPost .= '' . $id . '';
}
$rawPost .= '';
return $this->delete($rawPost, $timeout);
}
/**
* Create a delete document based on a query and submit it
*
* @param string $rawQuery Expected to be utf-8 encoded
* @param boolean $fromPending
* @param boolean $fromCommitted
* @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600)
{
$pendingValue = $fromPending ? 'true' : 'false';
$committedValue = $fromCommitted ? 'true' : 'false';
// escape special xml characters
$rawQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
$rawPost = '' . $rawQuery . '';
return $this->delete($rawPost, $timeout);
}
/**
* Send an optimize command. Will be synchronous unless both wait parameters are set
* to false.
*
* @param boolean $waitFlush
* @param boolean $waitSearcher
* @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception)
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600)
{
$flushValue = $waitFlush ? 'true' : 'false';
$searcherValue = $waitSearcher ? 'true' : 'false';
$rawPost = '';
return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
}
/**
* Simple Search interface
*
* @param string $query The raw query string
* @param int $offset The starting offset for result documents
* @param int $limit The maximum number of result documents to return
* @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
* @return Apache_Solr_Response
*
* @throws Exception If an error occurs during the service call
*/
public function search($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET)
{
if (!is_array($params))
{
$params = array();
}
// construct our full parameters
// sending the version is important in case the format changes
$params['version'] = self::SOLR_VERSION;
// common parameters in this interface
$params['wt'] = self::SOLR_WRITER;
$params['json.nl'] = $this->_namedListTreatment;
$params['q'] = $query;
$params['start'] = $offset;
$params['rows'] = $limit;
// use http_build_query to encode our arguments because its faster
// than urlencoding all the parts ourselves in a loop
$queryString = http_build_query($params, null, $this->_queryStringDelimiter);
// because http_build_query treats arrays differently than we want to, correct the query
// string by changing foo[#]=bar (# being an actual number) parameter strings to just
// multiple foo=bar strings. This regex should always work since '=' will be urlencoded
// anywhere else the regex isn't expecting it
$queryString = preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString);
if ($method == self::METHOD_GET)
{
return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString);
}
else if ($method == self::METHOD_POST)
{
return $this->_sendRawPost($this->_searchUrl, $queryString, FALSE, 'application/x-www-form-urlencoded');
}
else
{
throw new Exception("Unsupported method '$method', please use the Apache_Solr_Service::METHOD_* constants");
}
}
}