| Server IP : 172.67.178.83 / Your IP : 216.73.217.141 Web Server : Apache System : Linux hosting01.arsenalhost.com 4.18.0-425.13.1.lve.el8.x86_64 #1 SMP Mon Feb 27 15:23:24 EST 2023 x86_64 User : corbizre ( 1013) PHP Version : 7.4.33 Disable Function : exec,passthru,shell_exec,system MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /home/corbizre/estatearcheology.com/console/controllers/ |
Upload File : |
<?php
namespace console\controllers;
use common\models\Image;
use console\models\Housestable;
use console\models\Hspars2HouseTable;
use console\models\Hspars2ImageTable;
use console\models\Urltable;
use console\parsers\Coldwellbanker;
use console\parsers\Hudhomestore;
use console\parsers\Main;
use console\parsers\Mobilehomes;
use Yii;
use console\models\AngryCurl;
use console\parsers\Movoto;
use console\parsers\Realestatebook;
/**
 * Console controller that drives the listing scrapers.
 *
 * Stage 1 (actionGetUrl) collects listing URLs for a source, stage 2
 * (actionStart / actionTest) parses the collected URLs, and actionImport
 * copies parsed houses from the Hspars2 tables into the local Housestable.
 */
class ParserController extends \yii\console\Controller
{
    /**
     * Lower bound used for the "updated since" import when the local table
     * has no newer timestamp.
     */
    const MIN_LAST_UPDATE = '2011-11-13 03:10:14';

    /**
     * Prepares the PHP runtime for long-running console jobs.
     *
     * @return void
     */
    public function init()
    {
        parent::init();

        if (!defined('DS')) {
            define('DS', DIRECTORY_SEPARATOR);
        }
        ini_set('display_errors', 'On');
        ini_set('max_execution_time', 0);
        ini_set('memory_limit', '128M');
        set_time_limit(0);
    }

    /**
     * Stage 1: collects listing URLs for the given source.
     *
     * SOURCE and SOURCE_ID are not defined in this file; presumably
     * Main::setOptions() defines them from $ID (TODO confirm).
     *
     * @param mixed $ID          source identifier passed to Main::setOptions()
     * @param mixed $countStates unused; kept for backward compatibility
     * @return void
     */
    public function actionGetUrl($ID, $countStates = null)
    {
        Main::setOptions($ID, null);
        define('ACTION', 'get-url');
        echo "# STAGE 1 - GETTING URL " . SOURCE . "\n";
        echo "# Scaning...\n";

        switch (SOURCE_ID) {
            case 1: // www.hudhomestore.com
                $this->CURL(Hudhomestore::getUrls());
                break;

            case 2: // www.coldwellbanker.com
                // Fetch the URL list and write it straight to Urltable.
                Urltable::addUrlToTable(Coldwellbanker::getUrls());
                echo "ALL DONE\n";
                exit();

            case 3: // www.movoto.com
                // An earlier variant crawled state and city pages
                // (Movoto::getListStateUrls / getCityUrls / GetAllPages);
                // the XML-based URL list is used instead.
                $this->CURL(Movoto::getUrlFromXml());
                break;

            case 4: // https://mobilehomes-for-sale.com
                // Two passes; STEP1 / STEP2 are defined before the matching
                // crawl, in the same order as before.
                define('STEP1', 1);
                $this->CURL(Mobilehomes::getUrls());
                $detailUrls = Mobilehomes::getUrlDetail();
                define('STEP2', 2);
                $this->CURL($detailUrls);
                break;

            case 5: // https://www.realestatebook.com
                // The return value of getCityJson() was never used here, so
                // only the calls themselves are kept.
                Realestatebook::getStateAndSity();
                Realestatebook::getCityJson();
                break;

            default:
                break;
        }
    }

    /**
     * Imports houses from the Hspars2 tables into the local Housestable.
     *
     * $action == "2" continues after the remote row whose full_address_md5
     * matches the newest local house (by ID). Any other value, or an empty
     * result of the "2" lookup, selects remote houses whose last_update is
     * not older than the newest local updated_at.
     *
     * Only non-commercial houses are selected. The previous code chained
     * where() twice, which in Yii2 replaces the first condition, so the
     * is_commercial filter was lost.
     *
     * @param string $action     "1" = by last update date, "2" = by last imported house
     * @param string $processing maximum number of houses handled in one run
     * @return void
     */
    public function actionImport($action = "1", $processing = "100")
    {
        $houses = [];
        if ($action == "2") {
            $houses = $this->findHousesAfterLastImported($processing);
        }
        if (!$houses) {
            $houses = $this->findHousesUpdatedSinceLastImport($processing);
        }
        if (!$houses) {
            return;
        }

        $houseIds = [];
        foreach ($houses as $house) {
            $houseIds[] = $house->ID;
        }

        // Load all images for the batch in one query and group them by house.
        $imagesByHouse = [];
        $imageUrlsByHouse = [];
        $hsparsImages = Hspars2ImageTable::find()->where(['in', 'house_id', $houseIds])->all();
        foreach ($hsparsImages as $image) {
            $imagesByHouse[$image->house_id][] = $image;
            $imageUrlsByHouse[$image->house_id][] = $image->url;
        }

        foreach ($houses as $house) {
            // Houses without images keep getting null, as before, but
            // without the undefined-index notice.
            $houseImages = isset($imagesByHouse[$house->ID]) ? $imagesByHouse[$house->ID] : null;
            $houseImageUrls = isset($imageUrlsByHouse[$house->ID]) ? $imageUrlsByHouse[$house->ID] : null;
            $this->importHouse($house, $houseImages, $houseImageUrls);
        }
    }

    /**
     * Returns non-commercial remote houses updated since the newest local
     * updated_at (never earlier than MIN_LAST_UPDATE).
     *
     * @param string|int $processing maximum number of rows to return
     * @return array
     */
    private function findHousesUpdatedSinceLastImport($processing)
    {
        $lastLocalHouse = Housestable::find()->orderBy('updated_at DESC')->limit(1)->one();
        $lastUpdate = $lastLocalHouse ? $lastLocalHouse->updated_at : null;
        if ($lastUpdate < self::MIN_LAST_UPDATE) {
            $lastUpdate = self::MIN_LAST_UPDATE;
        }
        echo "LAST UPDATE: " . $lastUpdate . "\n";

        return Hspars2HouseTable::find()
            ->where(['is_commercial' => false])
            ->andWhere(['>=', 'last_update', $lastUpdate])
            ->limit($processing)
            ->all();
    }

    /**
     * Returns non-commercial remote houses whose ID is greater than that of
     * the remote row matching the newest local house.
     *
     * Starts from the first remote row when the local table is empty or its
     * newest row has no full_address_md5. Returns an empty array when the
     * matching remote row cannot be found, so the caller falls back to the
     * date-based selection.
     *
     * @param string|int $processing maximum number of rows to return
     * @return array
     */
    private function findHousesAfterLastImported($processing)
    {
        $lastLocalHouse = Housestable::find()->orderBy('ID DESC')->limit(1)->one();

        $query = Hspars2HouseTable::find()
            ->where(['is_commercial' => false])
            ->orderBy('"ID" ASC')
            ->limit($processing);

        if ($lastLocalHouse && $lastLocalHouse->full_address_md5) {
            // The cursor row is looked up by hash only, which is what the
            // old double where() effectively did.
            $lastRemoteHouse = Hspars2HouseTable::find()
                ->where(['full_address_md5' => $lastLocalHouse->full_address_md5])
                ->limit(1)
                ->one();
            if (!$lastRemoteHouse) {
                return [];
            }
            $query->andWhere(['>', '"ID"', $lastRemoteHouse->ID]);
        }

        return $query->all();
    }

    /**
     * Creates or updates the local Housestable row for one remote house and
     * hands its images to Image::updateImages() when the row is saved.
     *
     * @param object     $house          row loaded from Hspars2HouseTable
     * @param array|null $houseImages    Hspars2ImageTable rows of the house, or null
     * @param array|null $houseImageUrls image URLs of the house, or null
     * @return void
     */
    private function importHouse($house, $houseImages, $houseImageUrls)
    {
        $bedBath = (!empty($house->bedrooms) ? $house->bedrooms : null)
            . '/'
            . (!empty($house->bathrooms) ? $house->bathrooms : null);

        // Agent and Broker are stored with the same payload.
        $contact = serialize([
            'Company Name' => $house->brokerage_firm,
            'Contact Name' => $house->agent,
            'Address' => $house->brokerage_address,
            'Phone Number' => $house->brokerage_phone,
            'Fax Number' => '',
            'Email' => $house->brokerage_email,
            'Website' => '',
            'Additional Comments' => '',
        ]);

        // Look for an existing record with the same address hash.
        $model = Housestable::getHouseByFullAddressMd5($house->full_address_md5);
        if (!$model) {
            $model = new Housestable();
            $model->created_at = time();
            $model->full_address_md5 = $house->full_address_md5;
            // Mark the URL as processed.
            // NOTE(review): the model is not saved yet, so $model->ID is
            // presumably still null here; confirm whether this call should
            // happen after save().
            Urltable::setUrlProced($house->url, $model->ID);
        } else {
            echo "Already in." . " House.Id: " . $model->ID, PHP_EOL;
            // Refresh the processed date of the URL.
            Urltable::setUrlProced($house->url);
            $model->created_at = strtotime($model->created_at);
        }

        $model->CaseNumber = $house->property_id;
        $model->Mls = $house->MLSID;
        $model->Status = $house->status;
        $model->Address = $house->address;
        $model->State = $house->state;
        $model->City = $house->city;
        $model->Zip = $house->zip;
        $model->County = $house->county;
        $model->updated_at = strtotime($house->last_update);
        $model->Square_Feet = $house->area;
        $model->Bed_bath = $bedBath !== '/' ? $bedBath : null;
        $model->Total_rooms = $house->rooms;
        $model->Year = (int)$house->construction_year;
        $model->Type = $house->type;
        $model->Number_of_story = $house->stories;
        $model->HOA_Fees = $house->hoa_fees;
        $model->Revitalization = null;
        $model->Parking = $house->parking_spaces;
        $model->Patio_Deck = $house->patio;
        $model->Listing = null;
        $model->Agent = $contact;
        $model->Manager = null;
        $model->Broker = $contact;
        $model->Images = serialize($houseImageUrls);
        $model->Latitude = null;
        $model->Longitude = null;
        $model->Roof = $house->roof;
        $model->Lot_Size = $house->lot_size;
        $model->Air_Conditioning = $house->air;
        // Strip the dollar sign; '' replaces the former null replacement.
        $model->Price = str_replace('$', '', $house->price);
        $model->Amenities = null;
        $model->Exterior = $house->exterior;
        $model->Flooring = $house->flooring;
        $model->Heat = $house->heating_type;
        $model->Description = $house->additional_info;
        $model->School = null;
        $model->Neighborhood = null;
        $model->Total = null;
        $model->Other = null;

        if ($model->save()) {
            echo "SAVED: HOUSE \n";
            // Store the photos.
            Image::updateImages(
                $houseImages,
                $house->state,
                $house->zip,
                $house->ID,
                $house->image_url,
                $house->image_url_orig
            );
        } else {
            echo "error!!!\n";
        }
    }

    /**
     * Stage 2: parses the URLs returned by Urltable::getListNotParsingYet().
     *
     * For source 5 (realestatebook) the URLs are parsed directly and the
     * process exits; every other source goes through CURLSTART().
     *
     * @param mixed $ID             source identifier passed to Main::setOptions()
     * @param mixed $countIteration passed to Main::setOptions()
     * @param mixed $offset         passed to Urltable::getListNotParsingYet()
     * @return void
     */
    public function actionStart($ID, $countIteration = null, $offset = null)
    {
        Main::setOptions($ID, $countIteration);
        define('ACTION', 'parsing');
        echo "# STAGE 2 - PARSING URL " . SOURCE;
        echo " (limit: " . GLOBAL_LIMIT_PER_ACTION . ")\n";

        $arrSoursUrl = Urltable::getListNotParsingYet($offset);
        if ($ID == 5 && !empty($arrSoursUrl)) { // realestatebook
            $this->parseRealestatebookUrls($arrSoursUrl);
            die('All done');
        }
        $this->CURLSTART($arrSoursUrl);
    }

    /**
     * Requests every URL of the list through AngryCurl, one at a time.
     *
     * @param array|null $arrSoursUrl list of URL strings
     * @return void
     */
    public function CURL($arrSoursUrl)
    {
        $AC = $this->createCurl();
        if (!$arrSoursUrl) {
            return;
        }
        foreach ($arrSoursUrl as $url) {
            $AC->get($url);
            // 600 is passed through unchanged; presumably the AngryCurl
            // window size (TODO confirm).
            $AC->execute(600);
        }
    }

    /**
     * Requests the `url` property of every record through AngryCurl and
     * prints "ALL DONE." once the whole list has been processed.
     *
     * @param array|null $arrSoursUrl list of records exposing a `url` property
     * @return void
     */
    public function CURLSTART($arrSoursUrl)
    {
        $AC = $this->createCurl();
        if (!$arrSoursUrl) {
            return;
        }
        foreach ($arrSoursUrl as $url) {
            $AC->get($url->url);
            // 500 is passed through unchanged; presumably the AngryCurl
            // window size (TODO confirm).
            $AC->execute(500);
        }
        echo "\n" . "ALL DONE.", PHP_EOL;
    }

    /**
     * Builds an AngryCurl instance with the bundled user-agent list loaded.
     *
     * 'callback_function' is not defined in this file; it is passed on
     * exactly as before.
     *
     * @return AngryCurl
     */
    private function createCurl()
    {
        $AC = new AngryCurl('callback_function');
        $AC->load_useragent_list(
            Yii::getAlias('@console') . DIRECTORY_SEPARATOR . 'models' . DIRECTORY_SEPARATOR . 'useragent_list.txt'
        );

        return $AC;
    }

    /**
     * Parses realestatebook URLs directly and stores every non-empty result.
     *
     * @param array $arrSoursUrl list of records exposing a `url` property
     * @return void
     */
    private function parseRealestatebookUrls($arrSoursUrl)
    {
        foreach ($arrSoursUrl as $url) {
            $output = Main::Parser($url->url);
            $result = Realestatebook::ParsingURL($output, $url->url);
            if (!empty($result)) {
                Realestatebook::AddUrlToTableHouses($result);
            }
        }
    }

    /**
     * Stage 2 without a per-run limit: keeps asking
     * Urltable::getOneNotParsingYet() for work and parses what it returns.
     *
     * NOTE(review): when nothing is returned the loop keeps polling without
     * any pause; confirm whether it should stop or sleep on an empty queue.
     *
     * @param mixed $ID             source identifier passed to Main::setOptions()
     * @param mixed $countIteration passed to Main::setOptions()
     * @param mixed $offset         unused; kept for backward compatibility
     * @return void
     */
    public function actionTest($ID, $countIteration = null, $offset = null)
    {
        Main::setOptions($ID, $countIteration);
        define('ACTION', 'parsing');
        echo "# STAGE 2 - PARSING NON-STOP URL " . SOURCE, PHP_EOL;
        echo "# No limit", PHP_EOL;

        for ($i = 1; $i <= 10000000000000; $i++) {
            $arrSoursUrl = Urltable::getOneNotParsingYet();
            if ($ID == 5 && !empty($arrSoursUrl)) { // realestatebook
                $this->parseRealestatebookUrls($arrSoursUrl);
            } else {
                $this->CURLSTART($arrSoursUrl);
            }
        }
    }
}