|
|
|
|
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

using PhilExampleCrawler.Common.TCP.Packets;
using PhilExampleCrawler.Core.Abstractions.Interfaces;
|
|
|
|
|
|
|
|
|
|
namespace PhilExampleCrawler.TCPAPI.Services
|
|
|
|
|
{
|
|
|
|
|
internal class CrawlingService
{
    // TCP port the server's receive loop listens on for crawl clients.
    private const int port = 1234;

    // Periodically triggers Crawl(); null until Start() has been called.
    private Timer? _crawlRequestTimer;

    // Core crawler instance; raises OnNewInsertionFound for each new hit.
    private readonly ICrawlingService_HAP _coreCrawler = Core.Crawler.CreateInstance();

    /// <summary>
    /// Starts the TCP receive loop, subscribes to crawler results and begins
    /// the periodic crawl loop.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when Program.TCPServer has not been initialized.
    /// </exception>
    public void Start()
    {
        if (Program.TCPServer == null)
            throw new InvalidOperationException("Program.TCPServer must not be null when using CrawlingService.");

        Program.TCPServer.StartReceiveLoop(port);
        _coreCrawler.OnNewInsertionFound += CoreCrawler_OnNewInsertionFound;
        StartCrawlLoop();
    }

    // Forwards a newly found insertion to the TCP client.
    private void CoreCrawler_OnNewInsertionFound(object? sender, Common.Models.Insertion e)
    {
        Console.WriteLine("New Insertion found: " + e.Href);

        //TODO: We need to know connectionID here ("1" is a hard-coded placeholder).
        Program.TCPServer.Send(1, new BasePacket(new NewInsertionLoad() { Href = e.Href }));
    }

    // (Re)starts the periodic crawl timer, disposing any previous one first.
    private void StartCrawlLoop()
    {
        _crawlRequestTimer?.Dispose();

        var startTimeSpan = TimeSpan.Zero;
        var periodTimeSpan = TimeSpan.FromSeconds(10);

        // NOTE(review): System.Threading.Timer callbacks can overlap if Crawl()
        // runs longer than the period — confirm this is acceptable for the 10s cadence.
        _crawlRequestTimer = new Timer((e) =>
        {
            Console.WriteLine(GetTimeStamp() + " CrawlingService.StartCrawlLoop() started");
            Crawl();
            // Fixed: removed stray "]" that duplicated the bracket GetTimeStamp() already emits.
            Console.WriteLine(GetTimeStamp() + " CrawlingService.StartCrawlLoop() finished");
        }, null, startTimeSpan, periodTimeSpan);
    }

    // Runs one crawl pass over every registered crawl session of every running user session.
    // Timeouts are logged and skipped; any other WebException is rethrown.
    private void Crawl()
    {
        foreach (var userSess in Program.UserService.RunningSessions) //TODO: .Where(x => x.Value.ValidUntil > DateTime.Now)
        {
            foreach (var crawlSess in userSess.Value.RegisteredCrawlSessions)
            {
                Console.WriteLine(GetTimeStamp() + " started crawlSess " + crawlSess.ID + ":");
                try
                {
                    // 2 * 1000 = 2s timeout per crawl session.
                    _coreCrawler.Crawl(crawlSess.SearchParams, 2 * 1000);
                }
                catch (WebException webEx)
                {
                    //TODO: if repeatedly throws for same job or if it happens too often
                    // => either inform or increase timeout (at runtime + until limit maybe?)
                    if (webEx.Status != WebExceptionStatus.Timeout)
                        throw;

                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.Write("[TODO: LOGGING] ");
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.WriteLine("CrawlingService.Crawl() timed out for crawlSessionID " + crawlSess.ID);
                }
                Console.WriteLine(GetTimeStamp() + " finished crawlSess " + crawlSess.ID + ".");
            }
        }
    }

    /// <summary>
    /// Stops the TCP server, the core crawler and the periodic crawl timer.
    /// </summary>
    public void Stop()
    {
        Program.TCPServer.Stop();
        _coreCrawler.StopCrawling();

        _crawlRequestTimer?.Dispose();
        _crawlRequestTimer = null; // allow a clean restart via Start()
    }

    // Current wall-clock time as "[hh:mm:ss]" for log prefixes
    // (TimeOfDay renders as "hh:mm:ss.fffffff"; the first 8 chars are hh:mm:ss).
    private static string GetTimeStamp() => "[" + DateTime.Now.TimeOfDay.ToString().Substring(0, 8) + "]";
}
|
|
|
|
|
}
|