using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using PhilExampleCrawler.Common.TCP.Packets;
using PhilExampleCrawler.Core.Abstractions.Interfaces;
namespace PhilExampleCrawler.TCPAPI.Services
{
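    /// <summary>
    /// Periodically crawls all registered crawl sessions and forwards newly found
    /// insertions to connected TCP clients via Program.TCPServer.
    /// </summary>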
    internal class CrawlingService
    {
        private const int port = 1234;
        private Timer? _crawlRequestTimer;
        private readonly ICrawlingService_HAP _coreCrawler = Core.Crawler.CreateInstance();
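
        // Starts the TCP receive loop, subscribes to new-insertion events, and begins the periodic crawl timer.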
        public void Start()
        {
            if (Program.TCPServer == null)
                throw new InvalidOperationException("Program.TCPServer must not be null when using CrawlingService.");

            Program.TCPServer.StartReceiveLoop(port);
            _coreCrawler.OnNewInsertionFound += CoreCrawler_OnNewInsertionFound;
            StartCrawlLoop();
        }
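
        // Raised by the core crawler for each newly found insertion; forwards it to the TCP client.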
        private void CoreCrawler_OnNewInsertionFound(object? sender, Common.Models.Insertion e)
        {
            Console.WriteLine("New Insertion found: " + e.Href);

            //TODO: We need to know connectionID here
            Program.TCPServer.Send(1, new BasePacket(new NewInsertionLoad() { Href = e.Href }));
        }
        private void StartCrawlLoop()
        {
            _crawlRequestTimer?.Dispose();

            var startTimeSpan = TimeSpan.Zero;
            var periodTimeSpan = TimeSpan.FromSeconds(10);

            _crawlRequestTimer = new Timer((e) =>
            {
                Console.WriteLine(GetTimeStamp() + " CrawlingService.StartCrawlLoop() started");
                Crawl();
                Console.WriteLine(GetTimeStamp() + " CrawlingService.StartCrawlLoop() finished");
            }, null, startTimeSpan, periodTimeSpan);
        }
        private void Crawl()
        {
            foreach (var userSess in Program.UserService.RunningSessions) //TODO: .Where(x => x.Value.ValidUntil > DateTime.Now)
                foreach (var crawlSess in userSess.Value.RegisteredCrawlSessions)
                {
                    Console.WriteLine(GetTimeStamp() + " started crawlSess " + crawlSess.ID + ":");
                    try
                    {
                        _coreCrawler.Crawl(crawlSess.SearchParams, 2 * 1000);
                    }
                    catch (WebException webEx)
                    {
                        //TODO: if repeatedly throws for same job or if it happens too often
                        //      => either inform or increase timeout (at runtime + until limit maybe?)
                        if (webEx.Status != WebExceptionStatus.Timeout)
                            throw;

                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.Write("[TODO: LOGGING] ");
                        Console.ForegroundColor = ConsoleColor.White;
                        Console.WriteLine("CrawlingService.Crawl() timed out for crawlSessionID " + crawlSess.ID);
                    }
                    Console.WriteLine(GetTimeStamp() + " finished crawlSess " + crawlSess.ID + ".");
                }
        }
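
        // Stops the TCP server, the core crawler, and the crawl timer.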
        public void Stop()
        {
            Program.TCPServer.Stop();
            _coreCrawler.StopCrawling();
            _crawlRequestTimer?.Dispose();
        }
        private static string GetTimeStamp() => "[" + DateTime.Now.TimeOfDay.ToString().Substring(0, 8) + "]";
    }
}