diff --git a/dev.py b/dev.py
new file mode 100644
index 0000000..e57f248
--- /dev/null
+++ b/dev.py
@@ -0,0 +1,4 @@
+from src.Watcher import Watcher
+
+if __name__ == "__main__":
+    Watcher("./optar/sites.txt", "./optar/keywords.txt").watch()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c9044db..d1c12ed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ deepdiff==7.0.1
 lxml==5.2.2
 requests==2.32.3
 trafilatura==1.11.0
-beautifulsoup4==4.12.3
\ No newline at end of file
+beautifulsoup4==4.12.3
+boto3==1.34.144
\ No newline at end of file
diff --git a/src/SiteReader.py b/src/SiteReader.py
index 9f6abae..b95696e 100644
--- a/src/SiteReader.py
+++ b/src/SiteReader.py
@@ -51,11 +51,9 @@ class SiteReader:
         downloaded_url = trafilatura.fetch_url(url)
 
         try:
-            a = trafilatura.extract(downloaded_url, output_format="json", with_metadata=True, include_comments=False,
-                                    date_extraction_params={'extensive_search': True, 'original_date': True})
+            a = trafilatura.extract(downloaded_url, output_format="json", with_metadata=True, include_comments=False)
         except AttributeError:
-            a = trafilatura.extract(downloaded_url, output_format="json", with_metadata=True,
-                                    date_extraction_params={'extensive_search': True, 'original_date': True})
+            a = trafilatura.extract(downloaded_url, output_format="json", with_metadata=True)
         if a:
             json_output = json.loads(a)
             return json_output['text']
diff --git a/src/SiteStoreS3.py b/src/SiteStoreS3.py
new file mode 100644
index 0000000..93f6f6c
--- /dev/null
+++ b/src/SiteStoreS3.py
@@ -0,0 +1,38 @@
+import json
+from typing import List, Optional
+
+import boto3
+
+
+class SiteStoreS3:
+    def __init__(self, bucket):
+        self.bucket = bucket
+
+    def get_site_history(self, cache_path) -> Optional[List[str]]:
+        # Normalize the prefix so it always ends with "/".
+        prefix = cache_path
+        if cache_path[-1] != "/":
+            prefix += "/"
+
+        s3 = boto3.client("s3")
+        # NOTE: MaxKeys caps the listing at 21 snapshots per site.
+        result = s3.list_objects_v2(Bucket=self.bucket, Prefix=prefix, MaxKeys=21)
+        if "Contents" not in result:
+            return None
+        # Return the sorted object names (the creation dates) with the prefix
+        # stripped; drop empty names such as a folder-marker object for the prefix itself.
+        names = [x["Key"][len(prefix):] for x in result["Contents"]]
+        return sorted(name for name in names if name)
+
+    def get_site_links(self, path):
+        s3 = boto3.resource('s3')
+        obj = s3.Object(self.bucket, path)
+        data = obj.get()['Body']
+        return json.load(data)
+
+    def persist(self, path, data):
+        s3 = boto3.resource('s3')
+        s3object = s3.Object(self.bucket, path)
+        s3object.put(
+            Body=json.dumps(data).encode('UTF-8')
+        )
\ No newline at end of file
diff --git a/src/Watcher.py b/src/Watcher.py
index e6c9db9..74bda2a 100644
--- a/src/Watcher.py
+++ b/src/Watcher.py
@@ -5,12 +5,12 @@ from deepdiff import DeepDiff
 
 from src.Crawler import Crawler
 from src.SiteReader import SiteReader
-from src.SiteStore import SiteStore
+from src.SiteStoreS3 import SiteStoreS3
 
 
 class Watcher:
     def __init__(self, sites_source_path, keywords_source_path) -> None:
-        self.site_store = SiteStore()
+        self.site_store = SiteStoreS3("optar-dev-cache")
         self.site_reader = SiteReader()
         self.keywords_source_path = keywords_source_path
         self.sites_source_path = sites_source_path
@@ -35,7 +35,7 @@
         for site in sites:
            crawler = Crawler()
            crawler.run(site, 1)
-            crawler.persist(f"./cache/{self.remove_protocol(site)}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json")
+            self.site_store.persist(f"{self.remove_protocol(site)}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json", crawler.get_nodes())
 
         contents = [self.get_new_content(site) for site in sites]
         # TODO: improve handleing of None
@@ -56,16 +56,22 @@
     def get_new_content(self, url) -> Dict[str, str]:
         """ get all past iterations of a site by the fully qualified domain name """
-        list_of_files = self.site_store.get_site_history(f"./cache/{self.remove_protocol(url)}/")
+        list_of_files = self.site_store.get_site_history(f"{self.remove_protocol(url)}/")
+        if not list_of_files:
+            return {}
         if len(list_of_files) >= 2:
-            prev_version = self.site_store.get_site_links(f"./cache/{self.remove_protocol(url)}/{list_of_files[-2]}")
-            current_version = self.site_store.get_site_links(f"./cache/{self.remove_protocol(url)}/{list_of_files[-1]}")
+            prev_version = self.site_store.get_site_links(f"{self.remove_protocol(url)}/{list_of_files[-2]}")
+            current_version = self.site_store.get_site_links(f"{self.remove_protocol(url)}/{list_of_files[-1]}")
             news = DeepDiff(prev_version, current_version, ignore_order=True)
         else:
-            news = self.site_store.get_site_links(f"./cache/{self.remove_protocol(url)}/{list_of_files[-1]}")
-
-        sites_contents = self.site_reader.get_sites_content_static(list(news.keys()))
+            news = self.site_store.get_site_links(f"{self.remove_protocol(url)}/{list_of_files[-1]}")
+        sites_contents = {}
+        # DeepDiff reports newly added links under "dictionary_item_added" as keys of
+        # the form "root['https://...']"; the split extracts the URL between the quotes.
+        if news and "dictionary_item_added" in news:
+            sites_contents = self.site_reader.get_sites_content_static(
+                [z.split("'")[1] for z in news["dictionary_item_added"]])
         return sites_contents
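
Note on the dictionary_item_added parsing in get_new_content: DeepDiff reports
added dictionary keys as strings of the form "root['<key>']", so splitting on
single quotes recovers the URL. A minimal standalone sketch (it only assumes
deepdiff as already pinned in requirements.txt; the example URLs are made up):

    from deepdiff import DeepDiff

    prev_version = {"https://example.com/a": "Page A"}
    current_version = {"https://example.com/a": "Page A",
                       "https://example.com/b": "Page B"}

    news = DeepDiff(prev_version, current_version, ignore_order=True)
    # Each entry iterates as a string like "root['https://example.com/b']".
    added = [z.split("'")[1] for z in news["dictionary_item_added"]]
    print(added)  # ['https://example.com/b']

One caveat: the split breaks if a URL itself contains a single quote.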
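
A quick smoke test for SiteStoreS3, mirroring how Watcher uses it. This is a
sketch, not part of the change: it assumes AWS credentials are available via
the usual boto3 lookup chain and that the named bucket exists and is writable
(any test bucket will do):

    from datetime import datetime

    from src.SiteStoreS3 import SiteStoreS3

    store = SiteStoreS3("optar-dev-cache")  # assumed test bucket
    site = "example.com/"
    snapshot = {"https://example.com/a": "Page A"}

    # Persist one snapshot keyed by timestamp, as Watcher.watch() does.
    store.persist(f"{site}{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json", snapshot)

    # History is the sorted list of snapshot names; the newest is last.
    history = store.get_site_history(site)
    print(history)
    print(store.get_site_links(f"{site}{history[-1]}"))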
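
One more caveat on get_site_history: MaxKeys=21 caps the listing, and S3
returns keys in ascending order, so once a site has more than 21 snapshots the
newest ones (the timestamps that sort last) are never returned. If long
histories are expected, a paginator avoids the truncation. A possible
replacement sketch (list_all_snapshots is a hypothetical name, not part of
this change):

    import boto3

    def list_all_snapshots(bucket, prefix):
        # Walk every page so histories longer than one listing are not cut off.
        s3 = boto3.client("s3")
        names = []
        pages = s3.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=prefix)
        for page in pages:
            names.extend(obj["Key"][len(prefix):] for obj in page.get("Contents", []))
        return sorted(name for name in names if name)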