sdilej — downloader module for sdilej.cz

  1from __future__ import annotations
  2import logging
  3import bs4
  4from src.download import *
  5from basic_colors import *
  6from src.downloader.page_search import *
  7from src.link_to_file import Link_to_file, compare_sizes
  8
  9class Sdilej_downloader(Download_page_search):
 10    """
 11    Downloader from: sdilej.cz
 12    """
 13    webpage = "https://sdilej.cz"
 14
 15    logger = logging.getLogger("Sdilej_downloader")
 16    if not logger.hasHandlers():
 17        os.makedirs("logs", exist_ok=True)
 18        handler = logging.FileHandler("logs/sdilej_downloader.log", encoding="utf-8")
 19        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
 20        handler.setFormatter(formatter)
 21        logger.addHandler(handler)
 22        logger.setLevel(logging.INFO)
 23    
 24    def __init__(self):
 25        pass
 26    
 27    def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]':
 28        if prompt is None or prompt.strip() == "":
 29            raise ValueError("Prompt cannot be empty.")
 30        url = Sdilej_downloader.generate_search_url(prompt, file_type, search_type)
 31        Sdilej_downloader.logger.info(f"Searching Sdilej with URL: {url}")
 32        response = requests.get(url)
 33        Sdilej_downloader.logger.info(f"Response received: {response.status_code}")
 34        return Sdilej_downloader.parse_catalogue(response)
 35
 36    @staticmethod
 37    def generate_search_url(prompt, file_type="all", search_type="relevance"):
 38        """
 39        generate url from input
 40        """
 41        return f"{Sdilej_downloader.webpage}/{prompt}/s/{Sdilej_downloader.file_types[file_type]}-{Sdilej_downloader.search_types[search_type]}"
 42
 43    @staticmethod
 44    def get_atributes_from_catalogue(soup) -> Link_to_file:
 45        try:
 46            link = soup.find("a").get("href")
 47            title = soup.find("a").get("title")
 48            size = soup.find_all("p")[1].text
 49            link_2_file = Link_to_file(title, link, size, Sdilej_downloader)
 50        except Exception as e:
 51            raise ValueError("ERROR: unable to parse atributes." + str(e))
 52        return link_2_file
 53
 54    @staticmethod
 55    def get_atributes_from_file_page(soup) -> Link_to_file:
 56        try:
 57            title = soup.find("h1").text
 58            size = soup.find("b").next_sibling.replace("|", "").strip()
 59            link = Sdilej_downloader.webpage+str(soup.find("a", class_="btn btn-danger").get("href"))
 60            link_2_file = Link_to_file(title, link, size, Sdilej_downloader)
 61        except Exception as e:
 62            raise ValueError("Download button not found on detail page." + str(e))
 63        return link_2_file
 64    
 65    @staticmethod
 66    def get_download_link_from_detail(detail_url: str) -> str:
 67        """
 68        Získá přímý odkaz ke stažení ze stránky s detailem souboru na sdilej.cz.
 69        """
 70        page = download_page(detail_url)
 71        soup = bs4.BeautifulSoup(page.text, "html.parser")
 72        # Najdi tlačítko pro stažení
 73        download_btn = soup.find("a", class_="btn btn-danger")
 74        if not download_btn:
 75            raise ValueError("Download button not found on detail page for: {}".format(detail_url))
 76        download_link = Sdilej_downloader.webpage + str(download_btn.get("href"))
 77        return download_link
 78
 79    @staticmethod
 80    def is_valid_download_page(page) -> bool:
 81        """
 82        Stránka neplatná, pokud obsahuje: 
 83        <h1 class="red">Stahuj a nahrávej soubory neomezenou rychlostí</h1>
 84        "Tento soubor byl smazán."
 85        """
 86        soup = bs4.BeautifulSoup(page.text, "html.parser")
 87        invalid_texts = (
 88            "Stahuj a nahrávej soubory neomezenou rychlostí", 
 89            "Chyba 404 Nenalezeno",
 90            "Tento soubor byl smazán."
 91        )
 92        page_title = soup.find("h1", class_="red")
 93        if page_title is not None and page_title.text in invalid_texts:
 94            return False
 95        
 96        soup = remove_style(soup)
 97        page_txt = soup.find("div", class_="content")
 98        if page_txt is not None:
 99            text = remove_empty_lines(page_txt.text)
100            if any_text_coresponds_to(text, invalid_texts):
101                return False
102        return True
103    
104    @staticmethod
105    def test_downloaded_file(link_2_file, download_folder) -> bool:
106        file_size = os.path.getsize(f"{download_folder}/{link_2_file.title}")
107        if file_size == 0:
108            raise ValueError("ERROR: File is empty.")
109        elif link_2_file.size != None and file_size < 1024:
110            file = os.path.join(download_folder, link_2_file.title)
111            data = open(file, "r", encoding='utf-8').read()
112            return Sdilej_downloader.test_downloaded_data(data)
113        elif link_2_file.size != None and not compare_sizes(file_size, link_2_file.size, 20/100):
114            raise ValueError("ERROR: File size does not match.")
115        return True
116
117    @staticmethod
118    def test_downloaded_data(data) -> bool:
119        """
120        Tests the downloaded data.
121        The data is invalid if a sufficient timeout has occurred.
122        If the page contains:
123        "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>"
124        "<h1 class=\"red\">Stahování více souborů najednou</h1>"
125        """
126        if data is None:
127            raise ValueError("ERROR: No data downloaded.")
128        if "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>" in data:
129            raise InsufficientTimeoutError()
130        if "<h1 class=\"red\">Stahování více souborů najednou</h1>" in data:
131            raise InsufficientTimeoutError()
132        return True
133
134    @staticmethod
135    def parse_file_page(page):
136        if not Sdilej_downloader.is_valid_download_page(page):
137            raise ValueError("Status code: " + str(page.status_code) + ". Invalid download page: no file to download.")
138        soup = bs4.BeautifulSoup(page.text, "html.parser")
139        content = soup.find("div", class_="content")
140        content = soup.find("div", class_="col-md-12 col-sm-12 detail-leftcol")
141        return content
142
143    @staticmethod
144    def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]':
145        """
146        Iterates through the search results page and returns information about the files.
147
148        Yields: Link_to_file
149        """
150        soup = bs4.BeautifulSoup(page.text, "html.parser")
151        content = soup.find("div", class_="row post")
152        if content is None:
153            return None
154        content = remove_style(content)
155        for videobox in content.find_all(class_="videobox-desc"):
156            catalogue_file = None
157            try:
158                catalogue_file = Sdilej_downloader.get_atributes_from_catalogue(videobox)
159                download_page_content = Sdilej_downloader.parse_file_page(download_page(catalogue_file.detail_url))
160                link_2_file = Sdilej_downloader.get_atributes_from_file_page(download_page_content)
161                link_2_file.detail_url = catalogue_file.detail_url  # zachovej původní detail_url!
162                yield link_2_file
163            except ValueError as e:
164                print_error(str(e) + " for file: " + (catalogue_file.title if catalogue_file else "Unknown"), False)
class Sdilej_downloader(src.downloader.page_search.Download_page_search):
 10class Sdilej_downloader(Download_page_search):
 11    """
 12    Downloader from: sdilej.cz
 13    """
 14    webpage = "https://sdilej.cz"
 15
 16    logger = logging.getLogger("Sdilej_downloader")
 17    if not logger.hasHandlers():
 18        os.makedirs("logs", exist_ok=True)
 19        handler = logging.FileHandler("logs/sdilej_downloader.log", encoding="utf-8")
 20        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
 21        handler.setFormatter(formatter)
 22        logger.addHandler(handler)
 23        logger.setLevel(logging.INFO)
 24    
 25    def __init__(self):
 26        pass
 27    
 28    def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]':
 29        if prompt is None or prompt.strip() == "":
 30            raise ValueError("Prompt cannot be empty.")
 31        url = Sdilej_downloader.generate_search_url(prompt, file_type, search_type)
 32        Sdilej_downloader.logger.info(f"Searching Sdilej with URL: {url}")
 33        response = requests.get(url)
 34        Sdilej_downloader.logger.info(f"Response received: {response.status_code}")
 35        return Sdilej_downloader.parse_catalogue(response)
 36
 37    @staticmethod
 38    def generate_search_url(prompt, file_type="all", search_type="relevance"):
 39        """
 40        generate url from input
 41        """
 42        return f"{Sdilej_downloader.webpage}/{prompt}/s/{Sdilej_downloader.file_types[file_type]}-{Sdilej_downloader.search_types[search_type]}"
 43
 44    @staticmethod
 45    def get_atributes_from_catalogue(soup) -> Link_to_file:
 46        try:
 47            link = soup.find("a").get("href")
 48            title = soup.find("a").get("title")
 49            size = soup.find_all("p")[1].text
 50            link_2_file = Link_to_file(title, link, size, Sdilej_downloader)
 51        except Exception as e:
 52            raise ValueError("ERROR: unable to parse atributes." + str(e))
 53        return link_2_file
 54
 55    @staticmethod
 56    def get_atributes_from_file_page(soup) -> Link_to_file:
 57        try:
 58            title = soup.find("h1").text
 59            size = soup.find("b").next_sibling.replace("|", "").strip()
 60            link = Sdilej_downloader.webpage+str(soup.find("a", class_="btn btn-danger").get("href"))
 61            link_2_file = Link_to_file(title, link, size, Sdilej_downloader)
 62        except Exception as e:
 63            raise ValueError("Download button not found on detail page." + str(e))
 64        return link_2_file
 65    
 66    @staticmethod
 67    def get_download_link_from_detail(detail_url: str) -> str:
 68        """
 69        Získá přímý odkaz ke stažení ze stránky s detailem souboru na sdilej.cz.
 70        """
 71        page = download_page(detail_url)
 72        soup = bs4.BeautifulSoup(page.text, "html.parser")
 73        # Najdi tlačítko pro stažení
 74        download_btn = soup.find("a", class_="btn btn-danger")
 75        if not download_btn:
 76            raise ValueError("Download button not found on detail page for: {}".format(detail_url))
 77        download_link = Sdilej_downloader.webpage + str(download_btn.get("href"))
 78        return download_link
 79
 80    @staticmethod
 81    def is_valid_download_page(page) -> bool:
 82        """
 83        Stránka neplatná, pokud obsahuje: 
 84        <h1 class="red">Stahuj a nahrávej soubory neomezenou rychlostí</h1>
 85        "Tento soubor byl smazán."
 86        """
 87        soup = bs4.BeautifulSoup(page.text, "html.parser")
 88        invalid_texts = (
 89            "Stahuj a nahrávej soubory neomezenou rychlostí", 
 90            "Chyba 404 Nenalezeno",
 91            "Tento soubor byl smazán."
 92        )
 93        page_title = soup.find("h1", class_="red")
 94        if page_title is not None and page_title.text in invalid_texts:
 95            return False
 96        
 97        soup = remove_style(soup)
 98        page_txt = soup.find("div", class_="content")
 99        if page_txt is not None:
100            text = remove_empty_lines(page_txt.text)
101            if any_text_coresponds_to(text, invalid_texts):
102                return False
103        return True
104    
105    @staticmethod
106    def test_downloaded_file(link_2_file, download_folder) -> bool:
107        file_size = os.path.getsize(f"{download_folder}/{link_2_file.title}")
108        if file_size == 0:
109            raise ValueError("ERROR: File is empty.")
110        elif link_2_file.size != None and file_size < 1024:
111            file = os.path.join(download_folder, link_2_file.title)
112            data = open(file, "r", encoding='utf-8').read()
113            return Sdilej_downloader.test_downloaded_data(data)
114        elif link_2_file.size != None and not compare_sizes(file_size, link_2_file.size, 20/100):
115            raise ValueError("ERROR: File size does not match.")
116        return True
117
118    @staticmethod
119    def test_downloaded_data(data) -> bool:
120        """
121        Tests the downloaded data.
122        The data is invalid if a sufficient timeout has occurred.
123        If the page contains:
124        "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>"
125        "<h1 class=\"red\">Stahování více souborů najednou</h1>"
126        """
127        if data is None:
128            raise ValueError("ERROR: No data downloaded.")
129        if "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>" in data:
130            raise InsufficientTimeoutError()
131        if "<h1 class=\"red\">Stahování více souborů najednou</h1>" in data:
132            raise InsufficientTimeoutError()
133        return True
134
135    @staticmethod
136    def parse_file_page(page):
137        if not Sdilej_downloader.is_valid_download_page(page):
138            raise ValueError("Status code: " + str(page.status_code) + ". Invalid download page: no file to download.")
139        soup = bs4.BeautifulSoup(page.text, "html.parser")
140        content = soup.find("div", class_="content")
141        content = soup.find("div", class_="col-md-12 col-sm-12 detail-leftcol")
142        return content
143
144    @staticmethod
145    def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]':
146        """
147        Iterates through the search results page and returns information about the files.
148
149        Yields: Link_to_file
150        """
151        soup = bs4.BeautifulSoup(page.text, "html.parser")
152        content = soup.find("div", class_="row post")
153        if content is None:
154            return None
155        content = remove_style(content)
156        for videobox in content.find_all(class_="videobox-desc"):
157            catalogue_file = None
158            try:
159                catalogue_file = Sdilej_downloader.get_atributes_from_catalogue(videobox)
160                download_page_content = Sdilej_downloader.parse_file_page(download_page(catalogue_file.detail_url))
161                link_2_file = Sdilej_downloader.get_atributes_from_file_page(download_page_content)
162                link_2_file.detail_url = catalogue_file.detail_url  # zachovej původní detail_url!
163                yield link_2_file
164            except ValueError as e:
165                print_error(str(e) + " for file: " + (catalogue_file.title if catalogue_file else "Unknown"), False)

Downloader from: sdilej.cz

webpage = 'https://sdilej.cz'
logger = <Logger Sdilej_downloader (INFO)>
def search( self, prompt, file_type='all', search_type='relevance') -> Generator[src.link_to_file.Link_to_file, NoneType, NoneType]:
28    def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]':
29        if prompt is None or prompt.strip() == "":
30            raise ValueError("Prompt cannot be empty.")
31        url = Sdilej_downloader.generate_search_url(prompt, file_type, search_type)
32        Sdilej_downloader.logger.info(f"Searching Sdilej with URL: {url}")
33        response = requests.get(url)
34        Sdilej_downloader.logger.info(f"Response received: {response.status_code}")
35        return Sdilej_downloader.parse_catalogue(response)

Search for files on the website.

@staticmethod
def generate_search_url(prompt, file_type='all', search_type='relevance'):
37    @staticmethod
38    def generate_search_url(prompt, file_type="all", search_type="relevance"):
39        """
40        generate url from input
41        """
42        return f"{Sdilej_downloader.webpage}/{prompt}/s/{Sdilej_downloader.file_types[file_type]}-{Sdilej_downloader.search_types[search_type]}"

generate url from input

@staticmethod
def get_atributes_from_catalogue(soup) -> src.link_to_file.Link_to_file:
44    @staticmethod
45    def get_atributes_from_catalogue(soup) -> Link_to_file:
46        try:
47            link = soup.find("a").get("href")
48            title = soup.find("a").get("title")
49            size = soup.find_all("p")[1].text
50            link_2_file = Link_to_file(title, link, size, Sdilej_downloader)
51        except Exception as e:
52            raise ValueError("ERROR: unable to parse atributes." + str(e))
53        return link_2_file
@staticmethod
def get_atributes_from_file_page(soup) -> src.link_to_file.Link_to_file:
55    @staticmethod
56    def get_atributes_from_file_page(soup) -> Link_to_file:
57        try:
58            title = soup.find("h1").text
59            size = soup.find("b").next_sibling.replace("|", "").strip()
60            link = Sdilej_downloader.webpage+str(soup.find("a", class_="btn btn-danger").get("href"))
61            link_2_file = Link_to_file(title, link, size, Sdilej_downloader)
62        except Exception as e:
63            raise ValueError("Download button not found on detail page." + str(e))
64        return link_2_file
@staticmethod
def is_valid_download_page(page) -> bool:
 80    @staticmethod
 81    def is_valid_download_page(page) -> bool:
 82        """
 83        Stránka neplatná, pokud obsahuje: 
 84        <h1 class="red">Stahuj a nahrávej soubory neomezenou rychlostí</h1>
 85        "Tento soubor byl smazán."
 86        """
 87        soup = bs4.BeautifulSoup(page.text, "html.parser")
 88        invalid_texts = (
 89            "Stahuj a nahrávej soubory neomezenou rychlostí", 
 90            "Chyba 404 Nenalezeno",
 91            "Tento soubor byl smazán."
 92        )
 93        page_title = soup.find("h1", class_="red")
 94        if page_title is not None and page_title.text in invalid_texts:
 95            return False
 96        
 97        soup = remove_style(soup)
 98        page_txt = soup.find("div", class_="content")
 99        if page_txt is not None:
100            text = remove_empty_lines(page_txt.text)
101            if any_text_coresponds_to(text, invalid_texts):
102                return False
103        return True

Stránka neplatná, pokud obsahuje:

Stahuj a nahrávej soubory neomezenou rychlostí

"Tento soubor byl smazán."

@staticmethod
def test_downloaded_file(link_2_file, download_folder) -> bool:
105    @staticmethod
106    def test_downloaded_file(link_2_file, download_folder) -> bool:
107        file_size = os.path.getsize(f"{download_folder}/{link_2_file.title}")
108        if file_size == 0:
109            raise ValueError("ERROR: File is empty.")
110        elif link_2_file.size != None and file_size < 1024:
111            file = os.path.join(download_folder, link_2_file.title)
112            data = open(file, "r", encoding='utf-8').read()
113            return Sdilej_downloader.test_downloaded_data(data)
114        elif link_2_file.size != None and not compare_sizes(file_size, link_2_file.size, 20/100):
115            raise ValueError("ERROR: File size does not match.")
116        return True
@staticmethod
def test_downloaded_data(data) -> bool:
118    @staticmethod
119    def test_downloaded_data(data) -> bool:
120        """
121        Tests the downloaded data.
122        The data is invalid if a sufficient timeout has occurred.
123        If the page contains:
124        "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>"
125        "<h1 class=\"red\">Stahování více souborů najednou</h1>"
126        """
127        if data is None:
128            raise ValueError("ERROR: No data downloaded.")
129        if "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>" in data:
130            raise InsufficientTimeoutError()
131        if "<h1 class=\"red\">Stahování více souborů najednou</h1>" in data:
132            raise InsufficientTimeoutError()
133        return True

Tests the downloaded data. The data is invalid if an insufficient timeout has occurred — i.e. the page contains the redirect script to "https://sdilej.cz/free-stahovani" or the heading:

Stahování více souborů najednou

@staticmethod
def parse_file_page(page):
135    @staticmethod
136    def parse_file_page(page):
137        if not Sdilej_downloader.is_valid_download_page(page):
138            raise ValueError("Status code: " + str(page.status_code) + ". Invalid download page: no file to download.")
139        soup = bs4.BeautifulSoup(page.text, "html.parser")
140        content = soup.find("div", class_="content")
141        content = soup.find("div", class_="col-md-12 col-sm-12 detail-leftcol")
142        return content
@staticmethod
def parse_catalogue(page) -> Generator[src.link_to_file.Link_to_file, NoneType, NoneType]:
144    @staticmethod
145    def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]':
146        """
147        Iterates through the search results page and returns information about the files.
148
149        Yields: Link_to_file
150        """
151        soup = bs4.BeautifulSoup(page.text, "html.parser")
152        content = soup.find("div", class_="row post")
153        if content is None:
154            return None
155        content = remove_style(content)
156        for videobox in content.find_all(class_="videobox-desc"):
157            catalogue_file = None
158            try:
159                catalogue_file = Sdilej_downloader.get_atributes_from_catalogue(videobox)
160                download_page_content = Sdilej_downloader.parse_file_page(download_page(catalogue_file.detail_url))
161                link_2_file = Sdilej_downloader.get_atributes_from_file_page(download_page_content)
162                link_2_file.detail_url = catalogue_file.detail_url  # zachovej původní detail_url!
163                yield link_2_file
164            except ValueError as e:
165                print_error(str(e) + " for file: " + (catalogue_file.title if catalogue_file else "Unknown"), False)

Iterates through the search results page and returns information about the files.

Yields: Link_to_file