# sdilej.cz downloader module
1from __future__ import annotations 2import logging 3import bs4 4from src.download import * 5from basic_colors import * 6from src.downloader.page_search import * 7from src.link_to_file import Link_to_file, compare_sizes 8 9class Sdilej_downloader(Download_page_search): 10 """ 11 Downloader from: sdilej.cz 12 """ 13 webpage = "https://sdilej.cz" 14 15 logger = logging.getLogger("Sdilej_downloader") 16 if not logger.hasHandlers(): 17 os.makedirs("logs", exist_ok=True) 18 handler = logging.FileHandler("logs/sdilej_downloader.log", encoding="utf-8") 19 formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s") 20 handler.setFormatter(formatter) 21 logger.addHandler(handler) 22 logger.setLevel(logging.INFO) 23 24 def __init__(self): 25 pass 26 27 def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]': 28 if prompt is None or prompt.strip() == "": 29 raise ValueError("Prompt cannot be empty.") 30 url = Sdilej_downloader.generate_search_url(prompt, file_type, search_type) 31 Sdilej_downloader.logger.info(f"Searching Sdilej with URL: {url}") 32 response = requests.get(url) 33 Sdilej_downloader.logger.info(f"Response received: {response.status_code}") 34 return Sdilej_downloader.parse_catalogue(response) 35 36 @staticmethod 37 def generate_search_url(prompt, file_type="all", search_type="relevance"): 38 """ 39 generate url from input 40 """ 41 return f"{Sdilej_downloader.webpage}/{prompt}/s/{Sdilej_downloader.file_types[file_type]}-{Sdilej_downloader.search_types[search_type]}" 42 43 @staticmethod 44 def get_atributes_from_catalogue(soup) -> Link_to_file: 45 try: 46 link = soup.find("a").get("href") 47 title = soup.find("a").get("title") 48 size = soup.find_all("p")[1].text 49 link_2_file = Link_to_file(title, link, size, Sdilej_downloader) 50 except Exception as e: 51 raise ValueError("ERROR: unable to parse atributes." 
+ str(e)) 52 return link_2_file 53 54 @staticmethod 55 def get_atributes_from_file_page(soup) -> Link_to_file: 56 try: 57 title = soup.find("h1").text 58 size = soup.find("b").next_sibling.replace("|", "").strip() 59 link = Sdilej_downloader.webpage+str(soup.find("a", class_="btn btn-danger").get("href")) 60 link_2_file = Link_to_file(title, link, size, Sdilej_downloader) 61 except Exception as e: 62 raise ValueError("Download button not found on detail page." + str(e)) 63 return link_2_file 64 65 @staticmethod 66 def get_download_link_from_detail(detail_url: str) -> str: 67 """ 68 Získá přímý odkaz ke stažení ze stránky s detailem souboru na sdilej.cz. 69 """ 70 page = download_page(detail_url) 71 soup = bs4.BeautifulSoup(page.text, "html.parser") 72 # Najdi tlačítko pro stažení 73 download_btn = soup.find("a", class_="btn btn-danger") 74 if not download_btn: 75 raise ValueError("Download button not found on detail page for: {}".format(detail_url)) 76 download_link = Sdilej_downloader.webpage + str(download_btn.get("href")) 77 return download_link 78 79 @staticmethod 80 def is_valid_download_page(page) -> bool: 81 """ 82 Stránka neplatná, pokud obsahuje: 83 <h1 class="red">Stahuj a nahrávej soubory neomezenou rychlostí</h1> 84 "Tento soubor byl smazán." 85 """ 86 soup = bs4.BeautifulSoup(page.text, "html.parser") 87 invalid_texts = ( 88 "Stahuj a nahrávej soubory neomezenou rychlostí", 89 "Chyba 404 Nenalezeno", 90 "Tento soubor byl smazán." 
91 ) 92 page_title = soup.find("h1", class_="red") 93 if page_title is not None and page_title.text in invalid_texts: 94 return False 95 96 soup = remove_style(soup) 97 page_txt = soup.find("div", class_="content") 98 if page_txt is not None: 99 text = remove_empty_lines(page_txt.text) 100 if any_text_coresponds_to(text, invalid_texts): 101 return False 102 return True 103 104 @staticmethod 105 def test_downloaded_file(link_2_file, download_folder) -> bool: 106 file_size = os.path.getsize(f"{download_folder}/{link_2_file.title}") 107 if file_size == 0: 108 raise ValueError("ERROR: File is empty.") 109 elif link_2_file.size != None and file_size < 1024: 110 file = os.path.join(download_folder, link_2_file.title) 111 data = open(file, "r", encoding='utf-8').read() 112 return Sdilej_downloader.test_downloaded_data(data) 113 elif link_2_file.size != None and not compare_sizes(file_size, link_2_file.size, 20/100): 114 raise ValueError("ERROR: File size does not match.") 115 return True 116 117 @staticmethod 118 def test_downloaded_data(data) -> bool: 119 """ 120 Tests the downloaded data. 121 The data is invalid if a sufficient timeout has occurred. 122 If the page contains: 123 "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>" 124 "<h1 class=\"red\">Stahování více souborů najednou</h1>" 125 """ 126 if data is None: 127 raise ValueError("ERROR: No data downloaded.") 128 if "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>" in data: 129 raise InsufficientTimeoutError() 130 if "<h1 class=\"red\">Stahování více souborů najednou</h1>" in data: 131 raise InsufficientTimeoutError() 132 return True 133 134 @staticmethod 135 def parse_file_page(page): 136 if not Sdilej_downloader.is_valid_download_page(page): 137 raise ValueError("Status code: " + str(page.status_code) + ". 
Invalid download page: no file to download.") 138 soup = bs4.BeautifulSoup(page.text, "html.parser") 139 content = soup.find("div", class_="content") 140 content = soup.find("div", class_="col-md-12 col-sm-12 detail-leftcol") 141 return content 142 143 @staticmethod 144 def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]': 145 """ 146 Iterates through the search results page and returns information about the files. 147 148 Yields: Link_to_file 149 """ 150 soup = bs4.BeautifulSoup(page.text, "html.parser") 151 content = soup.find("div", class_="row post") 152 if content is None: 153 return None 154 content = remove_style(content) 155 for videobox in content.find_all(class_="videobox-desc"): 156 catalogue_file = None 157 try: 158 catalogue_file = Sdilej_downloader.get_atributes_from_catalogue(videobox) 159 download_page_content = Sdilej_downloader.parse_file_page(download_page(catalogue_file.detail_url)) 160 link_2_file = Sdilej_downloader.get_atributes_from_file_page(download_page_content) 161 link_2_file.detail_url = catalogue_file.detail_url # zachovej původní detail_url! 162 yield link_2_file 163 except ValueError as e: 164 print_error(str(e) + " for file: " + (catalogue_file.title if catalogue_file else "Unknown"), False)
class Sdilej_downloader(Download_page_search):
    """Search/download client for the sdilej.cz file-sharing site."""

    webpage = "https://sdilej.cz"

    # Wire the class logger to a file exactly once per process.
    logger = logging.getLogger("Sdilej_downloader")
    if not logger.hasHandlers():
        os.makedirs("logs", exist_ok=True)
        handler = logging.FileHandler("logs/sdilej_downloader.log", encoding="utf-8")
        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)

    def __init__(self):
        pass

    def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]':
        """Query the site and yield a Link_to_file per result."""
        if prompt is None or not prompt.strip():
            raise ValueError("Prompt cannot be empty.")
        target = Sdilej_downloader.generate_search_url(prompt, file_type, search_type)
        Sdilej_downloader.logger.info(f"Searching Sdilej with URL: {target}")
        reply = requests.get(target)
        Sdilej_downloader.logger.info(f"Response received: {reply.status_code}")
        return Sdilej_downloader.parse_catalogue(reply)

    @staticmethod
    def generate_search_url(prompt, file_type="all", search_type="relevance"):
        """Compose the catalogue-search URL from the inputs."""
        base = Sdilej_downloader.webpage
        kind = Sdilej_downloader.file_types[file_type]
        order = Sdilej_downloader.search_types[search_type]
        return f"{base}/{prompt}/s/{kind}-{order}"

    @staticmethod
    def get_atributes_from_catalogue(soup) -> Link_to_file:
        """Pull href/title/size out of one catalogue box."""
        try:
            anchor = soup.find("a")
            size_text = soup.find_all("p")[1].text
            result = Link_to_file(anchor.get("title"), anchor.get("href"), size_text, Sdilej_downloader)
        except Exception as e:
            raise ValueError("ERROR: unable to parse atributes." + str(e))
        return result

    @staticmethod
    def get_atributes_from_file_page(soup) -> Link_to_file:
        """Pull title/size/direct link out of a file-detail page."""
        try:
            heading = soup.find("h1").text
            size_text = soup.find("b").next_sibling.replace("|", "").strip()
            button = soup.find("a", class_="btn btn-danger")
            result = Link_to_file(heading,
                                  Sdilej_downloader.webpage + str(button.get("href")),
                                  size_text,
                                  Sdilej_downloader)
        except Exception as e:
            raise ValueError("Download button not found on detail page." + str(e))
        return result

    @staticmethod
    def get_download_link_from_detail(detail_url: str) -> str:
        """Fetch the detail page and return its direct download URL."""
        markup = bs4.BeautifulSoup(download_page(detail_url).text, "html.parser")
        button = markup.find("a", class_="btn btn-danger")
        if not button:
            raise ValueError("Download button not found on detail page for: {}".format(detail_url))
        return Sdilej_downloader.webpage + str(button.get("href"))

    @staticmethod
    def is_valid_download_page(page) -> bool:
        """
        Heuristic validity check: reject pages carrying one of the known
        error texts (promo headline, 404, deleted-file notice).
        """
        soup = bs4.BeautifulSoup(page.text, "html.parser")
        invalid_texts = (
            "Stahuj a nahrávej soubory neomezenou rychlostí",
            "Chyba 404 Nenalezeno",
            "Tento soubor byl smazán."
        )
        headline = soup.find("h1", class_="red")
        if headline is not None and headline.text in invalid_texts:
            return False
        body = remove_style(soup).find("div", class_="content")
        if body is None:
            return True
        return not any_text_coresponds_to(remove_empty_lines(body.text), invalid_texts)

    @staticmethod
    def test_downloaded_file(link_2_file, download_folder) -> bool:
        """Check the downloaded file's size; inspect tiny files' content."""
        file_size = os.path.getsize(f"{download_folder}/{link_2_file.title}")
        if file_size == 0:
            raise ValueError("ERROR: File is empty.")
        if link_2_file.size != None and file_size < 1024:
            data = open(os.path.join(download_folder, link_2_file.title), "r", encoding='utf-8').read()
            return Sdilej_downloader.test_downloaded_data(data)
        if link_2_file.size != None and not compare_sizes(file_size, link_2_file.size, 20/100):
            raise ValueError("ERROR: File size does not match.")
        return True

    @staticmethod
    def test_downloaded_data(data) -> bool:
        """Raise if *data* carries one of the site's rate-limit markers."""
        if data is None:
            raise ValueError("ERROR: No data downloaded.")
        markers = (
            "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>",
            "<h1 class=\"red\">Stahování více souborů najednou</h1>",
        )
        for marker in markers:
            if marker in data:
                raise InsufficientTimeoutError()
        return True

    @staticmethod
    def parse_file_page(page):
        """Return the detail column <div> of a validated file page."""
        if not Sdilej_downloader.is_valid_download_page(page):
            raise ValueError("Status code: " + str(page.status_code) + ". Invalid download page: no file to download.")
        soup = bs4.BeautifulSoup(page.text, "html.parser")
        content = soup.find("div", class_="content")  # NOTE(review): overwritten below; kept as in the original
        content = soup.find("div", class_="col-md-12 col-sm-12 detail-leftcol")
        return content

    @staticmethod
    def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]':
        """Walk the result page and yield a Link_to_file per entry."""
        soup = bs4.BeautifulSoup(page.text, "html.parser")
        listing = soup.find("div", class_="row post")
        if listing is None:
            return None
        listing = remove_style(listing)
        for box in listing.find_all(class_="videobox-desc"):
            entry = None
            try:
                entry = Sdilej_downloader.get_atributes_from_catalogue(box)
                detail = Sdilej_downloader.parse_file_page(download_page(entry.detail_url))
                found = Sdilej_downloader.get_atributes_from_file_page(detail)
                found.detail_url = entry.detail_url  # keep the original detail_url!
                yield found
            except ValueError as e:
                print_error(str(e) + " for file: " + (entry.title if entry else "Unknown"), False)
class Sdilej_downloader(Download_page_search):
    """
    Downloader for the file-sharing service at sdilej.cz.
    """

    webpage = "https://sdilej.cz"

    logger = logging.getLogger("Sdilej_downloader")
    if not logger.hasHandlers():
        # Create the log directory and hook up a single file handler.
        os.makedirs("logs", exist_ok=True)
        handler = logging.FileHandler("logs/sdilej_downloader.log", encoding="utf-8")
        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)

    def __init__(self):
        pass

    def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]':
        """Search the site for *prompt*; yields Link_to_file objects."""
        if prompt is None or prompt.strip() == "":
            raise ValueError("Prompt cannot be empty.")
        search_url = Sdilej_downloader.generate_search_url(prompt, file_type, search_type)
        Sdilej_downloader.logger.info(f"Searching Sdilej with URL: {search_url}")
        page = requests.get(search_url)
        Sdilej_downloader.logger.info(f"Response received: {page.status_code}")
        return Sdilej_downloader.parse_catalogue(page)

    @staticmethod
    def generate_search_url(prompt, file_type="all", search_type="relevance"):
        """Return the site URL encoding the prompt and both filter keys."""
        return "{}/{}/s/{}-{}".format(
            Sdilej_downloader.webpage,
            prompt,
            Sdilej_downloader.file_types[file_type],
            Sdilej_downloader.search_types[search_type],
        )

    @staticmethod
    def get_atributes_from_catalogue(soup) -> Link_to_file:
        """Build a Link_to_file from one catalogue result box."""
        try:
            href = soup.find("a").get("href")
            name = soup.find("a").get("title")
            size_label = soup.find_all("p")[1].text
            item = Link_to_file(name, href, size_label, Sdilej_downloader)
        except Exception as e:
            raise ValueError("ERROR: unable to parse atributes." + str(e))
        return item

    @staticmethod
    def get_atributes_from_file_page(soup) -> Link_to_file:
        """Build a Link_to_file from a file-detail page's markup."""
        try:
            name = soup.find("h1").text
            size_label = soup.find("b").next_sibling.replace("|", "").strip()
            direct = Sdilej_downloader.webpage + str(soup.find("a", class_="btn btn-danger").get("href"))
            item = Link_to_file(name, direct, size_label, Sdilej_downloader)
        except Exception as e:
            raise ValueError("Download button not found on detail page." + str(e))
        return item

    @staticmethod
    def get_download_link_from_detail(detail_url: str) -> str:
        """Return the direct download URL found on a file's detail page."""
        detail_page = download_page(detail_url)
        detail_soup = bs4.BeautifulSoup(detail_page.text, "html.parser")
        btn = detail_soup.find("a", class_="btn btn-danger")
        if not btn:
            raise ValueError("Download button not found on detail page for: {}".format(detail_url))
        return Sdilej_downloader.webpage + str(btn.get("href"))

    @staticmethod
    def is_valid_download_page(page) -> bool:
        """
        A page is invalid when it shows one of the site's error messages
        (promo headline, 404, deleted-file notice).
        """
        invalid_texts = (
            "Stahuj a nahrávej soubory neomezenou rychlostí",
            "Chyba 404 Nenalezeno",
            "Tento soubor byl smazán."
        )
        soup = bs4.BeautifulSoup(page.text, "html.parser")
        red_heading = soup.find("h1", class_="red")
        if red_heading is not None:
            if red_heading.text in invalid_texts:
                return False
        stripped = remove_style(soup)
        container = stripped.find("div", class_="content")
        if container is not None:
            cleaned = remove_empty_lines(container.text)
            if any_text_coresponds_to(cleaned, invalid_texts):
                return False
        return True

    @staticmethod
    def test_downloaded_file(link_2_file, download_folder) -> bool:
        """Sanity-check a finished download by size and, if tiny, by content."""
        actual = os.path.getsize(f"{download_folder}/{link_2_file.title}")
        if actual == 0:
            raise ValueError("ERROR: File is empty.")
        has_expected = link_2_file.size != None
        if has_expected and actual < 1024:
            target = os.path.join(download_folder, link_2_file.title)
            return Sdilej_downloader.test_downloaded_data(open(target, "r", encoding='utf-8').read())
        if has_expected and not compare_sizes(actual, link_2_file.size, 20/100):
            raise ValueError("ERROR: File size does not match.")
        return True

    @staticmethod
    def test_downloaded_data(data) -> bool:
        """Raise InsufficientTimeoutError on the site's throttle markers."""
        if data is None:
            raise ValueError("ERROR: No data downloaded.")
        throttled = (
            "<script>top.location.href='https://sdilej.cz/free-stahovani';</script>" in data
            or "<h1 class=\"red\">Stahování více souborů najednou</h1>" in data
        )
        if throttled:
            raise InsufficientTimeoutError()
        return True

    @staticmethod
    def parse_file_page(page):
        """Validate the page, then return its left detail column <div>."""
        if not Sdilej_downloader.is_valid_download_page(page):
            raise ValueError("Status code: " + str(page.status_code) + ". Invalid download page: no file to download.")
        parsed = bs4.BeautifulSoup(page.text, "html.parser")
        found = parsed.find("div", class_="content")  # NOTE(review): overwritten below; kept as in the original
        found = parsed.find("div", class_="col-md-12 col-sm-12 detail-leftcol")
        return found

    @staticmethod
    def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]':
        """
        Iterate the search-result listing, fetch each file's detail page
        and yield a fully populated Link_to_file.
        """
        document = bs4.BeautifulSoup(page.text, "html.parser")
        results = document.find("div", class_="row post")
        if results is None:
            return None
        results = remove_style(results)
        for description in results.find_all(class_="videobox-desc"):
            summary = None
            try:
                summary = Sdilej_downloader.get_atributes_from_catalogue(description)
                detail_html = Sdilej_downloader.parse_file_page(download_page(summary.detail_url))
                full = Sdilej_downloader.get_atributes_from_file_page(detail_html)
                full.detail_url = summary.detail_url  # keep the original detail_url!
                yield full
            except ValueError as e:
                print_error(str(e) + " for file: " + (summary.title if summary else "Unknown"), False)