"""prehrajto — downloader module for prehraj.to."""
import os
import re
import logging
import urllib.parse

import bs4
import requests

from basic_colors import *
from src.downloader.page_search import *
from src.link_to_file import Link_to_file


class Prehrajto_downloader(Download_page_search):
    """
    Downloader from: prehraj.to
    """
    webpage = "https://prehraj.to"

    # Class-level logger; the hasHandlers() guard prevents attaching a
    # duplicate FileHandler when this module is imported more than once.
    logger = logging.getLogger("Prehrajto_downloader")
    if not logger.hasHandlers():
        os.makedirs("logs", exist_ok=True)
        handler = logging.FileHandler("logs/prehrajto_downloader.log", encoding="utf-8")
        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)

    def __init__(self):
        pass

    @staticmethod
    def generate_search_url(prompt, file_type="video", search_type=None):
        """
        Build the search URL for prehraj.to.

        ``file_type`` and ``search_type`` are accepted for interface
        compatibility with the other downloaders but are not used by this
        site's search endpoint.
        """
        # quote() percent-encodes every reserved character; the previous
        # version replaced only spaces with %20, which produced broken URLs
        # for prompts containing '?', '#', '&', '%', etc.
        return f"https://prehraj.to/hledej/{urllib.parse.quote(prompt.strip())}"

    def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]':
        """
        Search for files on prehraj.to.
        Returns a generator of Link_to_file objects.

        Raises:
            ValueError: if the prompt is empty or the search request does
                not return HTTP 200.
        """
        if prompt is None or prompt.strip() == "":
            raise ValueError("Prompt cannot be empty.")
        url = Prehrajto_downloader.generate_search_url(prompt, file_type, search_type)
        Prehrajto_downloader.logger.info(f"Searching Prehrajto with URL: {url}")
        response = requests.get(url, timeout=30)
        # Validate the status before touching the body.
        if response.status_code != 200:
            raise ValueError(f"Failed to retrieve search results, status code: {response.status_code} for URL: {url}")
        return Prehrajto_downloader.parse_catalogue(response.text)

    @staticmethod
    def is_valid_download_page(page) -> bool:
        """
        Return False when the page indicates the video is still being
        processed, e.g. contains:
        <div class="status status--success text-center"> Video se zpracovává </div>
        Accepts either a requests.Response-like object or raw HTML/text.
        """
        # Reject non-200 responses outright.
        if hasattr(page, "status_code") and page.status_code != 200:
            return False

        # Normalize the input to an HTML string.
        if hasattr(page, "text"):
            html = page.text or ""
        else:
            html = page or ""

        if isinstance(html, bytes):
            try:
                html = html.decode("utf-8", errors="ignore")
            except Exception:
                html = str(html)

        soup = bs4.BeautifulSoup(html, "html.parser")

        # Drop script/style content to avoid false text matches.
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Check status-like blocks first.
        for status_div in soup.find_all("div", class_=re.compile(r"\bstatus\b")):
            if "video se zpracov" in status_div.get_text(" ", strip=True).lower():
                return False

        # Fallback: scan the whole page text for processing indicators.
        # The page text is lower-cased below, so the indicators must be
        # lower-case too (the original list contained a capitalized variant
        # that could never match).
        invalid_texts = (
            "video se zpracovává",
            "video se zpracov",
            "zpracováv"
        )

        soup = remove_style(soup)
        page_text = soup.get_text(" ", strip=True).lower()
        if page_text is not None:
            text = remove_empty_lines(page_text)
            if any_text_coresponds_to(text, invalid_texts):
                return False

        # Ensure there is a download anchor present, e.g.
        # <a id="frame" href="...?do=download" class="button cta ...">Stáhnout soubor</a>
        a_frame = soup.find("a", id="frame")
        if not a_frame:
            # Fallback: a button/cta anchor labelled 'stáhnout'.
            a_frame = soup.find("a", class_=re.compile(r"\b(button|cta)\b"), string=re.compile(r"stáhnout", re.I))

        if not a_frame:
            return False

        href = a_frame.get("href", "") or ""
        # "?do=download" is a substring case of "do=download", so a single
        # membership test is sufficient.
        if "do=download" not in href:
            # No download parameter in the href — treat the page as invalid.
            return False

        return True

    @staticmethod
    def get_atributes_from_file_page(soup) -> "Link_to_file":
        """
        Parse file page parameters and return Link_to_file(title, url, size, Prehrajto_downloader).
        Accepts either a BeautifulSoup object, a requests.Response, or raw HTML/text.
        """
        # Accept Response or raw HTML as input — normalize to BeautifulSoup.
        soup = normalize_to_beautifulsoup(soup)

        name_label = find_label_span_by_regex(soup, r'^\s*Název souboru[:\s]*$')
        size_label = find_label_span_by_regex(soup, r'^\s*Velikost[:\s]*$')
        format_label = find_label_span_by_regex(soup, r'^\s*Formát[:\s]*$')

        name = extract_value_from_label(name_label)
        size = extract_value_from_label(size_label)
        fmt = extract_value_from_label(format_label).lower().strip()

        # Normalize filename: append the extension if a format is present
        # and the name does not already end with it.
        if fmt and name and not re.search(r'\.' + re.escape(fmt) + r'\s*$', name, re.I):
            name = f"{name}.{fmt}"

        # Get the download anchor href and strip query/fragment — the detail
        # URL must NOT contain ?do=download (that is appended on demand by
        # get_download_link_from_detail).
        detail_url = ""
        a_frame = soup.find("a", id="frame")
        if a_frame:
            href = a_frame.get("href") or ""
            parsed = urllib.parse.urlparse(href)
            clean = urllib.parse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", "", ""))
            # If relative, prepend the domain.
            if not clean.startswith("http"):
                if clean.startswith("/"):
                    detail_url = f"https://prehraj.to{clean}"
                elif clean:
                    detail_url = f"https://prehraj.to/{clean}"
            else:
                detail_url = clean

        return Link_to_file(name, detail_url, size, Prehrajto_downloader)

    @staticmethod
    def get_atributes_from_catalogue(soup) -> Link_to_file:
        """
        Extract video info from one catalogue HTML element and return a
        Link_to_file.

        Raises:
            ValueError: when the element has no usable video link or title.
        """
        a_tag = soup.find("a", class_="video--link")
        if not a_tag:
            raise ValueError("Unable to find video link in the provided HTML element.")
        title = a_tag.get("title")
        if not title:
            # Fall back to the <h3> title; guard against it being absent so
            # callers get the documented ValueError, not an AttributeError.
            h3 = a_tag.find("h3", class_="video__title")
            if h3 is None:
                raise ValueError("Unable to find video title in the provided HTML element.")
            title = h3.text.strip()
        href = a_tag.get("href")
        detail_url = f"https://prehraj.to{href}" if href and not href.startswith("http") else href

        # Find the file size tag (may be missing).
        size_div = a_tag.find("div", class_="video__tag--size")
        size = size_div.text.strip() if size_div else ""

        return Link_to_file(title, detail_url, size, Prehrajto_downloader)

    @staticmethod
    def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]':
        """
        Walk the search-result pages (new <div>-based layout) and yield
        information about each downloadable file.

        Pagination is followed iteratively (resolves the old TODO about
        removing recursion).

        yield: Link_to_file
        """

        def find_next_url(soup_obj):
            """
            Locate the 'next page' button, e.g.:
            <a href="/hledej/zakl%C3%ADna%C4%8D?vp-page=2" title="Zobrazit další" class="button cta cta--small">Zobrazit další</a>
            Returns an absolute URL or None.
            """
            a = soup_obj.find("a", class_=re.compile(r"\bbutton\b.*\bcta\b.*\bcta--small\b"), string=re.compile(r"Zobrazit další", re.I))
            if not a:
                return None
            href = a.get("href", "").strip()
            if not href:
                return None
            return urllib.parse.urljoin(Prehrajto_downloader.webpage, href)

        def process_soup_and_yield(soup_obj):
            """Yield Link_to_file objects for every result tile on one page."""
            for grid in soup_obj.find_all("div", class_="grid-x"):
                for div in grid.find_all("div", recursive=False):
                    if not div.find("a", class_="video--link"):
                        continue
                    link_2_file = None
                    try:
                        link_2_file = Prehrajto_downloader.get_atributes_from_catalogue(div)
                        detail_page = download_page(link_2_file.detail_url)
                        if not Prehrajto_downloader.is_valid_download_page(detail_page):
                            raise ValueError(f"Status code: {detail_page.status_code}. Invalid download page: no file to download.")
                        link_2_file = Prehrajto_downloader.get_atributes_from_file_page(detail_page)
                        if link_2_file:
                            yield link_2_file
                    except ValueError as e:
                        # Best-effort: report and continue with the next tile.
                        print_error(f"{str(e)} for file: {(link_2_file.title if link_2_file else 'Unknown')}", False)

        # Iterative pagination: each iteration handles one results page.
        while page is not None:
            soup = bs4.BeautifulSoup(page, "html.parser")
            yield from process_soup_and_yield(soup)

            page = None
            next_url = find_next_url(soup)
            if next_url:
                try:
                    resp = requests.get(next_url, timeout=30)
                    if resp.status_code != 200:
                        print_error(f"Failed to retrieve search results, status code: {resp.status_code} for URL: {next_url}", False)
                    else:
                        page = resp.text
                except Exception as e:
                    print_error(f"Failed to fetch next page {next_url}: {e}", False)

    @staticmethod
    def get_download_link_from_detail(detail_url: str) -> str:
        """Return the direct download URL for a detail-page URL."""
        return f"{detail_url}?do=download"
class
Prehrajto_downloader(src.downloader.page_search.Download_page_search):
11class Prehrajto_downloader(Download_page_search): 12 """ 13 Downloader from: prehraj.to 14 """ 15 webpage = "https://prehraj.to" 16 17 logger = logging.getLogger("Prehrajto_downloader") 18 if not logger.hasHandlers(): 19 os.makedirs("logs", exist_ok=True) 20 handler = logging.FileHandler("logs/prehrajto_downloader.log", encoding="utf-8") 21 formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s") 22 handler.setFormatter(formatter) 23 logger.addHandler(handler) 24 logger.setLevel(logging.INFO) 25 26 def __init__(self): 27 pass 28 29 @staticmethod 30 def generate_search_url(prompt, file_type="video", search_type=None): 31 """ 32 Vygeneruje URL pro hledání na prehraj.to. 33 """ 34 prompt = prompt.strip().replace(" ", "%20") 35 return f"https://prehraj.to/hledej/{prompt}" 36 37 def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]': 38 """ 39 Search for files on Datoid.cz. 40 Returns a generator of Link_to_file objects. 41 """ 42 if prompt is None or prompt.strip() == "": 43 raise ValueError("Prompt cannot be empty.") 44 url = Prehrajto_downloader.generate_search_url(prompt, file_type, search_type) 45 Prehrajto_downloader.logger.info(f"Searching Prehrajto with URL: {url}") 46 response = requests.get(url) 47 page = response.text 48 if response.status_code != 200: 49 raise ValueError(f"Failed to retrieve search results, status code: {response.status_code} for URL: {url}") 50 return Prehrajto_downloader.parse_catalogue(page) 51 52 @staticmethod 53 def is_valid_download_page(page) -> bool: 54 """ 55 Returns False when the page indicates the video is still being processed, 56 e.g. contains: <div class="status status--success text-center"> Video se zpracovává </div> 57 Accepts either a requests.Response-like object or raw HTML/text. 
58 """ 59 # Reject non-200 responses 60 if hasattr(page, "status_code") and page.status_code != 200: 61 return False 62 63 # Get HTML/text 64 if hasattr(page, "text"): 65 html = page.text or "" 66 else: 67 html = page or "" 68 69 if isinstance(html, bytes): 70 try: 71 html = html.decode("utf-8", errors="ignore") 72 except Exception: 73 html = str(html) 74 75 soup = bs4.BeautifulSoup(html, "html.parser") 76 77 # remove script/style to avoid false matches 78 for tag in soup(["script", "style"]): 79 tag.decompose() 80 81 # Check status-like blocks first 82 status_divs = soup.find_all("div", class_=re.compile(r"\bstatus\b")) 83 for d in status_divs: 84 if "video se zpracov" in d.get_text(" ", strip=True).lower(): 85 return False 86 87 # Fallback: check whole page text for processing indicators 88 invalid_texts = ( 89 "Video se zpracovává", 90 "video se zpracov", 91 "zpracováv" 92 ) 93 94 soup = remove_style(soup) 95 page_text = soup.get_text(" ", strip=True).lower() 96 if page_text is not None: 97 text = remove_empty_lines(page_text) 98 if any_text_coresponds_to(text, invalid_texts): 99 return False 100 101 # Ensure there is a download anchor present, e.g. 102 # <a id="frame" href="... ?do=download" class="button cta ...">Stáhnout soubor</a> 103 a_frame = soup.find("a", id="frame") 104 if not a_frame: 105 # try fallback: button/cta anchor containing 'stáhnout' or '?do=download' in href 106 a_frame = soup.find("a", class_=re.compile(r"\b(button|cta)\b"), string=re.compile(r"stáhnout", re.I)) 107 108 if not a_frame: 109 return False 110 111 href = a_frame.get("href", "") or "" 112 if "?do=download" not in href and "do=download" not in href: 113 # sometimes href could be absolute or contain params; if no download param, treat as invalid 114 return False 115 116 return True 117 118 119 @staticmethod 120 def get_atributes_from_file_page(soup) -> "Link_to_file": 121 """ 122 Parse file page parameters and return Link_to_file(title, url, size, Prehrajto_downloader). 
123 Accepts either a BeautifulSoup object, a requests.Response, or raw HTML/text. 124 """ 125 # Accept Response or raw HTML as input — normalize to BeautifulSoup 126 soup = normalize_to_beautifulsoup(soup) 127 128 name_label = find_label_span_by_regex(soup, r'^\s*Název souboru[:\s]*$') 129 size_label = find_label_span_by_regex(soup, r'^\s*Velikost[:\s]*$') 130 format_label = find_label_span_by_regex(soup, r'^\s*Formát[:\s]*$') 131 132 name = extract_value_from_label(name_label) 133 size = extract_value_from_label(size_label) 134 fmt = extract_value_from_label(format_label).lower().strip() 135 136 # Normalize filename: append extension if format present and not already there 137 if fmt and name and not re.search(r'\.' + re.escape(fmt) + r'\s*$', name, re.I): 138 name = f"{name}.{fmt}" 139 140 # Get download anchor href and strip query/fragment (detail URL should NOT contain ?do=download) 141 detail_url = "" 142 a_frame = soup.find("a", id="frame") 143 if a_frame: 144 href = a_frame.get("href") or "" 145 # remove query and fragment 146 parsed = urllib.parse.urlparse(href) 147 clean = urllib.parse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", "", "")) 148 # If relative, prepend domain 149 if not clean.startswith("http"): 150 if clean.startswith("/"): 151 detail_url = f"https://prehraj.to{clean}" 152 elif clean: 153 detail_url = f"https://prehraj.to/{clean}" 154 else: 155 detail_url = clean 156 157 return Link_to_file(name, detail_url, size, Prehrajto_downloader) 158 159 @staticmethod 160 def get_atributes_from_catalogue(soup) -> Link_to_file: 161 """ 162 Získá informace o videu z katalogového HTML elementu (soup) a vrátí Link_to_file. 
163 """ 164 a_tag = soup.find("a", class_="video--link") 165 if not a_tag: 166 raise ValueError("Unable to find video link in the provided HTML element.") 167 title = a_tag.get("title") or a_tag.find("h3", class_="video__title").text.strip() 168 href = a_tag.get("href") 169 detail_url = f"https://prehraj.to{href}" if href and not href.startswith("http") else href 170 171 # Najdi velikost souboru 172 size_div = a_tag.find("div", class_="video__tag--size") 173 size = size_div.text.strip() if size_div else "" 174 175 return Link_to_file(title, detail_url, size, Prehrajto_downloader) 176 177 @staticmethod 178 def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]': 179 """ 180 Prochází stránku s výsledky vyhledávání (nový formát s <div>) a vrací informace o souborech. 181 yield: Link_to_file 182 """ 183 184 def find_next_url(soup_obj): 185 """ 186 <a href="/hledej/zakl%C3%ADna%C4%8D?vp-page=2" title="Zobrazit další" class="button cta cta--small">Zobrazit další</a> 187 """ 188 a = soup_obj.find("a", class_=re.compile(r"\bbutton\b.*\bcta\b.*\bcta--small\b"), string=re.compile(r"Zobrazit další", re.I)) 189 if not a: 190 return None 191 href = a.get("href", "").strip() 192 if not href: 193 return None 194 return urllib.parse.urljoin(Prehrajto_downloader.webpage, href) 195 196 def process_soup_and_yield(soup_obj): 197 grids = soup_obj.find_all("div", class_="grid-x") 198 for grid in grids: 199 for div in grid.find_all("div", recursive=False): 200 a_tag = div.find("a", class_="video--link") 201 link_2_file = None 202 if a_tag: 203 try: 204 link_2_file = Prehrajto_downloader.get_atributes_from_catalogue(div) 205 detail_page = download_page(link_2_file.detail_url) 206 if not Prehrajto_downloader.is_valid_download_page(detail_page): 207 raise ValueError(f"Status code: {detail_page.status_code}. 
Invalid download page: no file to download.") 208 link_2_file = Prehrajto_downloader.get_atributes_from_file_page(detail_page) 209 if link_2_file: 210 yield link_2_file 211 except ValueError as e: 212 print_error(f"{str(e)} for file: {(link_2_file.title if link_2_file else 'Unknown')}", False) 213 214 soup = bs4.BeautifulSoup(page, "html.parser") 215 yield from process_soup_and_yield(soup) 216 217 next_url = find_next_url(soup) 218 if next_url: 219 try: 220 resp = requests.get(next_url) 221 if resp.status_code != 200: 222 print_error(f"Failed to retrieve search results, status code: {resp.status_code} for URL: {next_url}", False) 223 else: 224 #TODO: remove recursion. Use loop instead. 225 yield from Prehrajto_downloader.parse_catalogue(resp.text) 226 except Exception as e: 227 print_error(f"Failed to fetch next page {next_url}: {e}", False) 228 229 @staticmethod 230 def get_download_link_from_detail(detail_url: str) -> str: 231 return f"{detail_url}?do=download"
Downloader from: prehraj.to
@staticmethod
def
generate_search_url(prompt, file_type='video', search_type=None):
29 @staticmethod 30 def generate_search_url(prompt, file_type="video", search_type=None): 31 """ 32 Vygeneruje URL pro hledání na prehraj.to. 33 """ 34 prompt = prompt.strip().replace(" ", "%20") 35 return f"https://prehraj.to/hledej/{prompt}"
Vygeneruje URL pro hledání na prehraj.to.
def
search( self, prompt, file_type='all', search_type='relevance') -> Generator[src.link_to_file.Link_to_file, NoneType, NoneType]:
37 def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]': 38 """ 39 Search for files on Datoid.cz. 40 Returns a generator of Link_to_file objects. 41 """ 42 if prompt is None or prompt.strip() == "": 43 raise ValueError("Prompt cannot be empty.") 44 url = Prehrajto_downloader.generate_search_url(prompt, file_type, search_type) 45 Prehrajto_downloader.logger.info(f"Searching Prehrajto with URL: {url}") 46 response = requests.get(url) 47 page = response.text 48 if response.status_code != 200: 49 raise ValueError(f"Failed to retrieve search results, status code: {response.status_code} for URL: {url}") 50 return Prehrajto_downloader.parse_catalogue(page)
Search for files on Datoid.cz. Returns a generator of Link_to_file objects.
@staticmethod
def
is_valid_download_page(page) -> bool:
52 @staticmethod 53 def is_valid_download_page(page) -> bool: 54 """ 55 Returns False when the page indicates the video is still being processed, 56 e.g. contains: <div class="status status--success text-center"> Video se zpracovává </div> 57 Accepts either a requests.Response-like object or raw HTML/text. 58 """ 59 # Reject non-200 responses 60 if hasattr(page, "status_code") and page.status_code != 200: 61 return False 62 63 # Get HTML/text 64 if hasattr(page, "text"): 65 html = page.text or "" 66 else: 67 html = page or "" 68 69 if isinstance(html, bytes): 70 try: 71 html = html.decode("utf-8", errors="ignore") 72 except Exception: 73 html = str(html) 74 75 soup = bs4.BeautifulSoup(html, "html.parser") 76 77 # remove script/style to avoid false matches 78 for tag in soup(["script", "style"]): 79 tag.decompose() 80 81 # Check status-like blocks first 82 status_divs = soup.find_all("div", class_=re.compile(r"\bstatus\b")) 83 for d in status_divs: 84 if "video se zpracov" in d.get_text(" ", strip=True).lower(): 85 return False 86 87 # Fallback: check whole page text for processing indicators 88 invalid_texts = ( 89 "Video se zpracovává", 90 "video se zpracov", 91 "zpracováv" 92 ) 93 94 soup = remove_style(soup) 95 page_text = soup.get_text(" ", strip=True).lower() 96 if page_text is not None: 97 text = remove_empty_lines(page_text) 98 if any_text_coresponds_to(text, invalid_texts): 99 return False 100 101 # Ensure there is a download anchor present, e.g. 102 # <a id="frame" href="... 
?do=download" class="button cta ...">Stáhnout soubor</a> 103 a_frame = soup.find("a", id="frame") 104 if not a_frame: 105 # try fallback: button/cta anchor containing 'stáhnout' or '?do=download' in href 106 a_frame = soup.find("a", class_=re.compile(r"\b(button|cta)\b"), string=re.compile(r"stáhnout", re.I)) 107 108 if not a_frame: 109 return False 110 111 href = a_frame.get("href", "") or "" 112 if "?do=download" not in href and "do=download" not in href: 113 # sometimes href could be absolute or contain params; if no download param, treat as invalid 114 return False 115 116 return True
Returns False when the page indicates the video is still being processed, e.g. contains:
Video se zpracovává
Accepts either a requests.Response-like object or raw HTML/text.
@staticmethod
def
get_atributes_from_file_page(soup) -> src.link_to_file.Link_to_file:
119 @staticmethod 120 def get_atributes_from_file_page(soup) -> "Link_to_file": 121 """ 122 Parse file page parameters and return Link_to_file(title, url, size, Prehrajto_downloader). 123 Accepts either a BeautifulSoup object, a requests.Response, or raw HTML/text. 124 """ 125 # Accept Response or raw HTML as input — normalize to BeautifulSoup 126 soup = normalize_to_beautifulsoup(soup) 127 128 name_label = find_label_span_by_regex(soup, r'^\s*Název souboru[:\s]*$') 129 size_label = find_label_span_by_regex(soup, r'^\s*Velikost[:\s]*$') 130 format_label = find_label_span_by_regex(soup, r'^\s*Formát[:\s]*$') 131 132 name = extract_value_from_label(name_label) 133 size = extract_value_from_label(size_label) 134 fmt = extract_value_from_label(format_label).lower().strip() 135 136 # Normalize filename: append extension if format present and not already there 137 if fmt and name and not re.search(r'\.' + re.escape(fmt) + r'\s*$', name, re.I): 138 name = f"{name}.{fmt}" 139 140 # Get download anchor href and strip query/fragment (detail URL should NOT contain ?do=download) 141 detail_url = "" 142 a_frame = soup.find("a", id="frame") 143 if a_frame: 144 href = a_frame.get("href") or "" 145 # remove query and fragment 146 parsed = urllib.parse.urlparse(href) 147 clean = urllib.parse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", "", "")) 148 # If relative, prepend domain 149 if not clean.startswith("http"): 150 if clean.startswith("/"): 151 detail_url = f"https://prehraj.to{clean}" 152 elif clean: 153 detail_url = f"https://prehraj.to/{clean}" 154 else: 155 detail_url = clean 156 157 return Link_to_file(name, detail_url, size, Prehrajto_downloader)
Parse file page parameters and return Link_to_file(title, url, size, Prehrajto_downloader). Accepts either a BeautifulSoup object, a requests.Response, or raw HTML/text.
@staticmethod
def
get_atributes_from_catalogue(soup) -> src.link_to_file.Link_to_file:
159 @staticmethod 160 def get_atributes_from_catalogue(soup) -> Link_to_file: 161 """ 162 Získá informace o videu z katalogového HTML elementu (soup) a vrátí Link_to_file. 163 """ 164 a_tag = soup.find("a", class_="video--link") 165 if not a_tag: 166 raise ValueError("Unable to find video link in the provided HTML element.") 167 title = a_tag.get("title") or a_tag.find("h3", class_="video__title").text.strip() 168 href = a_tag.get("href") 169 detail_url = f"https://prehraj.to{href}" if href and not href.startswith("http") else href 170 171 # Najdi velikost souboru 172 size_div = a_tag.find("div", class_="video__tag--size") 173 size = size_div.text.strip() if size_div else "" 174 175 return Link_to_file(title, detail_url, size, Prehrajto_downloader)
Získá informace o videu z katalogového HTML elementu (soup) a vrátí Link_to_file.
@staticmethod
def
parse_catalogue(page) -> Generator[src.link_to_file.Link_to_file, NoneType, NoneType]:
177 @staticmethod 178 def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]': 179 """ 180 Prochází stránku s výsledky vyhledávání (nový formát s <div>) a vrací informace o souborech. 181 yield: Link_to_file 182 """ 183 184 def find_next_url(soup_obj): 185 """ 186 <a href="/hledej/zakl%C3%ADna%C4%8D?vp-page=2" title="Zobrazit další" class="button cta cta--small">Zobrazit další</a> 187 """ 188 a = soup_obj.find("a", class_=re.compile(r"\bbutton\b.*\bcta\b.*\bcta--small\b"), string=re.compile(r"Zobrazit další", re.I)) 189 if not a: 190 return None 191 href = a.get("href", "").strip() 192 if not href: 193 return None 194 return urllib.parse.urljoin(Prehrajto_downloader.webpage, href) 195 196 def process_soup_and_yield(soup_obj): 197 grids = soup_obj.find_all("div", class_="grid-x") 198 for grid in grids: 199 for div in grid.find_all("div", recursive=False): 200 a_tag = div.find("a", class_="video--link") 201 link_2_file = None 202 if a_tag: 203 try: 204 link_2_file = Prehrajto_downloader.get_atributes_from_catalogue(div) 205 detail_page = download_page(link_2_file.detail_url) 206 if not Prehrajto_downloader.is_valid_download_page(detail_page): 207 raise ValueError(f"Status code: {detail_page.status_code}. Invalid download page: no file to download.") 208 link_2_file = Prehrajto_downloader.get_atributes_from_file_page(detail_page) 209 if link_2_file: 210 yield link_2_file 211 except ValueError as e: 212 print_error(f"{str(e)} for file: {(link_2_file.title if link_2_file else 'Unknown')}", False) 213 214 soup = bs4.BeautifulSoup(page, "html.parser") 215 yield from process_soup_and_yield(soup) 216 217 next_url = find_next_url(soup) 218 if next_url: 219 try: 220 resp = requests.get(next_url) 221 if resp.status_code != 200: 222 print_error(f"Failed to retrieve search results, status code: {resp.status_code} for URL: {next_url}", False) 223 else: 224 #TODO: remove recursion. Use loop instead. 
225 yield from Prehrajto_downloader.parse_catalogue(resp.text) 226 except Exception as e: 227 print_error(f"Failed to fetch next page {next_url}: {e}", False)
Prochází stránku s výsledky vyhledávání (nový formát s
) a vrací informace o souborech.
yield: Link_to_file