"""Downloader for datoid.cz search results and file pages."""
import bs4
import logging
import os  # needed for os.makedirs below; previously only in scope via star-import by accident
import urllib.parse

import requests

from src.decript import datoid_decrypt
from src.link_to_file import Link_to_file
from basic_colors import *
from src.downloader.page_search import *

DEBUG = False


class Datoid_downloader(Download_page_search):
    """
    Downloader from: datoid.cz
    """
    webpage = "https://datoid.cz"

    # Class-level logger writing to logs/datoid_downloader.log.
    # The hasHandlers() guard prevents duplicate handlers when the module
    # is imported more than once.
    logger = logging.getLogger("Datoid_downloader")
    if not logger.hasHandlers():
        os.makedirs("logs", exist_ok=True)
        handler = logging.FileHandler("logs/datoid_downloader.log", encoding="utf-8")
        formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)

    def __init__(self):
        pass

    def search(self, prompt, file_type="all", search_type="relevance") -> 'Generator[Link_to_file, None, None]':
        """
        Search for files on Datoid.cz.

        Args:
            prompt: non-empty search phrase.
            file_type: key into the inherited ``file_types`` mapping
                (presumably defined on Download_page_search — TODO confirm).
            search_type: result ordering; currently unused, see the TODO in
                :meth:`generate_search_url`.

        Returns:
            Generator of Link_to_file objects.

        Raises:
            ValueError: if ``prompt`` is None or blank.
        """
        if prompt is None or prompt.strip() == "":
            raise ValueError("Prompt cannot be empty.")
        url = Datoid_downloader.generate_search_url(prompt, file_type, search_type)
        Datoid_downloader.logger.info(f"Searching Datoid with URL: {url}")
        response = download_page(url)
        if DEBUG:
            # Store page text for debugging.
            with open("debug_datoid_search_page.html", "w", encoding="utf-8") as f:
                f.write(response.text)
        Datoid_downloader.logger.info(f"Response received: {response.status_code}")
        return Datoid_downloader.parse_catalogue(response)

    @staticmethod
    def generate_search_url(prompt, file_type="all", search_type="relevance"):
        """
        Generate search URL from input atributes.

        TODO: -{Datoid_downloader.search_types[search_type]}
        """
        return f"{Datoid_downloader.webpage}/s/{prompt.replace(' ', '-')}?key=categories&value={Datoid_downloader.file_types[file_type]}"

    @staticmethod
    def get_atributes_from_catalogue(soup) -> "Link_to_file":
        """
        Extract title, link and size from one catalogue <li> item.

        Raises:
            ValueError: if ``soup`` is None or the item cannot be parsed.
        """
        if soup is None:
            raise ValueError("Soup object cannot be None. Catalogue parsing failed.")
        try:
            a_tag = soup.find("a")
            if not a_tag:
                raise ValueError("No anchor tag found in catalogue item.")

            # Prefer data attributes used by Datoid's JS obfuscation.
            # The <span class="filename"> text is the fallback title.
            filename_span = a_tag.find("span", class_="filename")
            span_title = filename_span.text.strip() if filename_span else None
            data_href = a_tag.get("data-href")
            data_title = a_tag.get("data-title") or span_title
            data_url = a_tag.get("data-url")

            # Compute link: JS does link = data_url.replace("%s", d(data-href, data-title+salt)); link = link.replace("%s", data-title)
            if data_href and data_title and data_url:
                decrypted = datoid_decrypt(data_href, data_title)
                # Replace first %s with decrypted token, second with title.
                try:
                    formatted = data_url.replace("%s", decrypted, 1).replace("%s", data_title, 1)
                except Exception:
                    formatted = f"/{decrypted}/{data_title}"
                link = urllib.parse.urljoin(Datoid_downloader.webpage, formatted)
            else:
                # Fallback to the plain href attribute.
                href = a_tag.get("href")
                if not href:
                    raise ValueError("No usable URL found in anchor tag.")
                link = urllib.parse.urljoin(Datoid_downloader.webpage, href)

            title = data_title or span_title or ""
            # The file size is the text of the parent of the size icon.
            size_span = a_tag.find("i", class_="icon-size-white")
            size = None
            if size_span and size_span.parent:
                size = size_span.parent.text.strip()
            link_2_file = Link_to_file(title, link, size, Datoid_downloader)
        except Exception as e:
            Datoid_downloader.logger.error(f"Error parsing catalogue attributes: {e} \n Soup content: {soup}")
            raise ValueError("unable to parse atributes." + str(e))
        return link_2_file

    @staticmethod
    def get_atributes_from_file_page(soup) -> "Link_to_file":
        """
        Extract title, size and download link from a parsed file page.

        Raises:
            ValueError: if ``soup`` is None or the page cannot be parsed.
        """
        if soup is None:
            raise ValueError("Soup object cannot be None. File page parsing failed.")
        try:
            # File name from <h1>.
            title = soup.find("h1").text.strip()
            size = None
            # Read the key/value rows of the parameters table.
            table = soup.find("table", class_="parameters")
            if table:
                params = {}
                for row in table.find_all("tr"):
                    th = row.find("th")
                    td = row.find("td")
                    if th and td:
                        key = th.text.strip().replace(":", "")
                        value = td.text.strip()
                        params[key] = value
                # File name ("Název souboru") overrides the <h1> title.
                if "Název souboru" in params:
                    title = params["Název souboru"]
                # Size ("Velikost").
                if "Velikost" in params:
                    size = params["Velikost"]
                # File type and title are also available if needed
                # (params.get("Typ souboru"), params.get("Titul")).
            # Download link; guard against a missing button so we raise a
            # descriptive ValueError instead of an AttributeError.
            a_tag = soup.find("a", class_="btn-download")
            if a_tag is None:
                raise ValueError("No download button found on file page.")
            link = Datoid_downloader.webpage + a_tag.get("href")
            link_2_file = Link_to_file(title, link, size, Datoid_downloader)
        except Exception as e:
            Datoid_downloader.logger.error(f"Error parsing file page attributes: {e}\n Soup content: {soup}\n")
            raise ValueError("unable to parse atributes." + str(e))
        return link_2_file

    @staticmethod
    def test_downloaded_file(link_2_file, download_folder) -> bool:
        """Delegate the downloaded-file check to the base class."""
        return Download_page_search.test_downloaded_file(link_2_file, download_folder)

    @staticmethod
    def parse_file_page(page):
        """
        Parse the file page and return the content.

        if not Datoid_downloader.is_valid_download_page(page):
            raise ValueError("Status code: " + str(page.status_code) + ". Invalid download page: no file to download.")
        """
        soup = bs4.BeautifulSoup(page.text, "html.parser")
        content = soup.find("div", id="main")
        if DEBUG:
            print("Parsed file page content:")
            print(content.prettify() if content else "No content found.")
        return content

    @staticmethod
    def get_download_link_from_detail(detail_url: str) -> str:
        """
        Get the direct download link from a file-detail page on datoid.cz.

        Appends ``?request=1`` to the detail URL, which makes the server
        answer with JSON containing either ``download_link`` or ``error``.

        Raises:
            ValueError: on non-200 status, undecodable JSON, an ``error``
                key in the response, or a missing download link.
        """
        request = "?request=1"
        response = requests.get(detail_url + request)
        if response.status_code != 200:
            Datoid_downloader.logger.error(f"Request failed with status code {response.status_code} for detail URL: {detail_url}")
            raise ValueError(f"Request failed with status code {response.status_code}")

        try:
            json_response = response.json()
        except Exception as e:
            Datoid_downloader.logger.error(f"Failed to decode JSON response for detail URL: {detail_url}. Error: {e}")
            raise ValueError("Failed to decode JSON response.") from e

        if "error" in json_response:
            Datoid_downloader.logger.error(f"JSON response: {json_response['error']} for detail URL: {detail_url}")
            raise ValueError("No free slots available.")

        if "download_link" in json_response and json_response["download_link"]:
            return json_response["download_link"]
        else:
            Datoid_downloader.logger.error(f"JSON response: {json_response} for detail URL: {detail_url}")
            raise ValueError("No download link found in json_response.")

    @staticmethod
    def parse_catalogue(page) -> 'Generator[Link_to_file, None, None]':
        """
        Iterates through the search results page and returns information about files.

        Follows the "next" pagination link until no further page exists.

        Yields: Link_to_file
        """
        def process_soup_and_yield(soup_obj):
            content = soup_obj.find("ul", class_="list", id="snippet--search_files")
            if content is None:
                return None
            content = remove_style(content)
            for videobox in content.find_all("li"):
                catalogue_file = None
                # Reset per iteration so the except block never logs stale
                # data or raises UnboundLocalError when parsing fails early.
                download_page_content = None
                try:
                    catalogue_file = Datoid_downloader.get_atributes_from_catalogue(videobox)
                    download_page_content = Datoid_downloader.parse_file_page(download_page(catalogue_file.detail_url))
                    link_2_file = Datoid_downloader.get_atributes_from_file_page(download_page_content)
                    yield link_2_file
                except ValueError as e:
                    Datoid_downloader.logger.error(f"Error: {e}\nSoup content: {download_page_content}\nCatalogue file: {catalogue_file if catalogue_file else 'Unknown'}")

                    print_error(str(e) + " for file: " + (catalogue_file.title if catalogue_file else "Unknown"), False)

        def find_next_url(soup_obj):
            # <a href="/s/zaklinac/2" class="next ajax">Další</a>
            a = soup_obj.find("a", class_="next ajax")
            if not a:
                return None
            href = a.get("href", "").strip()
            if not href:
                return None
            return urllib.parse.urljoin(Datoid_downloader.webpage, href)

        while True:
            soup = bs4.BeautifulSoup(page.text, "html.parser")
            yield from process_soup_and_yield(soup)
            next_url = find_next_url(soup)
            if not next_url:
                break
            page = download_page(next_url)