Commit 744af776 authored by esaldivar's avatar esaldivar
Browse files

A la información de la serie le agrego los capítulos por temporada

parent d215239d
......@@ -4,6 +4,41 @@ from requests_html import HTMLSession
from bs4 import BeautifulSoup
import json
def _texto(tag):
    """Return ``tag.string``, or None when the tag itself was not found."""
    return tag.string if tag is not None else None


def capitulosPorTemporada(modal):
    """Collect the episodes of the season shown as active on a series page.

    Args:
        modal: Response object for the series page. Only ``modal.html.find``
            and ``modal.html.raw_html`` are used; the raw HTML is parsed
            with BeautifulSoup.

    Returns:
        A dict mapping the active season's label (``str(season.string)``) to
        a list of episode dicts with the keys ``numeroCapitulo``, ``nombre``,
        ``descripcion``, ``temporada``, ``duracion`` and ``anio``. Fields
        whose tag is missing from the page are stored as None. An empty dict
        is returned when the season container, its links, or the active
        season marker cannot be found.
    """
    temporadas = modal.html.find('div.season-number-container', first=True)
    # find(..., first=True) gives None when nothing matches; without season
    # links there is nothing to collect either.
    if temporadas is None or not temporadas.absolute_links:
        return {}

    # NOTE(review): the season links are never fetched here, so only the
    # season marked as active in this page's HTML is captured. The page is
    # therefore parsed once instead of once per link (same result).
    bs = BeautifulSoup(modal.html.raw_html, "html.parser")
    season = bs.find(['div'], class_="season-number d-inline-block text-center active")
    if season is None:
        return {}

    temporada = f'{season.string}'
    episodios = []
    contenedores = bs.find_all(['div'], class_="episode-container row")
    for numero, episode in enumerate(contenedores, start=1):
        metadataEpisodio = episode.find_all("li", limit=5)
        episodios.append({
            'numeroCapitulo': numero,
            'nombre': _texto(episode.find(['h6'], class_="title")),
            'descripcion': _texto(episode.find(['p'], characterwidth="9")),
            'temporada': temporada,
            # The second and third <li> entries are read as duration and
            # year; pages with fewer entries get None instead of crashing.
            'duracion': metadataEpisodio[1].string if len(metadataEpisodio) > 1 else None,
            'anio': metadataEpisodio[2].string if len(metadataEpisodio) > 2 else None,
        })
    return {temporada: episodios}
# Accumulator for the scraped data: a single 'series' list that receives one
# dict per series.
contenido = {'series': []}
......@@ -16,16 +51,15 @@ r = session.get(urlSeries)
# Render the listing page before querying it (see the requests-html docs for
# the exact meaning of `sleep` and `timeout`).
r.html.render(sleep=2,timeout=80000)
# Select the listing container by XPath; every absolute link inside it is
# treated as one series page.
series = r.html.xpath('//*[@id="subview-container"]/starz-view-all/div/div/div/div/section[1]/virtual-scroller/div[2]', first=True)
linksS = series.absolute_links
## Iterate over the series links to collect each one's metadata.
# NOTE(review): the two consecutive `for` headers below look like the
# removed/added pair of a diff whose +/- markers were lost; confirm which one
# is live. Both iterate over the same set of links.
for link in linksS:
for link in series.absolute_links:
#for i in range(0,10):
#link = linksM.pop()
# Fetch and render the individual series page.
modal = session.get(link)
modal.html.render(sleep=3, timeout=80000)
# Episodes grouped by season, stored later under the 'capitulos' key.
capitulos = capitulosPorTemporada(modal)
# Use BeautifulSoup to extract the series metadata.
soup = BeautifulSoup(modal.html.raw_html, "html.parser")
......@@ -47,11 +81,16 @@ for link in linksS:
'genero': metadata_series[2].string,
'descripcion': description,
'link': link,
# NOTE(review): the two 'capitulos' entries below look like the removed/added
# pair of a diff whose +/- markers were lost (the first has no trailing
# comma); presumably only the `capitulos` variable version is live. Confirm.
'capitulos': list()
'capitulos': capitulos,
'disponibilidad': "con suscripcion"
}
# Echo the record, then add it to the list that is later written out as JSON.
print(dicc)
contenido['series'].append(dicc)
# Scraping is finished, so close the HTTP session.
session.close()

# Write everything that was collected to series.json.
with open('series.json', 'w') as salida:
    json.dump(contenido, salida)
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment