Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Saldivar Ezequiel Maximiliano
scrappinStarz
Commits
744af776
Commit
744af776
authored
3 years ago
by
esaldivar
Browse files
Options
Download
Email Patches
Plain Diff
A la información de la serie le agrego los capítulos por temporada
parent
d215239d
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
5 deletions
+44
-5
scrapSeries.py
scrapSeries.py
+43
-4
series.json
series.json
+1
-1
No files found.
scrapSeries.py
View file @
744af776
...
...
@@ -4,6 +4,41 @@ from requests_html import HTMLSession
from
bs4
import
BeautifulSoup
import
json
def _texto(tag):
    """Return ``tag.string``, or None when the tag was not found."""
    return None if tag is None else tag.string


def capitulosPorTemporada(modal):
    """Collect the episodes of the season shown as active in a series page.

    Args:
        modal: Response for the series page (the caller renders it first).
            Its ``html`` attribute is queried with ``find`` and its
            ``html.raw_html`` is parsed with BeautifulSoup.

    Returns:
        dict: Maps the active season's label to a list of episode dicts with
        the keys ``numeroCapitulo``, ``nombre``, ``descripcion``,
        ``temporada``, ``duracion`` and ``anio``. The dict is empty when the
        page has no season container, no season links, or no season marked
        as active. Individual fields are None when the matching tag is
        missing from an episode.
    """
    # Unparsed page (no BeautifulSoup yet): locate the season selector.
    temporadas = modal.html.find('div.season-number-container', first=True)
    if temporadas is None or not temporadas.absolute_links:
        return {}

    # The markup is the same for every season link, so it is parsed once
    # instead of once per link.
    # NOTE(review): the season links are never fetched, so only the season
    # currently marked "active" in this page ends up in the result.
    bs = BeautifulSoup(modal.html.raw_html, "html.parser")
    season = bs.find(['div'], class_="season-number d-inline-block text-center active")
    if season is None:
        return {}

    temporada = f'{season.string}'
    episodios = []
    contenedores = bs.find_all(['div'], class_="episode-container row")
    for numero, episode in enumerate(contenedores, start=1):
        metadataEpisodio = episode.find_all("li", limit=5)
        # The code reads duration from the second <li> and year from the third.
        duracion = metadataEpisodio[1] if len(metadataEpisodio) > 1 else None
        anio = metadataEpisodio[2] if len(metadataEpisodio) > 2 else None
        episodios.append({
            'numeroCapitulo': numero,
            'nombre': _texto(episode.find(['h6'], class_="title")),
            'descripcion': _texto(episode.find(['p'], characterwidth="9")),
            'temporada': temporada,
            'duracion': _texto(duracion),
            'anio': _texto(anio),
        })
    return {temporada: episodios}
# Accumulator for the scraped catalogue; each series dict is appended to 'series'.
contenido = {'series': []}
...
...
@@ -16,16 +51,15 @@ r = session.get(urlSeries)
r
.
html
.
render
(
sleep
=
2
,
timeout
=
80000
)
series
=
r
.
html
.
xpath
(
'//*[@id="subview-container"]/starz-view-all/div/div/div/div/section[1]/virtual-scroller/div[2]'
,
first
=
True
)
linksS
=
series
.
absolute_links
##Ahora las itero para obtener los metadatos
for
link
in
linksS
:
for
link
in
series
.
absolute_links
:
#for i in range(0,10):
#link = linksM.pop()
modal
=
session
.
get
(
link
)
modal
.
html
.
render
(
sleep
=
3
,
timeout
=
80000
)
capitulos
=
capitulosPorTemporada
(
modal
)
#Uso BS para obtener la metadata
soup
=
BeautifulSoup
(
modal
.
html
.
raw_html
,
"html.parser"
)
...
...
@@ -47,11 +81,16 @@ for link in linksS:
'genero'
:
metadata_series
[
2
].
string
,
'descripcion'
:
description
,
'link'
:
link
,
'capitulos'
:
list
()
'capitulos'
:
capitulos
,
'disponibilidad'
:
"con suscripcion"
}
print
(
dicc
)
contenido
[
'series'
].
append
(
dicc
)
session.close()
# Save the collected data to a JSON file. Writing UTF-8 with
# ensure_ascii=False keeps accented Spanish text readable instead of
# \uXXXX escapes, and does not depend on the platform's default encoding.
with open('series.json', 'w', encoding='utf-8') as f:
    json.dump(contenido, f, ensure_ascii=False)
This diff is collapsed.
Click to expand it.
series.json
View file @
744af776
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment