mirror of
https://github.com/not-kennethreitz/markdownplease.com.git
synced 2026-06-05 23:20:19 +00:00
Revert
This commit is contained in:
@@ -3,28 +3,18 @@
|
||||
This is a very simple web service that will take a given URL, and return
|
||||
a Markdown representation of that page.
|
||||
|
||||
Powered by [Readability](http://readability.com/),
|
||||
[Requests](http://python-guide.org/),
|
||||
[html2text](http://www.aaronsw.com/2002/html2text/),
|
||||
[markdown](http://pythonhosted.org/Markdown/),
|
||||
and [Flask](http://flask.pocoo.org/).
|
||||
Powered by [Readability](http://readability.com/), [Requests](http://python-guide.org/), [html2text](http://www.aaronsw.com/2002/html2text/), and [Flask](http://flask.pocoo.org/).
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
$ curl http://url2markdown.herokuapp.com/?url=http://kennethreitz.org
|
||||
|
||||
|
||||
# Hi, there.
|
||||
|
||||
|
||||
My name is Kenneth Reitz.
|
||||
...
|
||||
|
||||
Or, if you understand code:
|
||||
|
||||
$ mkvirtualenv url2markdown
|
||||
$ pip install -r requirements.txt
|
||||
$ READABILITY_TOKEN="XXX" python service.py
|
||||
|
||||
Enjoy!
|
||||
|
||||
## Configuration
|
||||
@@ -38,4 +28,3 @@ You can use [autoenv](https://github.com/kennethreitz/autoenv) to do this easily
|
||||
## License
|
||||
|
||||
Unfortunately, this code is released under [GPLv3](http://www.gnu.org/copyleft/gpl.html).
|
||||
|
||||
|
||||
+13
-21
@@ -1,44 +1,36 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
|
||||
|
||||
import os
|
||||
|
||||
import requests
|
||||
from html2text import html2text
|
||||
|
||||
READABILITY_URL = 'https://www.readability.com/api/content/v1/parser'
|
||||
|
||||
_READABILITY_URL = 'https://www.readability.com/api/content/v1/parser'
|
||||
|
||||
|
||||
def _get_readability_html_and_title(url):
|
||||
def readability(url):
|
||||
token = os.environ.get('READABILITY_TOKEN')
|
||||
params = {'url': url, 'token': token}
|
||||
|
||||
r = requests.get(_READABILITY_URL, params=params)
|
||||
decoded_content = (
|
||||
r.json()['content'],
|
||||
r.json()['title'],
|
||||
)
|
||||
return decoded_content
|
||||
r = requests.get(READABILITY_URL, params=params)
|
||||
return r.json()['content'], r.json()['title']
|
||||
|
||||
|
||||
def _convert_html_to_markdown(html, title=None):
|
||||
def convert(html, title=None):
|
||||
if title:
|
||||
title = '# {}'.format(title)
|
||||
html = '\n\n'.join([title, html])
|
||||
|
||||
text_from_html = html2text(html)
|
||||
return text_from_html
|
||||
return html2text(html)
|
||||
|
||||
|
||||
def get_readable_content_from_url(url):
|
||||
def meh(url):
|
||||
try:
|
||||
content, title = _get_readability_html_and_title(url)
|
||||
markdown = _convert_html_to_markdown(content, title=title)
|
||||
return markdown
|
||||
content, title = readability(url)
|
||||
return convert(content, title=title)
|
||||
except KeyError:
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print get_readable_content_from_url('http://kennethreitz.org/')
|
||||
|
||||
print meh('http://kennethreitz.org/')
|
||||
@@ -8,4 +8,3 @@ html2text==3.200.3
|
||||
itsdangerous==0.23
|
||||
requests==2.0.0
|
||||
wsgiref==0.1.2
|
||||
markdown==2.3.1
|
||||
|
||||
+6
-27
@@ -1,40 +1,19 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from flask import Flask, request, render_template, Markup
|
||||
from converter import get_readable_content_from_url
|
||||
from markdown import markdown
|
||||
|
||||
from flask import Flask, request, redirect, url_for, render_template
|
||||
from converter import meh
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
def _markdown_to_html(text):
|
||||
return Markup(markdown(text))
|
||||
|
||||
|
||||
@app.route('/')
|
||||
def fuck_gpl3():
|
||||
url = request.args.get('url')
|
||||
type = request.args.get('type', 'markdown')
|
||||
|
||||
content = get_readable_content_from_url(url)
|
||||
|
||||
if url:
|
||||
if not content:
|
||||
return '404 Not Found', 404
|
||||
|
||||
if type == 'html':
|
||||
markdown_url_contents = _markdown_to_html(content)
|
||||
return render_template(
|
||||
'index.html',
|
||||
converted_url_contents=markdown_url_contents,
|
||||
page_url=url,
|
||||
)
|
||||
else:
|
||||
content = meh(url)
|
||||
if content:
|
||||
return content, 200, {'Content-Type': 'text/x-markdown; charset=UTF-8'}
|
||||
else:
|
||||
return '404 Not Found', 404
|
||||
else:
|
||||
return render_template('index.html')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run()
|
||||
|
||||
Vendored
-9
File diff suppressed because one or more lines are too long
+15
-66
@@ -1,72 +1,21 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Markdown Please!</title>
|
||||
<link
|
||||
href="{{ url_for('static', filename='css/bootstrap.min.css' ) }}"
|
||||
rel="stylesheet"
|
||||
/>
|
||||
</head>
|
||||
<body>
|
||||
<nav class="navbar navbar-default navbar-fixed-top">
|
||||
<div class="container">
|
||||
<ul class="nav">
|
||||
<a class="navbar-brand" href="/">Markdown Please!</a>
|
||||
<head>
|
||||
<title>url2markdown</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>url2markdown</h1>
|
||||
<p>This is a very simple web service that will take a given URL, and return
|
||||
a Markdown representation of that page.</p>
|
||||
|
||||
<form action="/" method="get" class="navbar-form navbar-left">
|
||||
<fieldset>
|
||||
<input
|
||||
type="text"
|
||||
name="url"
|
||||
class="form-control"
|
||||
style="width: 300px;"
|
||||
placeholder="http://en.wikipedia.org/wiki/Markdown"
|
||||
{% if page_url %}
|
||||
value="{{ page_url }}"
|
||||
{% endif %}
|
||||
/>
|
||||
<input type="hidden" name="type" value="html" />
|
||||
<button type="submit" class="btn btn-default">
|
||||
Go
|
||||
</button>
|
||||
</fieldset>
|
||||
</form>
|
||||
<form action="/" method="get">
|
||||
URL: <input type="text" name="url">
|
||||
<button type='submit'>Submit</button>
|
||||
</form>
|
||||
|
||||
{% if page_url %}
|
||||
<a
|
||||
href="/?url={{ page_url }}"
|
||||
class="btn btn-default navbar-btn navbar-right"
|
||||
>
|
||||
Get the Markdown
|
||||
</a>
|
||||
{% endif %}
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<p>A <a href='http://kennethreitz.org/projects/'>Kenneth Reitz</a> project.</p>
|
||||
|
||||
<div class="container">
|
||||
<div style="padding-top:40px;padding-bottom:80px;">
|
||||
{% if page_url %}
|
||||
<div>
|
||||
{{ converted_url_contents }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<a href="https://github.com/kennethreitz/url2markdown"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png" alt="Fork me on GitHub"></a>
|
||||
|
||||
<a href="https://github.com/kennethreitz/url2markdown">
|
||||
<img
|
||||
style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
|
||||
src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png"
|
||||
alt="Fork me on GitHub"
|
||||
/>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<footer class="navbar navbar-fixed-bottom panel-footer">
|
||||
<p class="container">
|
||||
A <a href="http://kennethreitz.org/projects/">Kenneth Reitz</a>/
|
||||
<a href="http://lumbercoder.com/">Gil Goncalves</a> project.
|
||||
</p>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user