{"id":541798154,"node_id":"R_kgDOIEsvCg","name":"unstructured","full_name":"Unstructured-IO/unstructured","private":false,"owner":{"login":"Unstructured-IO","id":108372208,"node_id":"O_kgDOBnWg8A","avatar_url":"https://avatars.githubusercontent.com/u/108372208?v=4","gravatar_id":"","url":"https://api.github.com/users/Unstructured-IO","html_url":"https://github.com/Unstructured-IO","followers_url":"https://api.github.com/users/Unstructured-IO/followers","following_url":"https://api.github.com/users/Unstructured-IO/following{/other_user}","gists_url":"https://api.github.com/users/Unstructured-IO/gists{/gist_id}","starred_url":"https://api.github.com/users/Unstructured-IO/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/Unstructured-IO/subscriptions","organizations_url":"https://api.github.com/users/Unstructured-IO/orgs","repos_url":"https://api.github.com/users/Unstructured-IO/repos","events_url":"https://api.github.com/users/Unstructured-IO/events{/privacy}","received_events_url":"https://api.github.com/users/Unstructured-IO/received_events","type":"Organization","user_view_type":"public","site_admin":false},"html_url":"https://github.com/Unstructured-IO/unstructured","description":"Convert documents to structured data effortlessly. Unstructured is open-source ETL solution for transforming complex documents into clean, structured formats for language models.  Visit our website to learn more about our enterprise grade Platform product for production grade workflows, partitioning, enrichments, chunking and embedding.","fork":false,"url":"https://api.github.com/repos/Unstructured-IO/unstructured","forks_url":"https://api.github.com/repos/Unstructured-IO/unstructured/forks","keys_url":"https://api.github.com/repos/Unstructured-IO/unstructured/keys{/key_id}","collaborators_url":"https://api.github.com/repos/Unstructured-IO/unstructured/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/Unstructured-IO/unstructured/teams","hooks_url":"https://api.github.com/repos/Unstructured-IO/unstructured/hooks","issue_events_url":"https://api.github.com/repos/Unstructured-IO/unstructured/issues/events{/number}","events_url":"https://api.github.com/repos/Unstructured-IO/unstructured/events","assignees_url":"https://api.github.com/repos/Unstructured-IO/unstructured/assignees{/user}","branches_url":"https://api.github.com/repos/Unstructured-IO/unstructured/branches{/branch}","tags_url":"https://api.github.com/repos/Unstructured-IO/unstructured/tags","blobs_url":"https://api.github.com/repos/Unstructured-IO/unstructured/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/Unstructured-IO/unstructured/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/Unstructured-IO/unstructured/git/refs{/sha}","trees_url":"https://api.github.com/repos/Unstructured-IO/unstructured/git/trees{/sha}","statuses_url":"https://api.github.com/repos/Unstructured-IO/unstructured/statuses/{sha}","languages_url":"https://api.github.com/repos/Unstructured-IO/unstructured/languages","stargazers_url":"https://api.github.com/repos/Unstructured-IO/unstructured/stargazers","contributors_url":"https://api.github.com/repos/Unstructured-IO/unstructured/contributors","subscribers_url":"https://api.github.com/repos/Unstructured-IO/unstructured/subscribers","subscription_url":"https://api.github.com/repos/Unstructured-IO/unstructured/subscription","commits_url":"https://api.github.com/repos/Unstructured-IO/unstructured/commits{/sha}","git_commits_url":"https://api.github.com/repos/Unstructured-IO/unstructured/git/commits{/sha}","comments_url":"https://api.github.com/repos/Unstructured-IO/unstructured/comments{/number}","issue_comment_url":"https://api.github.com/repos/Unstructured-IO/unstructured/issues/comments{/number}","contents_url":"https://api.github.com/repos/Unstructured-IO/unstructured/contents/{+path}","compare_url":"https://api.github.com/repos/Unstructured-IO/unstructured/compare/{base}...{head}","merges_url":"https://api.github.com/repos/Unstructured-IO/unstructured/merges","archive_url":"https://api.github.com/repos/Unstructured-IO/unstructured/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/Unstructured-IO/unstructured/downloads","issues_url":"https://api.github.com/repos/Unstructured-IO/unstructured/issues{/number}","pulls_url":"https://api.github.com/repos/Unstructured-IO/unstructured/pulls{/number}","milestones_url":"https://api.github.com/repos/Unstructured-IO/unstructured/milestones{/number}","notifications_url":"https://api.github.com/repos/Unstructured-IO/unstructured/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/Unstructured-IO/unstructured/labels{/name}","releases_url":"https://api.github.com/repos/Unstructured-IO/unstructured/releases{/id}","deployments_url":"https://api.github.com/repos/Unstructured-IO/unstructured/deployments","created_at":"2022-09-26T21:53:41Z","updated_at":"2026-04-08T16:42:49Z","pushed_at":"2026-04-08T16:14:00Z","git_url":"git://github.com/Unstructured-IO/unstructured.git","ssh_url":"git@github.com:Unstructured-IO/unstructured.git","clone_url":"https://github.com/Unstructured-IO/unstructured.git","svn_url":"https://github.com/Unstructured-IO/unstructured","homepage":"https://www.unstructured.io/","size":236527,"stargazers_count":14414,"watchers_count":14414,"language":"HTML","has_issues":true,"has_projects":false,"has_downloads":true,"has_wiki":true,"has_pages":true,"has_discussions":true,"forks_count":1207,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":243,"license":{"key":"apache-2.0","name":"Apache License 2.0","spdx_id":"Apache-2.0","url":"https://api.github.com/licenses/apache-2.0","node_id":"MDc6TGljZW5zZTI="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"has_pull_requests":true,"pull_request_creation_policy":"all","topics":["data-pipelines","deep-learning","document-image-analysis","document-image-processing","document-parser","document-parsing","docx","donut","information-retrieval","langchain","llm","machine-learning","ml","natural-language-processing","nlp","ocr","pdf","pdf-to-json","pdf-to-text","preprocessing"],"visibility":"public","forks":1207,"open_issues":243,"watchers":14414,"default_branch":"main","temp_clone_token":null,"custom_properties":{},"organization":{"login":"Unstructured-IO","id":108372208,"node_id":"O_kgDOBnWg8A","avatar_url":"https://avatars.githubusercontent.com/u/108372208?v=4","gravatar_id":"","url":"https://api.github.com/users/Unstructured-IO","html_url":"https://github.com/Unstructured-IO","followers_url":"https://api.github.com/users/Unstructured-IO/followers","following_url":"https://api.github.com/users/Unstructured-IO/following{/other_user}","gists_url":"https://api.github.com/users/Unstructured-IO/gists{/gist_id}","starred_url":"https://api.github.com/users/Unstructured-IO/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/Unstructured-IO/subscriptions","organizations_url":"https://api.github.com/users/Unstructured-IO/orgs","repos_url":"https://api.github.com/users/Unstructured-IO/repos","events_url":"https://api.github.com/users/Unstructured-IO/events{/privacy}","received_events_url":"https://api.github.com/users/Unstructured-IO/received_events","type":"Organization","user_view_type":"public","site_admin":false},"network_count":1207,"subscribers_count":69}